Skip to content

Commit

Permalink
Serialize MediaType as an integer to avoid string parsing
Browse files Browse the repository at this point in the history
Resolves opensearch-project#15979

Signed-off-by: Andrew Ross <andrross@amazon.com>
  • Loading branch information
andrross committed Sep 18, 2024
1 parent 4802d0d commit bd090d8
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
import org.opensearch.core.common.text.Text;
import org.opensearch.core.concurrency.OpenSearchRejectedExecutionException;
import org.opensearch.core.xcontent.MediaType;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.semver.SemverRange;

import java.io.ByteArrayInputStream;
Expand Down Expand Up @@ -353,7 +352,7 @@ public BigInteger readBigInteger() throws IOException {
}

public MediaType readMediaType() throws IOException {
return MediaTypeRegistry.fromMediaType(readString());
return MediaType.readFrom(this);
}

@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@

package org.opensearch.core.xcontent;

import org.opensearch.Version;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;

import java.io.IOException;
Expand Down Expand Up @@ -73,6 +76,13 @@ default String typeWithSubtype() {
return type() + "/" + subtype();
}

/**
* Unique identifier typically used for binary serialization. Must be distinct
* from the unique IDs of all other MediaTypes registered with {@link MediaTypeRegistry}.
* See {@link MediaType#readFrom} and {@link MediaType#writeTo}.
*/
int uniqueId();

XContent xContent();

boolean detectedXContent(final byte[] bytes, int offset, int length);
Expand All @@ -89,6 +99,22 @@ default String mediaType() {

XContentBuilder contentBuilder(final OutputStream os) throws IOException;

default void writeTo(StreamOutput output) throws IOException {
if (output.getVersion().onOrAfter(Version.V_3_0_0)) {
output.writeVInt(uniqueId());
} else {
output.writeString(this.mediaType());
}
}

static MediaType readFrom(StreamInput in) throws IOException {
if (in.getVersion().onOrAfter(Version.V_2_10_0) && in.getVersion().before(Version.V_3_0_0)) {
return MediaTypeRegistry.fromMediaType(in.readString());
} else {
return MediaTypeRegistry.fromUniqueId(in.readVInt());
}
}

/**
* Accepts a format string, which is most of the time is equivalent to {@link MediaType#subtype()}
* and attempts to match the value to an {@link MediaType}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
public final class MediaTypeRegistry {
private static Map<String, MediaType> formatToMediaType = Map.of();
private static Map<String, MediaType> typeWithSubtypeToMediaType = Map.of();
private static Map<Integer, MediaType> uniqueIdToMediaType = Map.of();

// Default mediaType singleton
private static MediaType DEFAULT_MEDIA_TYPE;
Expand Down Expand Up @@ -84,12 +85,19 @@ private static void register(MediaType[] acceptedMediaTypes, Map<String, MediaTy
// ensures the map is not overwritten:
Map<String, MediaType> typeMap = new HashMap<>(typeWithSubtypeToMediaType);
Map<String, MediaType> formatMap = new HashMap<>(formatToMediaType);
Map<Integer, MediaType> uniqueIdMap = new HashMap<>(uniqueIdToMediaType);
for (MediaType mediaType : acceptedMediaTypes) {
if (formatMap.containsKey(mediaType.format())) {
throw new IllegalArgumentException("unable to register mediaType: [" + mediaType.format() + "]. Type already exists.");
}
if (uniqueIdMap.containsKey(mediaType.uniqueId())) {
throw new IllegalArgumentException(
"unable to register mediaType with ID: [" + mediaType.uniqueId() + "]. ID already exists."
);
}
typeMap.put(mediaType.typeWithSubtype(), mediaType);
formatMap.put(mediaType.format(), mediaType);
uniqueIdMap.put(mediaType.uniqueId(), mediaType);
}
for (Map.Entry<String, MediaType> entry : additionalMediaTypes.entrySet()) {
String typeWithSubtype = entry.getKey().toLowerCase(Locale.ROOT);
Expand All @@ -111,6 +119,11 @@ private static void register(MediaType[] acceptedMediaTypes, Map<String, MediaTy

formatToMediaType = Map.copyOf(formatMap);
typeWithSubtypeToMediaType = Map.copyOf(typeMap);
uniqueIdToMediaType = Map.copyOf(uniqueIdMap);
}

public static MediaType fromUniqueId(int id) {
return uniqueIdToMediaType.get(id);
}

public static MediaType fromMediaType(String mediaType) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.core.xcontent;

import org.opensearch.Version;
import org.opensearch.common.io.stream.BytesStreamOutput;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.test.OpenSearchTestCase;

import java.io.IOException;

public class MediaTypeSerializationTests extends OpenSearchTestCase {

public void testRoundtrip() throws IOException {
try (BytesStreamOutput output = new BytesStreamOutput()) {
XContentType.JSON.writeTo(output);
XContentType.SMILE.writeTo(output);
XContentType.YAML.writeTo(output);
XContentType.CBOR.writeTo(output);
try (StreamInput in = output.bytes().streamInput()) {
assertEquals(XContentType.JSON, MediaType.readFrom(in));
assertEquals(XContentType.SMILE, MediaType.readFrom(in));
assertEquals(XContentType.YAML, MediaType.readFrom(in));
assertEquals(XContentType.CBOR, MediaType.readFrom(in));
}
}
}

public void testHardcodedOrdinals() throws IOException {
try (BytesStreamOutput output = new BytesStreamOutput()) {
output.writeVInt(0);
output.writeVInt(1);
output.writeVInt(2);
output.writeVInt(3);
try (StreamInput in = output.bytes().streamInput()) {
assertEquals(XContentType.JSON, MediaType.readFrom(in));
assertEquals(XContentType.SMILE, MediaType.readFrom(in));
assertEquals(XContentType.YAML, MediaType.readFrom(in));
assertEquals(XContentType.CBOR, MediaType.readFrom(in));
}
}
}

public void testBackwardsCompatibilityWithSerializedEnums() throws IOException {
// Prior to version 2.10, OpenSearch would serialize XContentType as enums, which
// writes the ordinal as a VInt. This test ensure the new MediaType.readFrom method is
// functionally compatible with this previous approach.
try (BytesStreamOutput output = new BytesStreamOutput()) {
output.writeEnum(XContentType.JSON);
output.writeEnum(XContentType.SMILE);
output.writeEnum(XContentType.YAML);
output.writeEnum(XContentType.CBOR);
try (StreamInput in = output.bytes().streamInput()) {
assertEquals(XContentType.JSON, MediaType.readFrom(in));
assertEquals(XContentType.SMILE, MediaType.readFrom(in));
assertEquals(XContentType.YAML, MediaType.readFrom(in));
assertEquals(XContentType.CBOR, MediaType.readFrom(in));
}
}
}

public void testStringVersion() throws IOException {
try (BytesStreamOutput output = new BytesStreamOutput()) {
output.writeString(XContentType.JSON.mediaType());
output.writeString(XContentType.SMILE.mediaType());
output.writeString(XContentType.YAML.mediaType());
output.writeString(XContentType.CBOR.mediaType());
try (StreamInput in = output.bytes().streamInput()) {
in.setVersion(Version.V_2_11_0);
assertEquals(XContentType.JSON, MediaType.readFrom(in));
assertEquals(XContentType.SMILE, MediaType.readFrom(in));
assertEquals(XContentType.YAML, MediaType.readFrom(in));
assertEquals(XContentType.CBOR, MediaType.readFrom(in));
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
import com.fasterxml.jackson.dataformat.smile.SmileConstants;

import org.opensearch.Version;
import org.opensearch.common.annotation.PublicApi;
import org.opensearch.common.xcontent.cbor.CborXContent;
import org.opensearch.common.xcontent.json.JsonXContent;
Expand Down Expand Up @@ -260,6 +261,15 @@ public String format() {

@Override
public void writeTo(StreamOutput output) throws IOException {
output.writeString(this.mediaType());
if (output.getVersion().onOrAfter(Version.V_3_0_0)) {
output.writeVInt(uniqueId());
} else {
output.writeString(this.mediaType());
}
}

@Override
public int uniqueId() {
return this.ordinal();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,7 @@ protected PercolateQueryBuilder(String field, Supplier<BytesReference> documentS
}
documents = in.readList(StreamInput::readBytesReference);
if (documents.isEmpty() == false) {
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
documentXContentType = in.readMediaType();
} else {
documentXContentType = in.readEnum(XContentType.class);
}
documentXContentType = MediaType.readFrom(in);
} else {
documentXContentType = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,11 +70,7 @@ public PutStoredScriptRequest(StreamInput in) throws IOException {
super(in);
id = in.readOptionalString();
content = in.readBytesReference();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
context = in.readOptionalString();
source = new StoredScriptSource(in);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,7 @@ public IndexRequest(@Nullable ShardId shardId, StreamInput in) throws IOExceptio
isRetry = in.readBoolean();
autoGeneratedTimestamp = in.readLong();
if (in.readBoolean()) {
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
contentType = in.readMediaType();
} else {
contentType = in.readEnum(XContentType.class);
}
contentType = MediaType.readFrom(in);
} else {
contentType = null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,7 @@ public PutPipelineRequest(StreamInput in) throws IOException {
super(in);
id = in.readString();
source = in.readBytesReference();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
}

PutPipelineRequest() {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,7 @@ public SimulatePipelineRequest(BytesReference source, MediaType mediaType) {
id = in.readOptionalString();
verbose = in.readBoolean();
source = in.readBytesReference();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,7 @@ public PutSearchPipelineRequest(StreamInput in) throws IOException {
super(in);
id = in.readString();
source = in.readBytesReference();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,7 @@ public TermVectorsRequest() {}

if (in.readBoolean()) {
doc = in.readBytesReference();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
}
routing = in.readOptionalString();
preference = in.readOptionalString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,7 @@ public ExtensionRestRequest(StreamInput in) throws IOException {
params = in.readMap(StreamInput::readString, StreamInput::readString);
headers = in.readMap(StreamInput::readString, StreamInput::readStringList);
if (in.readBoolean()) {
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
}
content = in.readBytesReference();
principalIdentifierToken = in.readString();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,7 @@ public Item(@Nullable String index, XContentBuilder doc) {
}
if (in.readBoolean()) {
doc = (BytesReference) in.readGenericValue();
if (in.getVersion().onOrAfter(Version.V_2_10_0)) {
mediaType = in.readMediaType();
} else {
mediaType = in.readEnum(XContentType.class);
}
mediaType = MediaType.readFrom(in);
} else {
id = in.readString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public static PipelineConfiguration readFrom(StreamInput in) throws IOException
return new PipelineConfiguration(
in.readString(),
in.readBytesReference(),
in.getVersion().onOrAfter(Version.V_2_10_0) ? in.readMediaType() : in.readEnum(XContentType.class)
MediaType.readFrom(in)
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ public static PipelineConfiguration readFrom(StreamInput in) throws IOException
return new PipelineConfiguration(
in.readString(),
in.readBytesReference(),
in.getVersion().onOrAfter(Version.V_2_10_0) ? in.readMediaType() : in.readEnum(XContentType.class)
MediaType.readFrom(in)
);
}

Expand Down

0 comments on commit bd090d8

Please sign in to comment.