Skip to content

Commit

Permalink
Fix the handling of null default values. (#182)
Browse files Browse the repository at this point in the history
* Fix the way a null default value is handled.

This PR concerns calls to `createSchemaField()` where the default value
passed in is null.

Null as a default value is only really valid in two situations:
(1) When the field is of type "null", or
(2) When the field is of type union where the first entry in the union
    is "null".

Case (1) is a weird case that nobody really cares about in practice.

The Avro libraries themselves (all versions) recognize case (2) (and
case (2) only) and convert the null into a NullNode object. In all
other cases, the null is left alone (no conversion to NullNode).

So, we do the same here.

Note: The compat helper's behaviour was inconsistent before this PR.
In Avro 1.8 and earlier, we always converted a null default into an
object; In Avro 1.9 and 1.10, null was never converted. This PR makes
them behave consistently.

* Use the field's schema instead of passing it in as a param.

Also add much better comments and tests.
  • Loading branch information
srramach authored Aug 24, 2021
1 parent fb7abe5 commit 5cd9cf0
Show file tree
Hide file tree
Showing 20 changed files with 1,076 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,20 @@ public static void traverseSchema(Schema schema, SchemaVisitor visitor) {
traverseSchema(schema, visitor, visited);
}

/**
* Returns true if a null value is allowed as the default value for a field
* (given its schema). It is valid if and only if:
* (1) The field's type is null, or
* (2) The field is a union, where the first alternative type is null.
*/
public static boolean isNullAValidDefaultForSchema(Schema schema) {
return schema != null &&
(schema.getType() == Schema.Type.NULL ||
schema.getType() == Schema.Type.UNION &&
!schema.getTypes().isEmpty() &&
schema.getTypes().get(0).getType() == Schema.Type.NULL);
}

/**
* given a (parent) schema, and a field name, find the schema for that field.
* if the field is a union, returns the (only) non-null branch of the union
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ public class Jackson1Utils {
*/
public static JsonNode toJsonNode(Object datum) {
if (datum == null) {
return JsonNodeFactory.instance.nullNode();
return null;
} else if (datum instanceof JsonNode) {
return (JsonNode) datum;
} else if (datum instanceof byte[]) {
try {
return JsonNodeFactory.instance.textNode(new String((byte[]) datum, BYTES_CHARSET));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package com.linkedin.avroutil1.compatibility.avro110;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
Expand Down Expand Up @@ -37,6 +38,10 @@ public FieldBuilder110(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == Schema.Field.NULL_DEFAULT_VALUE) {
// Check if null is still a valid default for the schema.
setDefault(null);
}
return this;
}

Expand All @@ -48,6 +53,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
defaultValue = Schema.Field.NULL_DEFAULT_VALUE;
}
_defaultVal = defaultValue;
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

package com.linkedin.avroutil1.compatibility.avro14;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import com.linkedin.avroutil1.compatibility.Jackson1Utils;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.NullNode;

import java.lang.reflect.Field;
import java.util.Map;
Expand Down Expand Up @@ -52,6 +54,10 @@ public FieldBuilder14(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == NullNode.getInstance()) {
// Check if null is still a valid default for the schema.
setDefault((Object) null);
}
return this;
}

Expand All @@ -63,6 +69,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
return setDefault(NullNode.getInstance());
}
return setDefault(Jackson1Utils.toJsonNode(defaultValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

package com.linkedin.avroutil1.compatibility.avro15;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import com.linkedin.avroutil1.compatibility.Jackson1Utils;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.NullNode;

import java.lang.reflect.Field;
import java.util.Map;
Expand Down Expand Up @@ -52,6 +54,10 @@ public FieldBuilder15(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == NullNode.getInstance()) {
// Check if null is still a valid default for the schema.
setDefault((Object) null);
}
return this;
}

Expand All @@ -63,6 +69,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
return setDefault(NullNode.getInstance());
}
return setDefault(Jackson1Utils.toJsonNode(defaultValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

package com.linkedin.avroutil1.compatibility.avro16;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import com.linkedin.avroutil1.compatibility.Jackson1Utils;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.NullNode;

import java.util.Map;

Expand Down Expand Up @@ -39,6 +41,10 @@ public FieldBuilder16(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == NullNode.getInstance()) {
// Check if null is still a valid default for the schema.
setDefault((Object) null);
}
return this;
}

Expand All @@ -50,6 +56,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
return setDefault(NullNode.getInstance());
}
return setDefault(Jackson1Utils.toJsonNode(defaultValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

package com.linkedin.avroutil1.compatibility.avro17;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import com.linkedin.avroutil1.compatibility.Jackson1Utils;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.NullNode;

import java.util.Map;

Expand Down Expand Up @@ -39,6 +41,10 @@ public FieldBuilder17(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == NullNode.getInstance()) {
// Check if null is still a valid default for the schema.
setDefault((Object) null);
}
return this;
}

Expand All @@ -50,6 +56,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
return setDefault(NullNode.getInstance());
}
return setDefault(Jackson1Utils.toJsonNode(defaultValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@

package com.linkedin.avroutil1.compatibility.avro18;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import com.linkedin.avroutil1.compatibility.Jackson1Utils;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.NullNode;

import java.util.Map;

Expand Down Expand Up @@ -42,6 +44,10 @@ public FieldBuilder18(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == NullNode.getInstance()) {
// Check if null is still a valid default for the schema.
setDefault((Object) null);
}
return this;
}

Expand All @@ -53,6 +59,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
return setDefault(NullNode.getInstance());
}
return setDefault(Jackson1Utils.toJsonNode(defaultValue));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package com.linkedin.avroutil1.compatibility.avro19;

import com.linkedin.avroutil1.compatibility.AvroSchemaUtil;
import com.linkedin.avroutil1.compatibility.FieldBuilder;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field.Order;
Expand Down Expand Up @@ -37,6 +38,10 @@ public FieldBuilder19(String name) {
@Override
public FieldBuilder setSchema(Schema schema) {
_schema = schema;
if (_defaultVal == Schema.Field.NULL_DEFAULT_VALUE) {
// Check if null is still a valid default for the schema.
setDefault(null);
}
return this;
}

Expand All @@ -48,6 +53,23 @@ public FieldBuilder setDoc(String doc) {

@Override
public FieldBuilder setDefault(Object defaultValue) {
// If defaultValue is null, it's ambiguous. It could mean either of these:
// (1) The default value was not specified, or
// (2) The default value was specified to be null.
//
// To disambiguate, we check to see if null is a valid value for the
// field's schema. If it is, we convert it into a special object (marker)
// that's known to Avro. If it's not, it's case (1); we leave it as is.
//
// This means there's no way (using the helper) to create a field whose
// schema allows null as a default, but you want to say "no default was
// specified". That's a small price to pay for not bloating the helper API.
//
// Note that we don't validate all possible default values against the
// schema. That's Avro's job. We only check for the ambiguous case here.
if (defaultValue == null && AvroSchemaUtil.isNullAValidDefaultForSchema(_schema)) {
defaultValue = Schema.Field.NULL_DEFAULT_VALUE;
}
_defaultVal = defaultValue;
return this;
}
Expand Down
Loading

0 comments on commit 5cd9cf0

Please sign in to comment.