Skip to content

Commit

Permalink
tentative: use string enum instead of string union to avoid two copie…
Browse files Browse the repository at this point in the history
…s of constant string
  • Loading branch information
Lin Jian committed May 21, 2020
1 parent 6487153 commit 18e8c2b
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 83 deletions.
54 changes: 4 additions & 50 deletions sdk/storage/storage-internal-avro/src/AvroConstants.ts
Original file line number Diff line number Diff line change
@@ -1,50 +1,4 @@
/**
 * String and numeric constants shared by the Avro reader and parser.
 *
 * Property order is kept stable so that enumeration of the object
 * (e.g. `Object.keys`) yields the same sequence as before.
 */
export const AvroConstants = {
  // --- Container-file header -------------------------------------------
  // Length, in bytes, of the sync marker read from the file header.
  SYNC_MARKER_SIZE: 16,
  // Leading bytes expected at the start of the stream: 'O', 'b', 'j', 1
  INIT_BYTES: new Uint8Array([0x4f, 0x62, 0x6a, 0x01]),

  // --- Header metadata keys --------------------------------------------
  CODEC_KEY: "avro.codec",
  SCHEMA_KEY: "avro.schema",

  // --- Primitive type names --------------------------------------------
  NULL: "null",
  BOOLEAN: "boolean",
  INT: "int",
  LONG: "long",
  FLOAT: "float",
  DOUBLE: "double",
  BYTES: "bytes",
  STRING: "string",

  // --- Complex type names ----------------------------------------------
  RECORD: "record",
  ENUM: "enum",
  MAP: "map",
  ARRAY: "array",
  UNION: "union",
  FIXED: "fixed",

  // --- Schema attribute names ------------------------------------------
  ALIASES: "aliases",
  NAME: "name",
  FIELDS: "fields",
  TYPE: "type",
  SYMBOLS: "symbols",
  VALUES: "values"
};
/** Length, in bytes, of the sync marker read from the file header. */
export const AVRO_SYNC_MARKER_SIZE: number = 16;

/** Leading bytes expected at the start of an Avro stream: 'O', 'b', 'j', 1. */
export const AVRO_INIT_BYTES: Uint8Array = new Uint8Array([0x4f, 0x62, 0x6a, 0x01]);

/** Header metadata key under which the codec name is stored. */
export const AVRO_CODEC_KEY: string = "avro.codec";

/** Header metadata key under which the JSON schema is stored. */
export const AVRO_SCHEMA_KEY: string = "avro.schema";
70 changes: 43 additions & 27 deletions sdk/storage/storage-internal-avro/src/AvroParser.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { AvroReadable } from "./AvroReadable";
import { KeyValuePair } from "./utils/utils.common";
import { AvroConstants } from "./AvroConstants";

export class AvroParser {
/**
Expand Down Expand Up @@ -165,8 +164,17 @@ interface RecordField {
type: string | ObjectSchema | (string | ObjectSchema)[]; // Unions may not immediately contain other unions.
}

/**
 * Names of the complex (non-primitive) schema types, as they appear in the
 * `type` attribute of an object schema.
 */
enum AvroComplex {
  RECORD = "record",
  ENUM = "enum",
  ARRAY = "array",
  MAP = "map",
  UNION = "union",
  FIXED = "fixed"
}

interface ObjectSchema {
type: "record" | "enum" | "array" | "map" | "fixed";
type: Exclude<AvroComplex, AvroComplex.UNION>;
name?: string;
aliases?: string;
fields?: RecordField[];
Expand All @@ -184,7 +192,7 @@ export abstract class AvroType {
public abstract read(stream: AvroReadable): Promise<Object | null>;

/**
* Determinds the AvroType from the Avro Schema.
* Determines the AvroType from the Avro Schema.
*/
public static fromSchema(schema: string | Object): AvroType {
if (typeof schema == "string") {
Expand All @@ -199,14 +207,14 @@ export abstract class AvroType {
private static fromStringSchema(schema: string): AvroType {
// FIXME: simpler way to tell if schema is of type AvroPrimitive?
switch (schema) {
case AvroConstants.NULL:
case AvroConstants.BOOLEAN:
case AvroConstants.INT:
case AvroConstants.LONG:
case AvroConstants.FLOAT:
case AvroConstants.DOUBLE:
case AvroConstants.BYTES:
case AvroConstants.STRING:
case AvroPrimitive.NULL:
case AvroPrimitive.BOOLEAN:
case AvroPrimitive.INT:
case AvroPrimitive.LONG:
case AvroPrimitive.FLOAT:
case AvroPrimitive.DOUBLE:
case AvroPrimitive.BYTES:
case AvroPrimitive.STRING:
return new AvroPrimitiveType(schema as AvroPrimitive);
default:
throw new Error(`Unexpected Avro type ${schema}`);
Expand All @@ -222,10 +230,10 @@ export abstract class AvroType {
// Primitives can be defined as strings or objects
try {
return AvroType.fromStringSchema(type);
} catch (err) {}
} catch (err) { }

switch (type) {
case AvroConstants.RECORD:
case AvroComplex.RECORD:
if (schema.aliases) {
throw new Error(`aliases currently is not supported, schema: ${schema}`);
}
Expand All @@ -241,29 +249,37 @@ export abstract class AvroType {
fields[field.name] = AvroType.fromSchema(field.type);
}
return new AvroRecordType(fields, schema.name);
case AvroConstants.ENUM:
case AvroComplex.ENUM:
if (schema.aliases) {
throw new Error(`aliases currently is not supported, schema: ${schema}`);
}
if (!schema.symbols) {
throw new Error(`Required attribute 'symbols' doesn't exist on schema: ${schema}`);
}
return new AvroEnumType(schema.symbols);
case AvroConstants.MAP:
case AvroComplex.MAP:
if (!schema.values) {
throw new Error(`Required attribute 'values' doesn't exist on schema: ${schema}`);
}
return new AvroMapType(AvroType.fromSchema(schema.values));
case AvroConstants.ARRAY: // Unused today
case AvroConstants.UNION: // Unused today
case AvroConstants.FIXED: // Unused today
case AvroComplex.ARRAY: // Unused today
case AvroComplex.FIXED: // Unused today
default:
throw new Error(`Unexpected Avro type ${type} in ${schema}`);
}
}
}

type AvroPrimitive = "null" | "boolean" | "int " | "long" | "float" | "double" | "bytes" | "string";
/**
 * Names of the primitive schema types. A primitive schema may be written
 * either as one of these bare strings or as the `type` of an object schema.
 */
enum AvroPrimitive {
  NULL = "null",
  BOOLEAN = "boolean",
  INT = "int",
  LONG = "long",
  FLOAT = "float",
  DOUBLE = "double",
  BYTES = "bytes",
  STRING = "string"
}

class AvroPrimitiveType extends AvroType {
private _primitive: AvroPrimitive;
Expand All @@ -275,21 +291,21 @@ class AvroPrimitiveType extends AvroType {

public async read(stream: AvroReadable): Promise<Object | null> {
switch (this._primitive) {
case AvroConstants.NULL:
case AvroPrimitive.NULL:
return await AvroParser.readNull();
case AvroConstants.BOOLEAN:
case AvroPrimitive.BOOLEAN:
return await AvroParser.readBoolean(stream);
case AvroConstants.INT:
case AvroPrimitive.INT:
return await AvroParser.readInt(stream);
case AvroConstants.LONG:
case AvroPrimitive.LONG:
return await AvroParser.readLong(stream);
case AvroConstants.FLOAT:
case AvroPrimitive.FLOAT:
return await AvroParser.readFloat(stream);
case AvroConstants.DOUBLE:
case AvroPrimitive.DOUBLE:
return await AvroParser.readDouble(stream);
case AvroConstants.BYTES:
case AvroPrimitive.BYTES:
return await AvroParser.readBytes(stream);
case AvroConstants.STRING:
case AvroPrimitive.STRING:
return await AvroParser.readString(stream);
default:
throw new Error("Unknown Avro Primitive");
Expand Down
12 changes: 6 additions & 6 deletions sdk/storage/storage-internal-avro/src/AvroReader.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { AvroReadable } from "./AvroReadable";
import { AvroConstants } from "./AvroConstants";
import { AVRO_SYNC_MARKER_SIZE, AVRO_INIT_BYTES, AVRO_CODEC_KEY, AVRO_SCHEMA_KEY } from "./AvroConstants";
import { arraysEqual } from "./utils/utils.common";
import { AvroType, AvroParser } from "./AvroParser";

Expand Down Expand Up @@ -54,9 +54,9 @@ export class AvroReader {
private async initialize() {
const header = await AvroParser.readFixedBytes(
this._headerStream,
AvroConstants.INIT_BYTES.length
AVRO_INIT_BYTES.length
);
if (!arraysEqual(header, AvroConstants.INIT_BYTES)) {
if (!arraysEqual(header, AVRO_INIT_BYTES)) {
throw new Error("Stream is not an Avro file.");
}

Expand All @@ -65,19 +65,19 @@ export class AvroReader {
this._metadata = await AvroParser.readMap(this._headerStream, AvroParser.readString);

// Validate codec
const codec = this._metadata![AvroConstants.CODEC_KEY];
const codec = this._metadata![AVRO_CODEC_KEY];
if (!(codec == undefined || codec == "null")) {
throw new Error("Codecs are not supported");
}

// The 16-byte, randomly-generated sync marker for this file.
this._syncMarker = await AvroParser.readFixedBytes(
this._headerStream,
AvroConstants.SYNC_MARKER_SIZE
AVRO_SYNC_MARKER_SIZE
);

// Parse the schema
const schema = JSON.parse(this._metadata![AvroConstants.SCHEMA_KEY]);
const schema = JSON.parse(this._metadata![AVRO_SCHEMA_KEY]);
this._itemType = AvroType.fromSchema(schema);

if (this._blockOffset == 0) {
Expand Down

0 comments on commit 18e8c2b

Please sign in to comment.