Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(protobuf) Additional protobuf features #4493

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies {
datahub "com.google.protobuf:protobuf-java:$protobuf_version"
datahub 'org.jgrapht:jgrapht-core:1.5.1'
datahub 'com.google.guava:guava:27.0.1-jre'
datahub 'com.google.code.gson:gson:2.8.6'
}

sourceSets {
Expand Down
Binary file not shown.
Binary file not shown.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,70 +16,122 @@ import "google/protobuf/descriptor.proto";

*/
// Kinds of DataHub metadata that an annotated option can be exported as.
// These values are used as the value of the `type` option
// (e.g. `[(datahubField.type) = TERM]`).
// NOTE(review): other declarations in this text use TAG_LIST, OWNER and DOMAIN
// with the same option, and those values are not declared in this block --
// confirm against the current meta.proto before relying on these numbers.
enum DataHubMetadataType {
PROPERTY = 0; // DataHub custom property
TAG = 1; // DataHub tag
TERM = 2; // DataHub term
}

/*
Example below: The following is not required for annotation processing. This is an example
of creating an annotation using an enum.
*/

enum MetaEnumExample {
UNKNOWN = 0;
ENTITY = 1;
EVENT = 2;
PROPERTY = 0; // Datahub Custom Property
TAG = 1; // Datahub Tag
TAG_LIST = 2; // comma delimited string
TERM = 3; // Datahub Term
OWNER = 4; // Datahub Owner
DOMAIN = 5; // Datahub Domain
}

// Assuming Glossary Term defined from bootstrap example
enum Classification {
  // Each value is declared exactly once: protoc rejects an enum that defines
  // the same value name twice. Names and numbers are unchanged.
  // Elsewhere in this text a value is referenced by string as
  // "Classification.Sensitive".
  HighlyConfidential = 0;
  Confidential = 1;
  Sensitive = 2;
}

// Namespace for the core options that extend google.protobuf.FieldOptions.
message datahubField {
  extend google.protobuf.FieldOptions {
    // Required: states how an annotated option is exported to DataHub.
    // Declared `repeated`, so one option may be exported in several places.
    repeated DataHubMetadataType type = 5000;

    // Set to true when the field is a primary key. Per the original note,
    // this works for any boolean option with `primary_key` in its name.
    bool is_primary_key = 5010;
  }
}

// Field-level security options (extensions of google.protobuf.FieldOptions).
message securityField {
  extend google.protobuf.FieldOptions {
    // Classification of a field, exported as a TERM.
    // Either the free-form string or the enum variant may be used.
    string classification = 5100 [(datahubField.type) = TERM];
    Classification classification_enum = 5101 [(datahubField.type) = TERM];
  }
}

// General-purpose field-level options (extensions of
// google.protobuf.FieldOptions).
message field {
  extend google.protobuf.FieldOptions {
    // Tags for the field, carried in a single string and exported as TAG_LIST.
    string tags = 5150 [(datahubField.type) = TAG_LIST];
  }
}

message fld {
extend google.protobuf.FieldOptions {
// Required: Mark option field with how to export to DataHub in one or more places.
repeated meta.DataHubMetadataType type = 6000;
// Message-level ownership options (extensions of
// google.protobuf.MessageOptions).
message ownership {
  extend google.protobuf.MessageOptions {
    // Examples below: the following is not required for annotation processing.

    // Owning team(s); repeated, exported as both OWNER and PROPERTY.
    repeated string team = 5200 [
      (datahubField.type) = OWNER,
      (datahubField.type) = PROPERTY
    ];

    // Data steward, exported as OWNER.
    string data_steward = 5201 [(datahubField.type) = OWNER];

    // Domain, exported as both DOMAIN and PROPERTY.
    string domain = 5202 [
      (datahubField.type) = DOMAIN,
      (datahubField.type) = PROPERTY
    ];
  }
}

// Message-level security options (extensions of google.protobuf.MessageOptions).
message security {
extend google.protobuf.MessageOptions {

// Set true if the field is a primary key. This works for any boolean with `primary_key` in it.
// NOTE(review): this declaration sits in a MessageOptions extension although its
// comment talks about a field, and it carries no datahubField.type annotation --
// it looks like a leftover from an earlier revision; confirm whether it belongs here.
bool is_primary_key = 6010;
// Place the classification term at the Message/Dataset level; either the string
// or the enum form is supported. Both are exported as TERM and PROPERTY.
string classification = 5300 [(datahubField.type) = TERM, (datahubField.type) = PROPERTY];
Classification classification_enum = 5301 [(datahubField.type) = TERM, (datahubField.type) = PROPERTY];
}
}

// Extract classification field option as a Term, either works
string classification = 6001 [(meta.fld.type) = TERM];
meta.Classification classification_enum = 6002 [(meta.fld.type) = TERM];
message kafka {
extend google.protobuf.MessageOptions {

// Expose this option as a tag on the field.
string product_type = 70004 [(meta.fld.type) = TAG];
bool product_type_bool = 70005 [(meta.fld.type) = TAG];
meta.MetaEnumExample product_type_enum = 70006 [(meta.fld.type) = TAG];
}
repeated string topics = 5400 [(datahubField.type) = PROPERTY];
}
}

message msg {
extend google.protobuf.MessageOptions {
/*
Examples below: The following is not required for annotation processing.
*/

// Place the classification term at the Message/Dataset level, either string or enum is supported
string classification = 4000 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY];
meta.Classification classification_enum = 4001 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY];

// Attach these Message/Dataset options as a tag and property.
string product = 5001 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
string project = 5002 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
string team = 5003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];

string domain = 60003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
bool bool_feature = 60005 [(meta.fld.type) = TAG];
string alert_channel = 60007 [(meta.fld.type) = PROPERTY];
}
// Value set for the `lifecycle.frequency` message option.
enum Frequency {
  REALTIME = 0;
  DAILY = 1;
  WEEKLY = 2;
  MONTHLY = 3;
  YEARLY = 4;
}

// Message-level lifecycle options (extensions of
// google.protobuf.MessageOptions).
message lifecycle {
  extend google.protobuf.MessageOptions {
    // Archived flag; exported as both TAG and PROPERTY.
    bool archived = 5500 [
      (datahubField.type) = TAG,
      (datahubField.type) = PROPERTY
    ];

    // Frequency value; exported as both TAG and PROPERTY.
    Frequency frequency = 5510 [
      (datahubField.type) = TAG,
      (datahubField.type) = PROPERTY
    ];

    // Time-to-live as a string; exported as TAG only.
    string ttl = 5520 [(datahubField.type) = TAG];
  }
}

// Value set for the `message.type` option; also used as the type of the
// enum-valued example options in `props` and `tags`.
enum MessageType {
  ENTITY = 0;
  EVENT = 1;
  IMPRESSION = 2;
}

// General message-level options (extensions of
// google.protobuf.MessageOptions).
message message {
  extend google.protobuf.MessageOptions {
    // Tags for the message, carried in a single string; exported as TAG_LIST.
    string tags = 5600 [(datahubField.type) = TAG_LIST];

    // Message type; exported as both TAG and PROPERTY.
    MessageType type = 5610 [
      (datahubField.type) = TAG,
      (datahubField.type) = PROPERTY
    ];
  }
}

// Example message-level options, every one exported as PROPERTY. Covers the
// string, bool and enum value types in singular form, plus repeated string
// and repeated enum.
message props {
  extend google.protobuf.MessageOptions {
    // Singular values.
    string prop1 = 5701 [(datahubField.type) = PROPERTY];
    bool prop2 = 5702 [(datahubField.type) = PROPERTY];
    MessageType prop3 = 5703 [(datahubField.type) = PROPERTY];

    // Repeated values (no prop5 / 5705 is declared in this block).
    repeated string prop4 = 5704 [(datahubField.type) = PROPERTY];
    repeated MessageType prop6 = 5706 [(datahubField.type) = PROPERTY];
  }
}

// Example message-level options exported as tags, one per supported value
// type, plus a TAG_LIST string.
message tags {
  extend google.protobuf.MessageOptions {
    // Single-valued tags: string, bool and enum.
    string tag_str = 5801 [(datahubField.type) = TAG];
    bool tag_bool = 5802 [(datahubField.type) = TAG];
    MessageType tag_enum = 5803 [(datahubField.type) = TAG];

    // Several tags carried in one string; exported as TAG_LIST.
    string tag_list = 5804 [(datahubField.type) = TAG_LIST];
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";


/**
Clickstream data
**/
message Click {
  // Message type annotation.
  option (meta.message.type) = EVENT;

  // Kafka topic annotation.
  option (meta.kafka.topics) = "clickstream_clicks";

  // Lifecycle annotations.
  option (meta.lifecycle.frequency) = REALTIME;
  option (meta.lifecycle.ttl) = "180d";
  option (meta.lifecycle.archived) = true;

  // Event timestamp.
  google.protobuf.Timestamp timestamp = 1;

  // Map field, see
  // https://developers.google.com/protocol-buffers/docs/proto3#maps
  map<string, uint32> map_field = 7;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "protobuf/v1/clickstream/ClickEvent.proto";
import "protobuf/v1/clickstream/SearchEvent.proto";
import "protobuf/v1/clickstream/ImpressionEvent.proto";

/**
Represents an internet browser.

Slack channel: #getting-started

Git owner: @datahub-project/johndoe

References:
https://en.wikipedia.org/wiki/Web_browser
**/
message Device {
  // Ownership annotations. `team` is set twice, once per team.
  option (meta.ownership.domain) = "Marketing";
  option (meta.ownership.team) = "Analytics";
  option (meta.ownership.team) = "IT";
  option (meta.ownership.data_steward) = "corpUser:John Doe";

  // Message type annotation.
  option (meta.message.type) = ENTITY;

  // Kafka topic annotation.
  option (meta.kafka.topics) = "devices";

  // The device-specific identifier; marked as the primary key.
  string device_id = 1 [(meta.datahubField.is_primary_key) = true];

  // The device type associated with this event.
  DeviceType device_type = 2;

  // The user ids associated with this device.
  repeated string user_id = 3;

  // The device's user agent.
  // https://en.wikipedia.org/wiki/User_agent
  string user_agent = 4;

  // The device's IP address; carries a classification annotation.
  // https://en.wikipedia.org/wiki/IP_address
  string ip_address = 5 [
    (meta.securityField.classification) = "Classification.Sensitive"
  ];

  // Search history.
  repeated Search searches = 100;

  // Impression history.
  repeated Impression impressions = 101;

  // Click history.
  repeated Click clicks = 102;
}

// Device categories used by `Device.device_type`.
enum DeviceType {
  DESKTOP = 0;
  MOBILE = 1;
  TABLET = 2;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";

/**
Clickstream impressions
**/
message Impression {
  // Message type and Kafka topic annotations.
  option (meta.message.type) = EVENT;
  option (meta.kafka.topics) = "clickstream_impressions";

  // Singular example properties: string, bool and enum.
  option (meta.props.prop1) = "prop1 value";
  option (meta.props.prop2) = true;
  option (meta.props.prop3) = EVENT;

  // Repeated example properties; each statement sets one more value.
  option (meta.props.prop4) = "value1";
  option (meta.props.prop4) = "value2";
  option (meta.props.prop6) = EVENT;
  option (meta.props.prop6) = IMPRESSION;

  // Example tags. The enum-valued tag is left disabled, as in the original.
  option (meta.tags.tag_str) = "value1";
  option (meta.tags.tag_bool) = true;
  // option(meta.tags.tag_enum) = EVENT;
  option (meta.tags.tag_list) = "a, b, c";

  // Message-level classification, given in both enum and string form.
  option (meta.security.classification_enum) = HighlyConfidential;
  option (meta.security.classification) = "Classification.Sensitive";

  // Event timestamp.
  google.protobuf.Timestamp timestamp = 1;

  // Event details.
  string details = 2;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";


/**
Search event
**/
message Search {
  // Message type annotation.
  option (meta.message.type) = EVENT;

  // Kafka topic annotation.
  option (meta.kafka.topics) = "clickstream_searches";

  // Lifecycle annotations.
  option (meta.lifecycle.frequency) = REALTIME;
  option (meta.lifecycle.ttl) = "180d";
  option (meta.lifecycle.archived) = true;

  // Event timestamp.
  google.protobuf.Timestamp timestamp = 1;

  // Search term.
  google.protobuf.StringValue search_term = 2;

  // Results displayed.
  google.protobuf.Int64Value results = 3;
}
Loading