Skip to content

Commit

Permalink
feat(integration): protobuf - additional annotations and features (#4493)
Browse files Browse the repository at this point in the history
  • Loading branch information
leifker authored Mar 30, 2022
1 parent 14df3cf commit 9cc6d8a
Show file tree
Hide file tree
Showing 44 changed files with 1,081 additions and 243 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies {
datahub "com.google.protobuf:protobuf-java:$protobuf_version"
datahub 'org.jgrapht:jgrapht-core:1.5.1'
datahub 'com.google.guava:guava:27.0.1-jre'
datahub 'com.google.code.gson:gson:2.8.6'
}

sourceSets {
Expand Down
Binary file not shown.
Binary file not shown.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -16,70 +16,122 @@ import "google/protobuf/descriptor.proto";
*/
enum DataHubMetadataType {
PROPERTY = 0; // Datahub Custom Property
TAG = 1; // Datahub Tag
TERM = 2; // Datahub Term
}

/*
Example below: The following is not required for annotation processing. This is an example
of creating an annotation using an enum.
*/

enum MetaEnumExample {
UNKNOWN = 0;
ENTITY = 1;
EVENT = 2;
PROPERTY = 0; // Datahub Custom Property
TAG = 1; // Datahub Tag
TAG_LIST = 2; // comma delimited string
TERM = 3; // Datahub Term
OWNER = 4; // Datahub Owner
DOMAIN = 5; // Datahub Domain
}

// Assuming Glossary Term defined from bootstrap example
//
// Classification levels usable as the enum-typed form of the classification
// options (see `classification_enum` in securityField and security).
// Each value is declared exactly once: value names must be unique within an
// enum, and value numbers must be unique unless `allow_alias` is enabled.
enum Classification {
  HighlyConfidential = 0;
  Confidential = 1;
  Sensitive = 2;
}

// Container for the field-level options that control DataHub export.
message datahubField {
  extend google.protobuf.FieldOptions {
    // Required. Declares how the annotated option field is exported to
    // DataHub. Repeated, so a single option can be exported to more than one
    // place.
    repeated DataHubMetadataType type = 5000;

    // Set to true when the field is a primary key. Any boolean option whose
    // name contains `primary_key` is handled the same way.
    bool is_primary_key = 5010;
  }
}

// Field-level security classification options.
message securityField {
  extend google.protobuf.FieldOptions {
    // Both options are annotated with TERM, so the classification is
    // extracted as a DataHub Term. Use either the free-form string or the
    // Classification enum; either works.
    string classification = 5100 [(datahubField.type) = TERM];
    Classification classification_enum = 5101 [(datahubField.type) = TERM];
  }
}

// Generic field-level options.
message field {
  extend google.protobuf.FieldOptions {
    // Annotated as TAG_LIST: the value is a comma delimited string.
    string tags = 5150 [(datahubField.type) = TAG_LIST];
  }
}

message fld {
extend google.protobuf.FieldOptions {
// Required: Mark option field with how to export to DataHub in one or more places.
repeated meta.DataHubMetadataType type = 6000;
message ownership {
extend google.protobuf.MessageOptions {

/*
Examples below: The following is not required for annotation processing.
*/
repeated string team = 5200 [(datahubField.type) = OWNER, (datahubField.type) = PROPERTY];
string data_steward = 5201 [(datahubField.type) = OWNER];
string domain = 5202 [(datahubField.type) = DOMAIN, (datahubField.type) = PROPERTY];
}
}

message security {
extend google.protobuf.MessageOptions {

// Set true if the field is a primary key. This works for any boolean with `primary_key` in it.
bool is_primary_key = 6010;
// Place the classification term at the Message/Dataset level, either string or enum is supported
string classification = 5300 [(datahubField.type) = TERM, (datahubField.type) = PROPERTY];
Classification classification_enum = 5301 [(datahubField.type) = TERM, (datahubField.type) = PROPERTY];
}
}

// Extract classification field option as a Term, either works
string classification = 6001 [(meta.fld.type) = TERM];
meta.Classification classification_enum = 6002 [(meta.fld.type) = TERM];
message kafka {
extend google.protobuf.MessageOptions {

// Expose this option as a tag on the field.
string product_type = 70004 [(meta.fld.type) = TAG];
bool product_type_bool = 70005 [(meta.fld.type) = TAG];
meta.MetaEnumExample product_type_enum = 70006 [(meta.fld.type) = TAG];
}
repeated string topics = 5400 [(datahubField.type) = PROPERTY];
}
}

message msg {
extend google.protobuf.MessageOptions {
/*
Examples below: The following is not required for annotation processing.
*/

// Place the classification term at the Message/Dataset level, either string or enum is supported
string classification = 4000 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY];
meta.Classification classification_enum = 4001 [(meta.fld.type) = TERM, (meta.fld.type) = PROPERTY];

// Attach these Message/Dataset options as a tag and property.
string product = 5001 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
string project = 5002 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
string team = 5003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];

string domain = 60003 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
meta.MetaEnumExample type = 60004 [(meta.fld.type) = TAG, (meta.fld.type) = PROPERTY];
bool bool_feature = 60005 [(meta.fld.type) = TAG];
string alert_channel = 60007 [(meta.fld.type) = PROPERTY];
}
// Frequency values; this enum is the type of the `lifecycle.frequency`
// message option.
enum Frequency {
  REALTIME = 0;
  DAILY = 1;
  WEEKLY = 2;
  MONTHLY = 3;
  YEARLY = 4;
}

// Message-level lifecycle options.
message lifecycle {
  extend google.protobuf.MessageOptions {
    // Annotated as both TAG and PROPERTY.
    bool archived = 5500 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];

    // Annotated as both TAG and PROPERTY.
    Frequency frequency = 5510 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];

    // Annotated as TAG only. Free-form string.
    string ttl = 5520 [(datahubField.type) = TAG];
  }
}

// Kinds of message; this enum is the type of the `message.type` option and of
// the enum-typed options in `props` and `tags`.
enum MessageType {
  ENTITY = 0;
  EVENT = 1;
  IMPRESSION = 2;
}

// General message-level options.
message message {
  extend google.protobuf.MessageOptions {
    // Annotated as TAG_LIST: the value is a comma delimited string.
    string tags = 5600 [(datahubField.type) = TAG_LIST];

    // Annotated as both TAG and PROPERTY.
    MessageType type = 5610 [(datahubField.type) = TAG, (datahubField.type) = PROPERTY];
  }
}

// Message-level options that are all annotated as PROPERTY, covering string,
// bool and enum value types in singular and repeated form.
message props {
  extend google.protobuf.MessageOptions {
    // Singular options.
    string prop1 = 5701 [(datahubField.type) = PROPERTY];
    bool prop2 = 5702 [(datahubField.type) = PROPERTY];
    MessageType prop3 = 5703 [(datahubField.type) = PROPERTY];

    // Repeated options. No `prop5` / 5705 is declared here.
    repeated string prop4 = 5704 [(datahubField.type) = PROPERTY];
    repeated MessageType prop6 = 5706 [(datahubField.type) = PROPERTY];
  }
}

// Message-level options that are annotated as tags, covering string, bool and
// enum value types plus a comma delimited list form.
message tags {
  extend google.protobuf.MessageOptions {
    // Annotated as TAG.
    string tag_str = 5801 [(datahubField.type) = TAG];
    bool tag_bool = 5802 [(datahubField.type) = TAG];
    MessageType tag_enum = 5803 [(datahubField.type) = TAG];

    // Annotated as TAG_LIST: the value is a comma delimited string.
    string tag_list = 5804 [(datahubField.type) = TAG_LIST];
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";


/**
Clickstream data
**/
message Click {
  // Message kind and Kafka topic annotations.
  option (meta.message.type) = EVENT;
  option (meta.kafka.topics) = "clickstream_clicks";

  // Lifecycle annotations.
  option (meta.lifecycle.frequency) = REALTIME;
  option (meta.lifecycle.ttl) = "180d";
  option (meta.lifecycle.archived) = true;

  google.protobuf.Timestamp timestamp = 1; // event timestamp
  map<string, uint32> map_field = 7; // https://developers.google.com/protocol-buffers/docs/proto3#maps
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "protobuf/v1/clickstream/ClickEvent.proto";
import "protobuf/v1/clickstream/SearchEvent.proto";
import "protobuf/v1/clickstream/ImpressionEvent.proto";

/**
Represents an internet browser.
Slack channel: #getting-started
Git owner: @datahub-project/johndoe
References:
https://en.wikipedia.org/wiki/Web_browser
**/
message Device {
  // Ownership annotations; `team` is a repeated option and is set twice.
  option (meta.ownership.domain) = "Marketing";
  option (meta.ownership.team) = "Analytics";
  option (meta.ownership.team) = "IT";
  option (meta.ownership.data_steward) = "corpUser:John Doe";

  // Message kind and Kafka topic annotations.
  option (meta.message.type) = ENTITY;
  option (meta.kafka.topics) = "devices";

  // the device specific identifier
  string device_id = 1 [(meta.datahubField.is_primary_key) = true];

  // the device type associated with this event
  DeviceType device_type = 2;

  // the user ids associated with this device
  repeated string user_id = 3;

  // device's user agent
  // https://en.wikipedia.org/wiki/User_agent
  string user_agent = 4;

  // device's ip address
  // https://en.wikipedia.org/wiki/IP_address
  string ip_address = 5 [
    (meta.securityField.classification) = "Classification.Sensitive"
  ];

  // Search history
  repeated Search searches = 100;

  // Impression history
  repeated Impression impressions = 101;

  // Click history
  repeated Click clicks = 102;
}

// Device categories; this enum is the type of `Device.device_type`.
enum DeviceType {
  DESKTOP = 0;
  MOBILE = 1;
  TABLET = 2;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";

/**
Clickstream impressions
**/
message Impression {
  // Message kind and Kafka topic annotations.
  option (meta.message.type) = EVENT;
  option (meta.kafka.topics) = "clickstream_impressions";

  // Singular property options: string, bool and enum.
  option (meta.props.prop1) = "prop1 value";
  option (meta.props.prop2) = true;
  option (meta.props.prop3) = EVENT;

  // Repeated property options, each set twice.
  option (meta.props.prop4) = "value1";
  option (meta.props.prop4) = "value2";
  option (meta.props.prop6) = EVENT;
  option (meta.props.prop6) = IMPRESSION;

  // Tag options; the enum-typed tag is left commented out.
  option (meta.tags.tag_str) = "value1";
  option (meta.tags.tag_bool) = true;
  // option(meta.tags.tag_enum) = EVENT;
  option (meta.tags.tag_list) = "a, b, c";

  // Message-level classification, set in both enum and string form.
  option (meta.security.classification_enum) = HighlyConfidential;
  option (meta.security.classification) = "Classification.Sensitive";

  google.protobuf.Timestamp timestamp = 1; // event timestamp
  string details = 2; // event details
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
syntax = "proto3";
package protobuf.clickstream;

import "protobuf/meta/meta.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";


/**
Search event
**/
message Search {
  // Message kind and Kafka topic annotations.
  option (meta.message.type) = EVENT;
  option (meta.kafka.topics) = "clickstream_searches";

  // Lifecycle annotations.
  option (meta.lifecycle.frequency) = REALTIME;
  option (meta.lifecycle.ttl) = "180d";
  option (meta.lifecycle.archived) = true;

  google.protobuf.Timestamp timestamp = 1; // event timestamp
  google.protobuf.StringValue search_term = 2; // search term
  google.protobuf.Int64Value results = 3; // results displayed
}
Loading

0 comments on commit 9cc6d8a

Please sign in to comment.