Skip to content

Commit

Permalink
Kinesis connector
Browse files Browse the repository at this point in the history
  • Loading branch information
ankitdixit committed Sep 24, 2019
1 parent fb66853 commit 1bb6309
Show file tree
Hide file tree
Showing 50 changed files with 5,800 additions and 0 deletions.
55 changes: 55 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
<module>presto-jmx</module>
<module>presto-record-decoder</module>
<module>presto-kafka</module>
<module>presto-kinesis</module>
<module>presto-redis</module>
<module>presto-accumulo</module>
<module>presto-cassandra</module>
Expand Down Expand Up @@ -885,6 +886,60 @@
</exclusions>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-kinesis</artifactId>
<version>${dep.aws-sdk.version}</version>
<exclusions>
<exclusion>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>amazon-kinesis-client</artifactId>
<version>1.6.3</version>
<exclusions>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
</exclusion>
<exclusion>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</exclusion>
<exclusion>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</exclusion>
<exclusion>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk</artifactId>
</exclusion>
<exclusion>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-core</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-dynamodb</artifactId>
<version>${dep.aws-sdk.version}</version>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-sts</artifactId>
Expand Down
1 change: 1 addition & 0 deletions presto-docs/src/main/sphinx/connector.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ from different data sources.
connector/thrift
connector/tpcds
connector/tpch
connector/kinesis
252 changes: 252 additions & 0 deletions presto-docs/src/main/sphinx/connector/kinesis.rst

Large diffs are not rendered by default.

34 changes: 34 additions & 0 deletions presto-kinesis/etc/catalog/kinesis.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Configuration for the kinesis connector
#
# This is a sample configuration file for the Kinesis connector.
# The values given here are the defaults.

# Connector name, usually keep this as kinesis
connector.name=kinesis

kinesis.default-schema=default
kinesis.aws-region=us-east-1
kinesis.hide-internal-columns=false
kinesis.batch-size=10000
kinesis.max-batches=100
kinesis.fetch-attempts=2
kinesis.sleep-time=1000ms

# Use an initial shard iterator type of AT_TIMESTAMP starting
# iteratorOffsetSeconds before the current time
kinesis.iterator-from-timestamp=true
kinesis.iterator-offset-seconds=86400
# info log one line each time a Kinesis batch is read with some useful info
#kinesis.log-batches=true

# enable query checkpointing via Dynamo DB (configure other
# properties below if needed)
kinesis.checkpoint-enabled=false

# properties related to checkpointing with Dynamo DB
#kinesis.dynamo-read-capacity=50
#kinesis.dynamo-write-capacity=10
#kinesis.checkpoint-interval-ms=60000ms
#kinesis.checkpoint-logical-name=process1
#kinesis.iterator-number=0

38 changes: 38 additions & 0 deletions presto-kinesis/etc/kinesis/testtable.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"tableName": "test_table",
"schemaName": "prod",
"streamName": "test_kinesis_stream",
"message": {
"dataFormat": "json",
"fields": [
{
"name": "client_id",
"type": "BIGINT",
"mapping": "client_id",
"comment": "The client ID field"
},
{
"name": "acct_balance",
"type": "DOUBLE",
"mapping": "acct_balance",
"comment": "Current account balance"
},
{
"name": "service_type",
"mapping": "service_type",
"type": "VARCHAR(20)"
},
{
"name": "signup_date",
"mapping": "signup_date",
"type": "DATE",
"dataFormat": "iso8601"
}
]
},
"comment" : "This test adds some extra fields to make sure they are ignored and don't cause issues.",
"client_metadata" : {
"name" : "Sample Query",
"query" : "select client_id, service_type, signup_date, _shard_id, _message_length from prod.test_table"
}
}
228 changes: 228 additions & 0 deletions presto-kinesis/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>io.prestosql</groupId>
<artifactId>presto-root</artifactId>
<version>320-SNAPSHOT</version>
</parent>

<artifactId>presto-kinesis</artifactId>
<description>Presto - Kinesis Connector</description>
<packaging>presto-plugin</packaging>

<properties>
<air.main.basedir>${project.parent.basedir}</air.main.basedir>
<!--TODO: move to latest aws version-->
<dep.aws-sdk.version>1.11.12</dep.aws-sdk.version>

</properties>

<dependencies>
<dependency>
<groupId>io.airlift</groupId>
<artifactId>bootstrap</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>json</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>log</artifactId>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>configuration</artifactId>
</dependency>

<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
</dependency>

<dependency>
<groupId>com.google.inject</groupId>
<artifactId>guice</artifactId>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>

<dependency>
<groupId>javax.inject</groupId>
<artifactId>javax.inject</artifactId>
</dependency>

<dependency>
<groupId>javax.annotation</groupId>
<artifactId>javax.annotation-api</artifactId>
</dependency>

<dependency>
<groupId>javax.validation</groupId>
<artifactId>validation-api</artifactId>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-kinesis</artifactId>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>amazon-kinesis-client</artifactId>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-dynamodb</artifactId>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-core</artifactId>
<exclusions>
<exclusion>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
</exclusions>
</dependency>

<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>aws-java-sdk-s3</artifactId>
<version>${dep.aws-sdk.version}</version>
<exclusions>
<exclusion>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</exclusion>
<exclusion>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- Presto SPI -->
<dependency>
<groupId>org.openjdk.jol</groupId>
<artifactId>jol-core</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>io.prestosql</groupId>
<artifactId>presto-spi</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>slice</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>units</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
<scope>provided</scope>
</dependency>

<dependency>
<groupId>com.google.code.findbugs</groupId>
<artifactId>jsr305</artifactId>
</dependency>

<!-- for testing -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.prestosql</groupId>
<artifactId>presto-tests</artifactId>
<scope>test</scope>
</dependency>

<dependency>
<groupId>io.prestosql</groupId>
<artifactId>presto-record-decoder</artifactId>
</dependency>

<dependency>
<groupId>io.prestosql</groupId>
<artifactId>presto-main</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes>
<!--Run this after uncommenting properties below-->
<exclude>**/TestMinimalFunctionality.java</exclude>
<exclude>**/TestS3TableConfigClient.java</exclude>
</excludes>
<systemPropertyVariables>
<kinesis.awsAccessKey>ACCESS-KEY</kinesis.awsAccessKey>
<kinesis.awsSecretKey>SECRET-KEY</kinesis.awsSecretKey>
<kinesis.test-table-description-location>s3://S3-LOC</kinesis.test-table-description-location>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>

<profiles>
<profile>
<id>test-kinesis</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<excludes combine.self="override" />
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>
</project>
Loading

0 comments on commit 1bb6309

Please sign in to comment.