This repository has been archived by the owner on Aug 16, 2022. It is now read-only.

feat: Added glue crawlers #1363

Merged: 7 commits, Aug 4, 2022
Changes from all commits
20 changes: 20 additions & 0 deletions client/mocks/glue.go

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions client/services.go
@@ -764,6 +764,7 @@ type GlueClient interface {
GetMLTaskRuns(ctx context.Context, params *glue.GetMLTaskRunsInput, optFns ...func(*glue.Options)) (*glue.GetMLTaskRunsOutput, error)
GetDataCatalogEncryptionSettings(ctx context.Context, params *glue.GetDataCatalogEncryptionSettingsInput, optFns ...func(*glue.Options)) (*glue.GetDataCatalogEncryptionSettingsOutput, error)
GetDevEndpoints(ctx context.Context, params *glue.GetDevEndpointsInput, optFns ...func(*glue.Options)) (*glue.GetDevEndpointsOutput, error)
GetCrawlers(ctx context.Context, params *glue.GetCrawlersInput, optFns ...func(*glue.Options)) (*glue.GetCrawlersOutput, error)
}

//go:generate mockgen -package=mocks -destination=./mocks/kinesis.go . KinesisClient
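For context, a resolver built on this new method would page through crawlers with `NextToken` until the token comes back empty. A minimal Go sketch, assuming only the `GetCrawlers` signature added above (the `fetchGlueCrawlers` name, the local interface, and the channel plumbing are illustrative, not part of this diff):

```go
package glue

import (
	"context"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/service/glue"
)

// crawlerGetter mirrors the GetCrawlers method added to GlueClient above.
type crawlerGetter interface {
	GetCrawlers(ctx context.Context, params *glue.GetCrawlersInput, optFns ...func(*glue.Options)) (*glue.GetCrawlersOutput, error)
}

// fetchGlueCrawlers pages through GetCrawlers until NextToken comes back
// empty, sending each batch of crawlers to res.
func fetchGlueCrawlers(ctx context.Context, client crawlerGetter, res chan<- interface{}) error {
	var input glue.GetCrawlersInput
	for {
		output, err := client.GetCrawlers(ctx, &input)
		if err != nil {
			return err
		}
		res <- output.Crawlers
		if aws.ToString(output.NextToken) == "" {
			return nil // last page
		}
		input.NextToken = output.NextToken
	}
}
```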
10 changes: 10 additions & 0 deletions docs/tables/aws_glue_crawler_targets_catalog_targets.md
@@ -0,0 +1,10 @@

# Table: aws_glue_crawler_targets_catalog_targets
Specifies a Glue Data Catalog target
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|database_name|text|The name of the database to be synchronized|
|tables|text[]|A list of the tables to be synchronized|
|connection_name|text|The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type|
10 changes: 10 additions & 0 deletions docs/tables/aws_glue_crawler_targets_delta_targets.md
@@ -0,0 +1,10 @@

# Table: aws_glue_crawler_targets_delta_targets
Specifies a Delta data store to crawl one or more Delta tables
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|connection_name|text|The name of the connection to use to connect to the Delta table target|
|delta_tables|text[]|A list of the Amazon S3 paths to the Delta tables|
|write_manifest|boolean|Specifies whether to write the manifest files to the Delta table path|
10 changes: 10 additions & 0 deletions docs/tables/aws_glue_crawler_targets_dynamo_db_targets.md
@@ -0,0 +1,10 @@

# Table: aws_glue_crawler_targets_dynamo_db_targets
Specifies an Amazon DynamoDB table to crawl
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|path|text|The name of the DynamoDB table to crawl|
|scan_all|boolean|Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table|
|scan_rate|float|The percentage of the configured read capacity units to use by the Glue crawler. Read capacity units is a term defined by DynamoDB, and is a numeric value that acts as a rate limiter for the number of reads that can be performed on that table per second|
10 changes: 10 additions & 0 deletions docs/tables/aws_glue_crawler_targets_jdbc_targets.md
@@ -0,0 +1,10 @@

# Table: aws_glue_crawler_targets_jdbc_targets
Specifies a JDBC data store to crawl
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|connection_name|text|The name of the connection to use to connect to the JDBC target|
|exclusions|text[]|A list of glob patterns used to exclude from the crawl|
|path|text|The path of the JDBC target|
10 changes: 10 additions & 0 deletions docs/tables/aws_glue_crawler_targets_mongo_db_targets.md
@@ -0,0 +1,10 @@

# Table: aws_glue_crawler_targets_mongo_db_targets
Specifies an Amazon DocumentDB or MongoDB data store to crawl
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|connection_name|text|The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target|
|path|text|The path of the Amazon DocumentDB or MongoDB target (database/collection)|
|scan_all|boolean|Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table|
13 changes: 13 additions & 0 deletions docs/tables/aws_glue_crawler_targets_s3_targets.md
@@ -0,0 +1,13 @@

# Table: aws_glue_crawler_targets_s3_targets
Specifies a data store in Amazon Simple Storage Service (Amazon S3)
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|crawler_cq_id|uuid|Unique CloudQuery ID of aws_glue_crawlers table (FK)|
|connection_name|text|The name of a connection which allows a job or crawler to access data in Amazon S3 within an Amazon Virtual Private Cloud environment (Amazon VPC)|
|dlq_event_queue_arn|text|A valid Amazon dead-letter SQS ARN|
|event_queue_arn|text|A valid Amazon SQS ARN|
|exclusions|text[]|A list of glob patterns used to exclude from the crawl|
|path|text|The path to the Amazon S3 target|
|sample_size|bigint|Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset|
37 changes: 37 additions & 0 deletions docs/tables/aws_glue_crawlers.md
@@ -0,0 +1,37 @@

# Table: aws_glue_crawlers
Specifies a crawler program that examines a data source and uses classifiers to try to determine its schema
## Columns
| Name | Type | Description |
| ------------- | ------------- | ----- |
|arn|text|ARN of the resource.|
|account_id|text|The AWS Account ID of the resource.|
|region|text|The AWS Region of the resource.|
|tags|jsonb|Resource tags|
|classifiers|text[]|A list of UTF-8 strings that specify the custom classifiers that are associated with the crawler|
|configuration|text|Crawler configuration information|
|crawl_elapsed_time|bigint|If the crawler is running, contains the total time elapsed since the last crawl began|
|crawler_security_configuration|text|The name of the SecurityConfiguration structure to be used by this crawler|
|creation_time|timestamp without time zone|The time that the crawler was created|
|database_name|text|The name of the database in which the crawler's output is stored|
|description|text|A description of the crawler|
|lake_formation_configuration_account_id|text|Required for cross account crawls|
|lake_formation_configuration_use_lake_formation_credentials|boolean|Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials|
|last_crawl_error_message|text|If an error occurred, the error information about the last crawl|
|last_crawl_log_group|text|The log group for the last crawl|
|last_crawl_log_stream|text|The log stream for the last crawl|
|last_crawl_message_prefix|text|The prefix for a message about this crawl|
|last_crawl_start_time|timestamp without time zone|The time at which the crawl started|
|last_crawl_status|text|Status of the last crawl|
|last_updated|timestamp without time zone|The time that the crawler was last updated|
|lineage_configuration_crawler_lineage_settings|text|Specifies whether data lineage is enabled for the crawler|
|name|text|The name of the crawler|
|recrawl_behavior|text|Specifies whether to crawl the entire dataset again or to crawl only folders that were added since the last crawler run|
|role|text|The Amazon Resource Name (ARN) of an IAM role that's used to access customer resources, such as Amazon Simple Storage Service (Amazon S3) data|
|schedule_expression|text|A cron expression used to specify the schedule; see Time-Based Schedules for Jobs and Crawlers (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *)|
|schedule_state|text|The state of the schedule|
|schema_change_policy_delete_behavior|text|The deletion behavior when the crawler finds a deleted object|
|schema_change_policy_update_behavior|text|The update behavior when the crawler finds a changed schema|
|state|text|Indicates whether the crawler is running, or whether a run is pending|
|table_prefix|text|The prefix added to the names of tables that are created|
|version|bigint|The version of the crawler|
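Because each `aws_glue_crawler_targets_*` table carries a `crawler_cq_id` foreign key back to this table, a join reconstructs a crawler together with its targets. A hedged Go sketch using database/sql (the connection string is a placeholder, and the parent-side `cq_id` meta column is an assumption; it is not listed in the docs above):

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // Postgres driver
)

func main() {
	// Placeholder DSN; point it at your CloudQuery database.
	db, err := sql.Open("postgres", "postgres://localhost:5432/cloudquery?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Join crawlers to their S3 targets via the crawler_cq_id FK.
	// cq_id is CloudQuery's internal row ID (assumed, not shown in the docs).
	rows, err := db.Query(`
		SELECT c.name, COALESCE(t.path, '')
		FROM aws_glue_crawlers c
		JOIN aws_glue_crawler_targets_s3_targets t ON t.crawler_cq_id = c.cq_id`)
	if err != nil {
		log.Fatal(err)
	}
	defer rows.Close()

	for rows.Next() {
		var name, path string
		if err := rows.Scan(&name, &path); err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%s -> %s\n", name, path)
	}
	if err := rows.Err(); err != nil {
		log.Fatal(err)
	}
}
```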
1 change: 1 addition & 0 deletions resources/provider/provider.go
@@ -168,6 +168,7 @@ func Provider() *provider.Provider {
"emr.block_public_access_configs": emr.EmrBlockPublicAccessConfigs(),
"emr.clusters": emr.EmrClusters(),
"fsx.backups": fsx.FsxBackups(),
"glue.crawlers": glue.Crawlers(),
"glue.databases": glue.Databases(),
"glue.datacatalog_encryption_settings": glue.DatacatalogEncryptionSettings(),
"glue.dev_endpoints": glue.DevEndpoints(),
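The `glue.Crawlers()` constructor itself is not shown in this diff. As a rough sketch of the shape such a table definition takes, assuming the cq-provider-sdk `schema.Table` conventions this provider uses elsewhere (column list abbreviated, resolver body elided):

```go
package glue

import (
	"context"

	"github.com/cloudquery/cq-provider-sdk/provider/schema"
)

// Sketch only: assumes the cq-provider-sdk schema conventions used by the
// provider's other glue tables; the column list is abbreviated.
func Crawlers() *schema.Table {
	return &schema.Table{
		Name:        "aws_glue_crawlers",
		Description: "Specifies a crawler program that examines a data source and uses classifiers to try to determine its schema",
		Resolver:    fetchGlueCrawlers,
		Columns: []schema.Column{
			{Name: "name", Type: schema.TypeString, Resolver: schema.PathResolver("Name")},
			{Name: "database_name", Type: schema.TypeString, Resolver: schema.PathResolver("DatabaseName")},
		},
	}
}

// fetchGlueCrawlers adapts the GetCrawlers pagination sketched earlier to
// the SDK's resolver signature (assumed); body elided here.
func fetchGlueCrawlers(ctx context.Context, meta schema.ClientMeta, parent *schema.Resource, res chan<- interface{}) error {
	return nil // pagination over GetCrawlers goes here
}
```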