AWS has killed the S3 Select service so this project will not receive any future updates. Feel free to fork it.
Kysely dialects, plugins and other goodies for Amazon S3 Select.
Inspired by Thomas Aribart's great post.
npm i kysely-s3-select
npm i kysely-s3-select kysely @aws-sdk/client-s3
yarn add kysely-s3-select kysely @aws-sdk/client-s3
pnpm add kysely-s3-select kysely @aws-sdk/client-s3
This package was not tested in Deno; aws-sdk-v3 might not be supported.
This package uses and extends some Kysely types and classes, which it imports by npm package name rather than by relative file path or CDN URL.
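To fix that, add an import_map.json file that maps the bare kysely specifier to a CDN URL (replace <version> with the Kysely version you use):
{
"imports": {
"kysely": "https://cdn.jsdelivr.net/npm/kysely@<version>/dist/esm/index.js"
}
}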
import { S3Client } from '@aws-sdk/client-s3'
import { Kysely, sql } from 'kysely'
import { S3SelectDialect } from 'kysely-s3-select'
/**
 * Database schema handed to `Kysely<...>` in the CSV example.
 *
 * It declares a single table, `S3Object` (the name used in `selectFrom`),
 * whose rows are typed as `Condition`.
 */
interface ConditionsCSV {
S3Object: Condition
}
/**
 * One record of the CSV object queried in the CSV example.
 *
 * Each property is a column that queries reference by name; every column is
 * typed as a string.
 * NOTE(review): the upper-case names presumably match the CSV header row —
 * confirm against the file.
 */
interface Condition {
START: string
STOP: string
PATIENT: string
CODE: string
DESCRIPTION: string
}
// Kysely instance for the CSV example: queries are typed against ConditionsCSV
// and run through S3SelectDialect against one object (bucket + key below).
const conditions = new Kysely<ConditionsCSV>({
dialect: new S3SelectDialect({
bucket: 'synthea-open-data',
client: new S3Client({
region: 'us-east-1', // optional
}),
contentType: 'csv', // one of 'csv' | 'json' | 'parquet'
// Uncomment to override CSV parsing options; every field is optional.
// csvOptions: { // optional
// allowQuotedRecordDelimiter: false, // optional
// comments: '#', // optional
// fieldDelimiter: ',', // optional
// fileHeaderInfo: 'use', // optional
// quoteCharacter: '"', // optional
// quoteEscapeCharacter: '"', // optional
// recordDelimiter: '\n', // optional
// },
key: 'coherent/unzipped/csv/conditions.csv',
})
})
// Describe the query first, then run it.
// Keeps rows whose START is >= '2000' and whose STOP is not the empty string
// (both columns are typed as strings), exposes PATIENT and DESCRIPTION under
// lower-case aliases, and caps the result at 50 rows.
const conditionsQuery = conditions
  .selectFrom('S3Object')
  .where('START', '>=', '2000')
  .where('STOP', '!=', '')
  .select(['PATIENT as patient', 'DESCRIPTION as description'])
  .limit(50)

const results = await conditionsQuery.execute()
/**
 * Database schema handed to `Kysely<...>` in the JSON example.
 *
 * It declares a single table, `S3Object`, whose value is typed as `Bundle`.
 */
interface PatientBundleJSON {
S3Object: Bundle
}
/**
 * Top-level shape of the JSON document queried in the JSON example.
 *
 * `resourceType` and `type` are pinned to single literal values; the payload
 * is the `entry` list.
 */
interface Bundle {
resourceType: 'Bundle'
type: 'transaction'
entry: Entry[]
}
/**
 * One element of `Bundle.entry`.
 *
 * Only `resource` is typed in detail (as `Patient`); `request` is left as an
 * opaque object.
 */
interface Entry {
fullUrl: string
resource: Patient
request: object
}
/**
 * Shape of `Entry.resource`.
 *
 * Fields the example does not inspect are typed loosely as `object` /
 * `object[]`.
 * NOTE(review): presumably a FHIR Patient resource (the example's object key
 * lives under `fhir/`) — confirm against the data set.
 */
interface Patient {
resourceType: 'Patient'
id: string
meta: object
text: object
extension: object[]
identifier: object[]
// List of name records; `use` is pinned to the literal 'official'.
name: {
use: 'official'
family: string
given: string[]
prefix: string[]
}[]
telecom: object[]
gender: 'male' | 'female'
birthDate: string
deceasedDateTime: string
address: object[]
maritalStatus: object
multipleBirthBoolean: boolean
communication: object[]
}
// Dialect for the JSON example: points S3 Select at a single JSON object
// (bucket + key) and declares its content type.
const patientBundleDialect = new S3SelectDialect({
  bucket: 'synthea-open-data',
  client: new S3Client({
    region: 'us-east-1', // optional
  }),
  contentType: 'json', // one of 'csv' | 'json' | 'parquet'
  // jsonOptions: { // optional
  //   type: 'document', // optional, one of 'document' | 'lines'
  // },
  key: 'coherent/unzipped/fhir/Abe604_Frami345_b8dd1798-beef-094d-1be4-f90ee0e6b7d5.json',
})

// Kysely instance whose queries are typed against PatientBundleJSON.
const patientBundle = new Kysely<PatientBundleJSON>({ dialect: patientBundleDialect })
// NOTE: `sql` is Kysely's raw-SQL template tag and must be imported from 'kysely'.
//
// Raw FROM source: walks every `entry` element of the document and exposes its
// `resource` value under the alias `resource`. `sql.ref(...)` emits the path
// segments as quoted identifiers; the type argument only tells Kysely how to
// type the aliased rows.
const patientResources = sql<
  Partial<Entry['resource']>
>`S3Object[*].${sql.ref('entry')}[*].${sql.ref('resource')}`.as('resource')

// Select the id and name of the first resource whose resourceType is 'Patient'.
// `$castTo` narrows the compile-time row type only; `executeTakeFirstOrThrow`
// rejects when the query returns no row.
const patient = await patientBundle
  .selectFrom(patientResources)
  .where('resource.resourceType', '=', 'Patient')
  .select(['resource.id as id', 'resource.name as name'])
  .limit(1)
  .$castTo<Pick<Patient, 'id' | 'name'>>()
  .executeTakeFirstOrThrow()
MIT License, see LICENSE