Skip to content

Commit

Permalink
Merge pull request #171 from umccr/configurable_s3_endpoint
Browse files Browse the repository at this point in the history
Custom S3 endpoint support (i.e: Minio)
  • Loading branch information
brainstorm authored May 4, 2023
2 parents d8bb729 + 25f92fb commit ca3c2e6
Show file tree
Hide file tree
Showing 8 changed files with 523 additions and 418 deletions.
813 changes: 436 additions & 377 deletions Cargo.lock

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
A **server** implementation of the [htsget protocol][htsget-protocol] for bioinformatics in Rust. It is:
* **Fully-featured**: supports BAM and CRAM for reads, and VCF and BCF for variants, as well as other aspects of the protocol such as TLS, and CORS.
* **Serverless**: supports local server instances using [Actix Web][actix-web], and serverless instances using [AWS Lambda Rust Runtime][aws-lambda-rust-runtime].
* **Storage interchangeable**: supports local filesystem storage, and storage on AWS S3.
* **Thoroughly tested and benchmarked**: tested using a purpose-built [test suite][htsget-test], and benchmarked using [criterion-rs].
* **Storage interchangeable**: supports local filesystem storage as well as objects via [Minio][minio] and AWS S3.
* **Thoroughly tested and benchmarked**: tested using a purpose-built [test suite][htsget-test] and benchmarked using [criterion-rs].

To get started, see [Usage].

Expand Down Expand Up @@ -138,3 +138,4 @@ This project is licensed under the [MIT license][license].
[htsget-lambda]: htsget-lambda
[license]: LICENSE
[aws-lambda-rust-runtime]: https://github.com/awslabs/aws-lambda-rust-runtime
[minio]: https://min.io/
36 changes: 30 additions & 6 deletions htsget-config/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,36 @@ See [here][rust-log] for more information on setting this variable.
[tracing]: https://github.com/tokio-rs/tracing
[rust-log]: https://rust-lang-nursery.github.io/rust-cookbook/development_tools/debugging/config_log.html

#### AWS config
### Minio

Config for AWS is read entirely from environment variables. A default configuration is loaded from environment variables using the [aws-config] crate.
Check out the [AWS documentation][aws-sdk] for the rust SDK for more information.
Operating a local object storage like [Minio][minio] can be easily achieved by leveraging the `endpoint` directive as shown below:

[aws-config]: https://docs.rs/aws-config/latest/aws_config/
[aws-sdk]: https://docs.aws.amazon.com/sdk-for-rust/latest/dg/welcome.html
```toml
[[resolvers]]
regex = ".*"
substitution_string = "$0"

[resolvers.storage]
bucket = 'bucket'
endpoint = "http://127.0.0.1:9000"
```

This will have htsget-rs behaving like the native AWS CLI, i.e:

```
mkdir /tmp/test
minio server /tmp/test
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
aws s3 mb --endpoint-url=http://localhost:9000 s3://bucket/
aws s3 cp --recursive --endpoint-url=http://localhost:9000 htsget-rs/data/bam s3://bucket/
cargo run -p htsget-actix -- --config ~/.htsget-rs/config.toml
# On another session/terminal
curl http://localhost:8080/reads/htsnexus_test_NA12878
```

Please don't run the example above as-is in production systems ;)

### As a library

Expand All @@ -384,4 +407,5 @@ This crate has the following features:

This project is licensed under the [MIT license][license].

[license]: LICENSE
[license]: LICENSE
[minio]: https://min.io/
2 changes: 1 addition & 1 deletion htsget-config/src/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ mod tests {
)],
|config| {
let storage = Storage::S3 {
s3_storage: S3Storage::new("bucket".to_string()),
s3_storage: S3Storage::new("bucket".to_string(), None),
};
let allow_guard = AllowGuard::new(
ReferenceNames::List(HashSet::from_iter(vec!["chr1".to_string()])),
Expand Down
15 changes: 11 additions & 4 deletions htsget-config/src/storage/s3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,34 @@ use tracing::instrument;
#[serde(default)]
pub struct S3Storage {
bucket: String,
endpoint: Option<String>,
}

impl S3Storage {
/// Create a new S3 storage.
pub fn new(bucket: String) -> Self {
Self { bucket }
pub fn new(bucket: String, endpoint: Option<String>) -> Self {
Self { bucket, endpoint }
}

/// Get the bucket.
pub fn bucket(&self) -> &str {
&self.bucket
}

/// Get the endpoint
pub fn endpoint(self) -> Option<String> {
self.endpoint
}
}

impl<'a> From<ResolverMatcher<'a>> for Option<S3Storage> {
#[instrument(level = "trace", ret)]
fn from(resolver_and_query: ResolverMatcher) -> Self {
let (regex, regex_match) = resolver_and_query.into_inner();
let bucket = regex.captures(regex_match)?.get(1)?.as_str();
let endpoint = None;

Some(S3Storage::new(bucket.to_string()))
Some(S3Storage::new(bucket.to_string(), endpoint))
}
}

Expand Down Expand Up @@ -65,7 +72,7 @@ mod tests {
let regex = Regex::new("^(bucket)/(?P<key>.*)$").unwrap();

let result: Option<S3Storage> = ResolverMatcher(&regex, "bucket/id").into();
let expected = S3Storage::new("bucket".to_string());
let expected = S3Storage::new("bucket".to_string(), None); // TODO: Fix custom endpoint func

assert_eq!(result.unwrap(), expected);
}
Expand Down
14 changes: 7 additions & 7 deletions htsget-search/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ noodles = { version = "0.32", features = ["async", "core", "bgzf", "bam", "bcf",

# Amazon S3
bytes = { version = "1.4", optional = true }
aws-sdk-s3 = { version = "0.24", optional = true, features = ["test-util"] }
aws-config = { version = "0.54", optional = true }
aws-sdk-s3 = { version = "0.27", optional = true, features = ["test-util"] }
aws-config = { version = "0.55", optional = true }
aws-credential-types = { version = "0.55", features = ["test-util"] }

# Error control, tracing, config
thiserror = "1.0"
Expand All @@ -52,11 +53,10 @@ serde = "1.0"
tempfile = "3.3"
data-url = "0.2"

# Aws S3 storage.
aws-credential-types = { version = "0.54", features = ["test-util"] }
s3s = { version = "0.3" }
s3s-fs = { version = "0.3" }
s3s-aws = { version = "0.3" }
# S3 storage testing
s3s = { version = "0.5" }
s3s-fs = { version = "0.5" }
s3s-aws = { version = "0.5" }

# Axum server
reqwest = { version = "0.11", default-features = false, features = ["rustls-tls"] }
Expand Down
6 changes: 5 additions & 1 deletion htsget-search/src/htsget/from_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,11 @@ impl<S> ResolveResponse for HtsGetFromStorage<S> {
#[cfg(feature = "s3-storage")]
async fn from_s3_storage(s3_storage: &S3Storage, query: &Query) -> Result<Response> {
let searcher = HtsGetFromStorage::new(
AwsS3Storage::new_with_default_config(s3_storage.bucket().to_string()).await,
AwsS3Storage::new_with_default_config(
s3_storage.bucket().to_string(),
s3_storage.clone().endpoint(),
)
.await,
);
searcher.search(query.clone()).await
}
Expand Down
50 changes: 30 additions & 20 deletions htsget-search/src/storage/aws.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,17 @@ use std::io;
use std::io::ErrorKind::Other;
use std::time::Duration;

use async_trait::async_trait;
use aws_sdk_s3::client::fluent_builders;
use aws_sdk_s3::error::{GetObjectError, GetObjectErrorKind, HeadObjectErrorKind};
use aws_sdk_s3::model::StorageClass;
use aws_sdk_s3::output::HeadObjectOutput;
use aws_sdk_s3::presigning::config::PresigningConfig;
use aws_sdk_s3::types::{ByteStream, SdkError};
use aws_sdk_s3::error::SdkError;
use aws_sdk_s3::operation::get_object::builders::GetObjectFluentBuilder;
use aws_sdk_s3::operation::get_object::GetObjectError;
use aws_sdk_s3::operation::head_object::{HeadObjectError, HeadObjectOutput};
use aws_sdk_s3::presigning::PresigningConfig;
use aws_sdk_s3::primitives::ByteStream;
use aws_sdk_s3::types::StorageClass;
use aws_sdk_s3::Client;

use async_trait::async_trait;
use bytes::Bytes;
use fluent_builders::GetObject;
use tokio_util::io::StreamReader;
use tracing::debug;
use tracing::instrument;
Expand Down Expand Up @@ -52,8 +53,15 @@ impl AwsS3Storage {
AwsS3Storage { client, bucket }
}

pub async fn new_with_default_config(bucket: String) -> Self {
AwsS3Storage::new(Client::new(&aws_config::load_from_env().await), bucket)
pub async fn new_with_default_config(bucket: String, endpoint: Option<String>) -> Self {
let sdk_config = aws_config::load_from_env().await;
let mut s3_config_builder = aws_sdk_s3::config::Builder::from(&sdk_config);
s3_config_builder.set_endpoint_url(endpoint); // For local S3 storage, i.e: Minio

let client = s3_config_builder.build();
let s3_client = aws_sdk_s3::Client::from_conf(client);

AwsS3Storage::new(s3_client, bucket)
}

/// Return an S3 pre-signed URL of the key. This function does not check that the key exists,
Expand Down Expand Up @@ -83,7 +91,6 @@ impl AwsS3Storage {
}

async fn s3_head<K: AsRef<str> + Send>(&self, key: K) -> Result<HeadObjectOutput> {
println!("{:#?}", self.client.list_buckets().send().await.unwrap());
self
.client
.head_object()
Expand All @@ -93,7 +100,7 @@ impl AwsS3Storage {
.await
.map_err(|err| {
let err = err.into_service_error();
if let HeadObjectErrorKind::NotFound(_) = err.kind {
if let HeadObjectError::NotFound(_) = err {
KeyNotFound(key.as_ref().to_string())
} else {
AwsS3Error(err.to_string(), key.as_ref().to_string())
Expand Down Expand Up @@ -134,7 +141,7 @@ impl AwsS3Storage {
Delayed(class)
}

fn apply_range(builder: GetObject, range: BytesPosition) -> GetObject {
fn apply_range(builder: GetObjectFluentBuilder, range: BytesPosition) -> GetObjectFluentBuilder {
let range: String = String::from(&BytesRange::from(&range));
if range.is_empty() {
builder
Expand Down Expand Up @@ -185,7 +192,7 @@ impl AwsS3Storage {
K: AsRef<str> + Send,
{
let error = error.into_service_error();
if let GetObjectErrorKind::NoSuchKey(_) = error.kind {
if let GetObjectError::NoSuchKey(_) = error {
KeyNotFound(key.as_ref().to_string())
} else {
AwsS3Error(error.to_string(), key.as_ref().to_string())
Expand Down Expand Up @@ -252,8 +259,11 @@ pub(crate) mod tests {

use aws_config::SdkConfig;
use aws_credential_types::provider::SharedCredentialsProvider;
use aws_sdk_s3::{Client, Credentials, Region};
use s3s::service::S3Service;
use aws_sdk_s3::config::{Credentials, Region};
use aws_sdk_s3::Client;

use s3s::auth::SimpleAuth;
use s3s::service::S3ServiceBuilder;
use s3s_aws;

use crate::storage::aws::AwsS3Storage;
Expand All @@ -275,13 +285,13 @@ pub(crate) mod tests {
let conn = {
let fs = s3s_fs::FileSystem::new(server_base_path).unwrap();

let auth = s3s::SimpleAuth::from_single(cred.access_key_id(), cred.secret_access_key());
let auth = SimpleAuth::from_single(cred.access_key_id(), cred.secret_access_key());

let mut service = S3Service::new(Box::new(fs));
service.set_auth(Box::new(auth));
let mut service = S3ServiceBuilder::new(fs);
service.set_auth(auth);
service.set_base_domain(DOMAIN_NAME);

s3s_aws::Connector::from(service.into_shared())
s3s_aws::Connector::from(service.build().into_shared())
};

let sdk_config = SdkConfig::builder()
Expand Down

0 comments on commit ca3c2e6

Please sign in to comment.