Skip to content

Commit

Permalink
Upgrade s3 client to ListObjectsV2
Browse files Browse the repository at this point in the history
This upgrades the s3 client from ListObjects to the newer ListObjectsV2,
which will allow us to just list prefixes in the future.

This also adds tests for our s3 implementation using minio in docker to
ensure that the s3-specific implementation is correct.
  • Loading branch information
tobias committed May 7, 2023
1 parent 5622cb1 commit bf4425c
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 20 deletions.
9 changes: 9 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,12 @@ services:
- POSTGRES_DB=clojars
volumes:
- ./data/test-postgres:/var/lib/postgresql/data
minio:
image: minio/minio:RELEASE.2023-04-20T17-56-55Z
command: server /data --console-address ":9090"
ports:
- "9000:9000"
- "9090:9090"
environment:
MINIO_ROOT_USER: fake-access-key
MINIO_ROOT_PASSWORD: fake-secret-key
54 changes: 34 additions & 20 deletions src/clojars/s3.clj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
(:require
[clojure.java.io :as io]
[clojure.string :as str]
[cognitect.aws.client.api :as aws])
[cognitect.aws.client.api :as aws]
[cognitect.aws.credentials :as credentials])
(:import
(java.io
ByteArrayInputStream)
Expand All @@ -23,26 +24,25 @@
v))

(defn- list-objects-chunk
[client bucket-name prefix marker]
[client bucket-name prefix continuation-token]
(let [request (cond-> {:Bucket bucket-name}
prefix (assoc :Prefix prefix)
marker (assoc :Marker marker))]
continuation-token (assoc :ContinuationToken continuation-token))]
(throw-on-error
(aws/invoke client
{:op :ListObjects
{:op :ListObjectsV2
:request request}))))

(defn- list-objects-seq
"Generates a lazy seq of objects, chunked by the API's paging."
[client bucket-name prefix marker]
(let [{:keys [Contents IsTruncated]}
(list-objects-chunk client bucket-name prefix marker)]
"Generates a lazy seq of list-objects results, chunked by the API's paging."
[client bucket-name prefix continuation-token]
(let [{:as result :keys [IsTruncated NextContinuationToken]}
(list-objects-chunk client bucket-name prefix continuation-token)]
(if IsTruncated
(lazy-seq
(concat Contents
(list-objects-seq client bucket-name prefix
(-> Contents last :Key))))
Contents)))
(cons result
(list-objects-seq client bucket-name prefix NextContinuationToken)))
[result])))

(defn- strip-etag
"ETags from the s3 api are wrapped in \"s"
Expand Down Expand Up @@ -83,7 +83,11 @@
:Body))

(-list-objects [_ prefix]
(map strip-etag (list-objects-seq s3 bucket-name prefix nil)))
(sequence
(comp
(mapcat :Contents)
(map strip-etag))
(list-objects-seq s3 bucket-name prefix nil)))

(-put-object [_ key stream opts]
(->> {:op :PutObject
Expand All @@ -95,13 +99,23 @@
(throw-on-error))))

(defn s3-client
[bucket]
{:pre [(not (str/blank? bucket))]}
;; Credentials are derived from the instance's role and region comes from the
;; aws.region property, so we don't have to set either here.
(->S3Client (doto (aws/client {:api :s3})
(aws/validate-requests true))
bucket))
;; Credentials are derived from the instance's role when running in
;; production and region comes from the aws.region property, so we don't have
;; to set either here.
([bucket]
(s3-client bucket nil))
;; This arity is only used directly in testing, where we use minio via docker, and we have
;; to override the endpoint and provide credentials
([bucket {:keys [credentials endpoint region]}]
{:pre [(not (str/blank? bucket))]}
(->S3Client
(doto (aws/client
(cond-> {:api :s3}
credentials (assoc :credentials-provider (credentials/basic-credentials-provider credentials))
endpoint (assoc :endpoint-override endpoint)
region (assoc :region region)))
(aws/validate-requests true))
bucket)))

(defrecord MockS3Client [state]
S3Bucket
Expand Down
104 changes: 104 additions & 0 deletions test/clojars/unit/s3_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
(ns clojars.unit.s3-test
(:require
[clojars.s3 :as s3]
[clojure.java.io :as io]
[clojure.test :refer [deftest is use-fixtures]]
[cognitect.aws.client.api :as aws]
[matcher-combinators.matchers :as m]
[matcher-combinators.test]))

;; Note: these tests exercise the s3 client end-to-end, and require minio to
;; running. See docker-compose.yml

(defn- throw-on-error
[v]
(if (some? (:cognitect.anomalies/category v))
(throw (ex-info "S3 request failed" v))
v))

(def ^:private s3-client
(memoize
(fn []
(let [bucket "testing-bucket"
client (s3/s3-client bucket
{:credentials {:access-key-id "fake-access-key"
:secret-access-key "fake-secret-key"}
:endpoint {:protocol "http"
:hostname "localhost"
:port 9000}
:region "us-east-1"})]
(aws/invoke (:s3 client) {:op :CreateBucket
:request {:Bucket bucket}})
client))))

(def ^:private page-size 1000)

(defn- clear-bucket
[]
(let [client (s3-client)]
(doseq [objects (partition-all page-size (s3/list-objects client))]
(throw-on-error
(aws/invoke (:s3 client)
{:op :DeleteObjects
:request {:Bucket (:bucket-name client)
:Delete {:Objects (map #(select-keys % [:Key]) objects)}}})))))

(use-fixtures :each (fn [f]
(try
(f)
(finally
(clear-bucket)))))

(deftest put-object+get-object-details-work
(let [s3 (s3-client)]
(s3/put-object s3 "a-key" (io/input-stream (io/resource "fake.jar")))
(is (match?
{:ETag "d7c2e3e6ed5ab399efcb2fb7d8faa87c"
:ContentLength 8
:ContentType "application/octet-stream"
:AcceptRanges "bytes"}
(s3/get-object-details s3 "a-key")))))

(deftest list-objects-works
(let [s3 (s3-client)]
(s3/put-object s3 "a-key" (io/input-stream (io/resource "fake.jar")))
(s3/put-object s3 "nested/a-key" (io/input-stream (io/resource "fake.jar")))
(is (match?
[{:Key "a-key"}
{:Key "nested/a-key"}]
(s3/list-objects s3)))))

(deftest list-objects-works-with-prefix
(let [s3 (s3-client)]
(s3/put-object s3 "a-key" (io/input-stream (io/resource "fake.jar")))
(s3/put-object s3 "nested/a-key" (io/input-stream (io/resource "fake.jar")))
(is (match?
[{:Key "nested/a-key"}]
(s3/list-objects s3 "nested/")))))

(deftest list-objects-works-with-more-than-a-page-of-objects
(let [s3 (s3-client)
key-range (range (inc page-size))]
(doseq [n key-range]
(s3/put-object s3 (format "a-key/%s" n) (io/input-stream (io/resource "fake.jar"))))
(is (match?
(m/in-any-order
(for [n key-range]
{:Key (format "a-key/%s" n)}))
(s3/list-objects s3)))))

(deftest list-object-keys-works
(let [s3 (s3-client)]
(s3/put-object s3 "a-key" (io/input-stream (io/resource "fake.jar")))
(s3/put-object s3 "nested/a-key" (io/input-stream (io/resource "fake.jar")))
(is (match?
["a-key" "nested/a-key"]
(s3/list-object-keys s3)))))

(deftest list-object-keys-works-with-prefix
(let [s3 (s3-client)]
(s3/put-object s3 "a-key" (io/input-stream (io/resource "fake.jar")))
(s3/put-object s3 "nested/a-key" (io/input-stream (io/resource "fake.jar")))
(is (match?
["nested/a-key"]
(s3/list-object-keys s3 "nested/")))))

0 comments on commit bf4425c

Please sign in to comment.