From be2816e5b3b4408a9f2bdcb1bd44ad923c85f896 Mon Sep 17 00:00:00 2001 From: Toby Crawley Date: Tue, 23 May 2023 08:29:07 -0400 Subject: [PATCH] Cache repo listing responses We are spending too much on calls to s3, so this introduces an on-disk caching layer for repo listings. Responses will be cached for up to 12 hours. --- resources/config.edn | 1 + src/clojars/routes/repo_listing.clj | 6 +-- src/clojars/system.clj | 5 +- src/clojars/web.clj | 4 +- src/clojars/web/repo_listing.clj | 80 ++++++++++++++++++++++++----- 5 files changed, 76 insertions(+), 20 deletions(-) diff --git a/resources/config.edn b/resources/config.edn index 7c3502ee..58f3a5f2 100644 --- a/resources/config.edn +++ b/resources/config.edn @@ -1,6 +1,7 @@ {:base-url "https://clojars.org" :bcrypt-work-factor 12 :bind "127.0.0.1" + :cache-path "data/cache" :cdn-token #profile {:production #ssm-parameter "/clojars/production/cdn_token" :default nil} :cdn-url "https://repo.clojars.org" diff --git a/src/clojars/routes/repo_listing.clj b/src/clojars/routes/repo_listing.clj index 2d96cce9..f06c02c0 100644 --- a/src/clojars/routes/repo_listing.clj +++ b/src/clojars/routes/repo_listing.clj @@ -4,11 +4,11 @@ [compojure.core :as compojure :refer [GET HEAD]])) (defn routes - [repo-bucket] + [repo-lister] (compojure/routes (GET ["/list-repo"] {{:keys [path]} :params} - (repo-listing/response repo-bucket path)) + (repo-listing/response repo-lister path)) (HEAD ["/list-repo"] {{:keys [path]} :params} - (repo-listing/response repo-bucket path)))) + (repo-listing/response repo-lister path)))) diff --git a/src/clojars/system.clj b/src/clojars/system.clj index 9e54f0d6..5ccfa24a 100644 --- a/src/clojars/system.clj +++ b/src/clojars/system.clj @@ -17,6 +17,7 @@ [clojars.stats :refer [artifact-stats]] [clojars.storage :as storage] [clojars.web :as web] + [clojars.web.repo-listing :as repo-listing] [com.stuartsierra.component :as component] [duct.component.endpoint :refer [endpoint-component]] [duct.component.handler :refer [handler-component]] @@ -87,11 +88,13 @@ :mailer (simple-mailer (:mail config)) :notifications (notifications/notification-component) :repo-bucket (s3/s3-client (get-in config [:s3 :repo-bucket])) + :repo-lister (repo-listing/repo-lister (:cache-path config)) :storage (storage-component (:repo config) (:cdn-token config) (:cdn-url config)))) (component/system-using {:app [:clojars-app] :clojars-app [:db :github :gitlab :error-reporter :http-client - :mailer :repo-bucket :stats :search :storage] + :mailer :repo-lister :stats :search :storage] :http [:app] :notifications [:db :mailer] + :repo-lister [:repo-bucket] :storage [:error-reporter :repo-bucket]})))) diff --git a/src/clojars/web.clj b/src/clojars/web.clj index db5bade9..fa5e64bd 100644 --- a/src/clojars/web.clj +++ b/src/clojars/web.clj @@ -48,7 +48,7 @@ :status 400}))))) (defn- main-routes - [{:as _system :keys [db mailer repo-bucket search stats]}] + [{:as _system :keys [db mailer repo-lister search stats]}] (let [db (:spec db)] (routes (GET "/" _ @@ -77,7 +77,7 @@ #(html-doc "DMCA" {:account %} (raw (slurp (io/resource "dmca.html")))))) session/routes - (repo-listing/routes repo-bucket) + (repo-listing/routes repo-lister) (group/routes db) (artifact/routes db stats) ;; user routes must go after artifact routes diff --git a/src/clojars/web/repo_listing.clj b/src/clojars/web/repo_listing.clj index 43162820..25b4af66 100644 --- a/src/clojars/web/repo_listing.clj +++ b/src/clojars/web/repo_listing.clj @@ -3,9 +3,18 @@ [clojars.s3 :as s3] [clojars.web.common :as common] [clojars.web.safe-hiccup :as safe-hiccup] + [clojure.edn :as edn] + [clojure.java.io :as io] [clojure.string :as str] [hiccup.element :as el] - [ring.util.response :as ring.response])) + [ring.util.response :as ring.response]) + (:import + (java.io + File) + (java.util + Date))) + +(set! *warn-on-reflection* true) (defn- sort-entries [entries] @@ -98,10 +107,44 @@ (mapcat entry-line entries)]] [:hr]])) +(def ^:private max-age 43200) ;; 12 hours + +(defn- cache-file + ^File + [cache-path path] + ;; assumes path has gone through normalize-path already + (let [path (if path + ;; strip trailing / + (subs path 0 (dec (count path))) + "root") + f (io/file (format "%s/%s.edn" cache-path path))] + (io/make-parents f) + f)) + +(defn- file-age + [^File f] + (-> (- (.getTime (Date.)) (.lastModified f)) + (/ 1000) + (long))) + +(defn- cached-response + [cache-path requested-path] + (let [cached-file (cache-file cache-path requested-path)] + (when (.exists cached-file) + (let [age (file-age cached-file)] + (when-not (> age max-age) + [(edn/read-string (slurp cached-file)) age]))))) + +(defn- cache-response + [cache-path requested-path response] + (spit (cache-file cache-path requested-path) + (binding [*print-length* nil] + (pr-str response))) + response) + (defn- with-maxage - [r] - ;; Instruct fastly to cache this result for 12 hours - (ring.response/header r "Cache-Control" "s-maxage=43200")) + [r curr-age-in-seconds] + (ring.response/header r "Cache-Control" (format "s-maxage=%s" (- max-age curr-age-in-seconds)))) (def ^:private not-found (-> (safe-hiccup/html5 @@ -110,15 +153,24 @@ [:body [:h1 "404 Not Found"]]) (ring.response/not-found) (ring.response/content-type "text/html;charset=utf-8") - (with-maxage))) + (with-maxage 0))) (defn response - [s3 path] - (let [path (normalize-path path) - entries (sort-entries (s3/list-entries s3 path))] - (if (seq entries) - (-> (index path entries) - (ring.response/response) - (ring.response/content-type "text/html;charset=utf-8") - (with-maxage)) - not-found))) + [{:keys [cache-path repo-bucket]} path] + (let [path (normalize-path path)] + (if-some [[response age] (cached-response cache-path path)] + (with-maxage response age) + (cache-response + cache-path + path + (if-some [entries (seq (sort-entries (s3/list-entries repo-bucket path)))] + (-> (index path entries) + (ring.response/response) + (ring.response/content-type "text/html;charset=utf-8") + (with-maxage 0)) + not-found))))) + +(defn repo-lister + [cache-path] + ;; :repo-bucket gets assoc'ed on to this by component + {:cache-path cache-path})