Skip to content

Commit

Permalink
fix(repo-server): excess git requests, add shared cache lock on revis…
Browse files Browse the repository at this point in the history
…ions (Issue argoproj#14725) (argoproj#17109)

* fix(repo-server): excess git requests, cache lock on revisions

Signed-off-by: nromriell <[email protected]>

* fix: pr feedback, simplify, add configurable variable

Signed-off-by: nromriell <[email protected]>

* fix: codegen, lint

Signed-off-by: nromriell <[email protected]>

* fix: test print, no opts set, var type nit

Signed-off-by: nromriell <[email protected]>

* chore: add additional logging for unexpected cache error

Signed-off-by: nromriell <[email protected]>

---------

Signed-off-by: nromriell <[email protected]>
Co-authored-by: Ishita Sequeira <[email protected]>
Signed-off-by: Kevin Lyda <[email protected]>
  • Loading branch information
2 people authored and lyda committed Mar 28, 2024
1 parent d091276 commit 2afbbb5
Show file tree
Hide file tree
Showing 21 changed files with 975 additions and 103 deletions.
1 change: 1 addition & 0 deletions docs/operator-manual/server-commands/argocd-repo-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ argocd-repo-server [flags]
--redisdb int Redis database.
--repo-cache-expiration duration Cache expiration for repo state, incl. app lists, app details, manifest generation, revision meta-data (default 24h0m0s)
--revision-cache-expiration duration Cache expiration for cached revision (default 3m0s)
--revision-cache-lock-timeout duration Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable (default 10s)
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinelmaster string Redis sentinel master group name. (default "master")
--streamed-manifest-max-extracted-size string Maximum size of streamed manifest archives when extracted (default "1G")
Expand Down
1 change: 1 addition & 0 deletions docs/operator-manual/server-commands/argocd-server.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ argocd-server [flags]
--repo-server-timeout-seconds int Repo server RPC call timeout seconds. (default 60)
--request-timeout string The length of time to wait before giving up on a single server request. Non-zero values should contain a corresponding time unit (e.g. 1s, 2m, 3h). A value of zero means don't timeout requests. (default "0")
--revision-cache-expiration duration Cache expiration for cached revision (default 3m0s)
--revision-cache-lock-timeout duration Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable (default 10s)
--rootpath string Used if Argo CD is running behind reverse proxy under subpath different from /
--sentinel stringArray Redis sentinel hostname and port (e.g. argocd-redis-ha-announce-0:6379).
--sentinelmaster string Redis sentinel master group name. (default "master")
Expand Down
6 changes: 6 additions & 0 deletions manifests/base/repo-server/argocd-repo-server-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,12 @@ spec:
name: argocd-cmd-params-cm
key: reposerver.disable.helm.manifest.max.extracted.size
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/core-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21484,6 +21484,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/ha/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23083,6 +23083,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/ha/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2204,6 +2204,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22129,6 +22129,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
6 changes: 6 additions & 0 deletions manifests/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,12 @@ spec:
key: reposerver.disable.helm.manifest.max.extracted.size
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_REVISION_CACHE_LOCK_TIMEOUT
valueFrom:
configMapKeyRef:
key: reposerver.revision.cache.lock.timeout
name: argocd-cmd-params-cm
optional: true
- name: ARGOCD_GIT_MODULES_ENABLED
valueFrom:
configMapKeyRef:
Expand Down
169 changes: 144 additions & 25 deletions reposerver/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@ import (
)

var ErrCacheMiss = cacheutil.ErrCacheMiss
var ErrCacheKeyLocked = cacheutil.ErrCacheKeyLocked

type Cache struct {
cache *cacheutil.Cache
repoCacheExpiration time.Duration
revisionCacheExpiration time.Duration
cache *cacheutil.Cache
repoCacheExpiration time.Duration
revisionCacheExpiration time.Duration
revisionCacheLockTimeout time.Duration
}

// ClusterRuntimeInfo holds cluster runtime information
Expand All @@ -39,16 +41,18 @@ type ClusterRuntimeInfo interface {
GetKubeVersion() string
}

func NewCache(cache *cacheutil.Cache, repoCacheExpiration time.Duration, revisionCacheExpiration time.Duration) *Cache {
return &Cache{cache, repoCacheExpiration, revisionCacheExpiration}
func NewCache(cache *cacheutil.Cache, repoCacheExpiration time.Duration, revisionCacheExpiration time.Duration, revisionCacheLockTimeout time.Duration) *Cache {
return &Cache{cache, repoCacheExpiration, revisionCacheExpiration, revisionCacheLockTimeout}
}

func AddCacheFlagsToCmd(cmd *cobra.Command, opts ...cacheutil.Options) func() (*Cache, error) {
var repoCacheExpiration time.Duration
var revisionCacheExpiration time.Duration
var revisionCacheLockTimeout time.Duration

cmd.Flags().DurationVar(&repoCacheExpiration, "repo-cache-expiration", env.ParseDurationFromEnv("ARGOCD_REPO_CACHE_EXPIRATION", 24*time.Hour, 0, math.MaxInt64), "Cache expiration for repo state, incl. app lists, app details, manifest generation, revision meta-data")
cmd.Flags().DurationVar(&revisionCacheExpiration, "revision-cache-expiration", env.ParseDurationFromEnv("ARGOCD_RECONCILIATION_TIMEOUT", 3*time.Minute, 0, math.MaxInt64), "Cache expiration for cached revision")
cmd.Flags().DurationVar(&revisionCacheLockTimeout, "revision-cache-lock-timeout", env.ParseDurationFromEnv("ARGOCD_REVISION_CACHE_LOCK_TIMEOUT", 10*time.Second, 0, math.MaxInt64), "Cache TTL for locks to prevent duplicate requests on revisions, set to 0 to disable")

repoFactory := cacheutil.AddCacheFlagsToCmd(cmd, opts...)

Expand All @@ -57,7 +61,7 @@ func AddCacheFlagsToCmd(cmd *cobra.Command, opts ...cacheutil.Options) func() (*
if err != nil {
return nil, fmt.Errorf("error adding cache flags to cmd: %w", err)
}
return NewCache(cache, repoCacheExpiration, revisionCacheExpiration), nil
return NewCache(cache, repoCacheExpiration, revisionCacheExpiration, revisionCacheLockTimeout), nil
}
}

Expand Down Expand Up @@ -145,7 +149,12 @@ func (c *Cache) ListApps(repoUrl, revision string) (map[string]string, error) {
}

func (c *Cache) SetApps(repoUrl, revision string, apps map[string]string) error {
return c.cache.SetItem(listApps(repoUrl, revision), apps, c.repoCacheExpiration, apps == nil)
return c.cache.SetItem(
listApps(repoUrl, revision),
apps,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: apps == nil})
}

func helmIndexRefsKey(repo string) string {
Expand All @@ -154,7 +163,14 @@ func helmIndexRefsKey(repo string) string {

// SetHelmIndex stores helm repository index.yaml content to cache
func (c *Cache) SetHelmIndex(repo string, indexData []byte) error {
return c.cache.SetItem(helmIndexRefsKey(repo), indexData, c.revisionCacheExpiration, false)
if indexData == nil {
// Logged as warning upstream
return fmt.Errorf("helm index data is nil, skipping cache")
}
return c.cache.SetItem(
helmIndexRefsKey(repo),
indexData,
&cacheutil.CacheActionOpts{Expiration: c.revisionCacheExpiration})
}

// GetHelmIndex retrieves helm repository index.yaml content from cache
Expand All @@ -172,21 +188,99 @@ func (c *Cache) SetGitReferences(repo string, references []*plumbing.Reference)
for i := range references {
input = append(input, references[i].Strings())
}
return c.cache.SetItem(gitRefsKey(repo), input, c.revisionCacheExpiration, false)
return c.cache.SetItem(gitRefsKey(repo), input, &cacheutil.CacheActionOpts{Expiration: c.revisionCacheExpiration})
}

// GetGitReferences retrieves resolved Git repository references from cache
func (c *Cache) GetGitReferences(repo string, references *[]*plumbing.Reference) error {
// Converts raw cache items to plumbing.Reference objects
func GitRefCacheItemToReferences(cacheItem [][2]string) *[]*plumbing.Reference {
var res []*plumbing.Reference
for i := range cacheItem {
// Skip empty data
if cacheItem[i][0] != "" || cacheItem[i][1] != "" {
res = append(res, plumbing.NewReferenceFromStrings(cacheItem[i][0], cacheItem[i][1]))
}
}
return &res
}

// TryLockGitRefCache attempts to lock the key for the Git repository references if the key doesn't exist, returns the value of
// GetGitReferences after calling the SET
func (c *Cache) TryLockGitRefCache(repo string, lockId string, references *[]*plumbing.Reference) (string, error) {
// This try set with DisableOverwrite is important for making sure that only one process is able to claim ownership
// A normal get + set, or just set would cause ownership to go to whoever the last writer was, and during race conditions
// leads to duplicate requests
err := c.cache.SetItem(gitRefsKey(repo), [][2]string{{cacheutil.CacheLockedValue, lockId}}, &cacheutil.CacheActionOpts{
Expiration: c.revisionCacheLockTimeout,
DisableOverwrite: true})
if err != nil {
// Log but ignore this error since we'll want to retry, failing to obtain the lock should not throw an error
log.Errorf("Error attempting to acquire git references cache lock: %v", err)
}
return c.GetGitReferences(repo, references)
}

// Retrieves the cache item for git repo references. Returns foundLockId, error
func (c *Cache) GetGitReferences(repo string, references *[]*plumbing.Reference) (string, error) {
var input [][2]string
if err := c.cache.GetItem(gitRefsKey(repo), &input); err != nil {
return err
err := c.cache.GetItem(gitRefsKey(repo), &input)
valueExists := len(input) > 0 && len(input[0]) > 0
switch {
// Unexpected Error
case err != nil && err != ErrCacheMiss:
log.Errorf("Error attempting to retrieve git references from cache: %v", err)
return "", err
// Value is set
case valueExists && input[0][0] != cacheutil.CacheLockedValue:
*references = *GitRefCacheItemToReferences(input)
return "", nil
// Key is locked
case valueExists:
return input[0][1], nil
// No key or empty key
default:
return "", nil
}
var res []*plumbing.Reference
for i := range input {
res = append(res, plumbing.NewReferenceFromStrings(input[i][0], input[i][1]))
}

// GetOrLockGitReferences retrieves the git references if they exist, otherwise creates a lock and returns so the caller can populate the cache
// Returns isLockOwner, localLockId, error
func (c *Cache) GetOrLockGitReferences(repo string, lockId string, references *[]*plumbing.Reference) (string, error) {
// Value matches the ttl on the lock in TryLockGitRefCache
waitUntil := time.Now().Add(c.revisionCacheLockTimeout)
// Wait only the maximum amount of time configured for the lock
// if the configured time is zero then the for loop will never run and instead act as the owner immediately
for time.Now().Before(waitUntil) {
// Get current cache state
if foundLockId, err := c.GetGitReferences(repo, references); foundLockId == lockId || err != nil || (references != nil && len(*references) > 0) {
return foundLockId, err
}
if foundLockId, err := c.TryLockGitRefCache(repo, lockId, references); foundLockId == lockId || err != nil || (references != nil && len(*references) > 0) {
return foundLockId, err
}
time.Sleep(1 * time.Second)
}
*references = res
return nil
// If configured time is 0 then this is expected
if c.revisionCacheLockTimeout > 0 {
log.Debug("Repository cache was unable to acquire lock or valid data within timeout")
}
// Timeout waiting for lock
return lockId, nil
}

// UnlockGitReferences unlocks the key for the Git repository references if needed
func (c *Cache) UnlockGitReferences(repo string, lockId string) error {
var input [][2]string
var err error
if err = c.cache.GetItem(gitRefsKey(repo), &input); err == nil &&
input != nil &&
len(input) > 0 &&
len(input[0]) > 1 &&
input[0][0] == cacheutil.CacheLockedValue &&
input[0][1] == lockId {
// We have the lock, so remove it
return c.cache.SetItem(gitRefsKey(repo), input, &cacheutil.CacheActionOpts{Delete: true})
}
return err
}

// refSourceCommitSHAs is a list of resolved revisions for each ref source. This allows us to invalidate the cache
Expand Down Expand Up @@ -274,11 +368,19 @@ func (c *Cache) SetManifests(revision string, appSrc *appv1.ApplicationSource, s
res.CacheEntryHash = hash
}

return c.cache.SetItem(manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs), res, c.repoCacheExpiration, res == nil)
return c.cache.SetItem(
manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs),
res,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: res == nil})
}

func (c *Cache) DeleteManifests(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, clusterInfo ClusterRuntimeInfo, namespace, trackingMethod, appLabelKey, appName string, refSourceCommitSHAs ResolvedRevisions) error {
return c.cache.SetItem(manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs), "", c.repoCacheExpiration, true)
return c.cache.SetItem(
manifestCacheKey(revision, appSrc, srcRefs, namespace, trackingMethod, appLabelKey, appName, clusterInfo, refSourceCommitSHAs),
"",
&cacheutil.CacheActionOpts{Delete: true})
}

func appDetailsCacheKey(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, trackingMethod appv1.TrackingMethod, refSourceCommitSHAs ResolvedRevisions) string {
Expand All @@ -293,7 +395,12 @@ func (c *Cache) GetAppDetails(revision string, appSrc *appv1.ApplicationSource,
}

func (c *Cache) SetAppDetails(revision string, appSrc *appv1.ApplicationSource, srcRefs appv1.RefTargetRevisionMapping, res *apiclient.RepoAppDetailsResponse, trackingMethod appv1.TrackingMethod, refSourceCommitSHAs ResolvedRevisions) error {
return c.cache.SetItem(appDetailsCacheKey(revision, appSrc, srcRefs, trackingMethod, refSourceCommitSHAs), res, c.repoCacheExpiration, res == nil)
return c.cache.SetItem(
appDetailsCacheKey(revision, appSrc, srcRefs, trackingMethod, refSourceCommitSHAs),
res,
&cacheutil.CacheActionOpts{
Expiration: c.repoCacheExpiration,
Delete: res == nil})
}

func revisionMetadataKey(repoURL, revision string) string {
Expand All @@ -306,7 +413,10 @@ func (c *Cache) GetRevisionMetadata(repoURL, revision string) (*appv1.RevisionMe
}

func (c *Cache) SetRevisionMetadata(repoURL, revision string, item *appv1.RevisionMetadata) error {
return c.cache.SetItem(revisionMetadataKey(repoURL, revision), item, c.repoCacheExpiration, false)
return c.cache.SetItem(
revisionMetadataKey(repoURL, revision),
item,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func revisionChartDetailsKey(repoURL, chart, revision string) string {
Expand All @@ -319,15 +429,21 @@ func (c *Cache) GetRevisionChartDetails(repoURL, chart, revision string) (*appv1
}

func (c *Cache) SetRevisionChartDetails(repoURL, chart, revision string, item *appv1.ChartDetails) error {
return c.cache.SetItem(revisionChartDetailsKey(repoURL, chart, revision), item, c.repoCacheExpiration, false)
return c.cache.SetItem(
revisionChartDetailsKey(repoURL, chart, revision),
item,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func gitFilesKey(repoURL, revision, pattern string) string {
return fmt.Sprintf("gitfiles|%s|%s|%s", repoURL, revision, pattern)
}

func (c *Cache) SetGitFiles(repoURL, revision, pattern string, files map[string][]byte) error {
return c.cache.SetItem(gitFilesKey(repoURL, revision, pattern), &files, c.repoCacheExpiration, false)
return c.cache.SetItem(
gitFilesKey(repoURL, revision, pattern),
&files,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func (c *Cache) GetGitFiles(repoURL, revision, pattern string) (map[string][]byte, error) {
Expand All @@ -340,7 +456,10 @@ func gitDirectoriesKey(repoURL, revision string) string {
}

func (c *Cache) SetGitDirectories(repoURL, revision string, directories []string) error {
return c.cache.SetItem(gitDirectoriesKey(repoURL, revision), &directories, c.repoCacheExpiration, false)
return c.cache.SetItem(
gitDirectoriesKey(repoURL, revision),
&directories,
&cacheutil.CacheActionOpts{Expiration: c.repoCacheExpiration})
}

func (c *Cache) GetGitDirectories(repoURL, revision string) ([]string, error) {
Expand Down
Loading

0 comments on commit 2afbbb5

Please sign in to comment.