From 3b53d0b5a1ce8f74d593ec3c898ee459c6493edb Mon Sep 17 00:00:00 2001 From: Jose Javier Merchante Date: Fri, 30 Aug 2024 10:21:01 +0200 Subject: [PATCH] [git] Improve branches study for consistency Previously, while the branches study was running, the `branches` field remained empty or partially filled until the study completed, leading to incorrect data being displayed on the dashboard. With this change, the study is first created in an auxiliary field `branches_aux`, and once complete, the results are moved to the `branches` field. This approach ensures consistent and accurate data is displayed. Signed-off-by: Jose Javier Merchante --- grimoire_elk/enriched/git.py | 56 +++++++++++++++++-- .../git-branches-study-improved.yml | 12 ++++ 2 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 releases/unreleased/git-branches-study-improved.yml diff --git a/grimoire_elk/enriched/git.py b/grimoire_elk/enriched/git.py index a85a643d6..106b557ac 100644 --- a/grimoire_elk/enriched/git.py +++ b/grimoire_elk/enriched/git.py @@ -970,8 +970,14 @@ def enrich_git_branches(self, ocean_backend, enrich_backend, run_month_days=[7, logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e)) continue - logger.debug("[git] study git-branches repo {} in index {} processed".format( - git_repo.uri, anonymize_url(enrich_backend.elastic.index_url))) + try: + self.update_branches_field(git_repo, enrich_backend) + except Exception as e: + logger.error("[git] study git-branches failed on repo {}, due to {}".format(git_repo.uri, e)) + continue + + logger.info("[git] study git-branches repo {} in index {} processed".format( + git_repo.uri, anonymize_url(enrich_backend.elastic.index_url))) logger.info("[git] study git-branches end") @@ -996,7 +1002,7 @@ def delete_commit_branches(self, git_repo, enrich_backend): es_query = """ { "script": { - "source": "ctx._source.branches = new HashSet();", + "source": "ctx._source.branches_aux = new HashSet();", 
"lang": "painless" }, "query": { @@ -1058,6 +1064,48 @@ def add_commit_branches(self, git_repo, enrich_backend): logger.error("[git] Skip adding branch info for repo {} due to {}".format(git_repo.uri, e)) return + def update_branches_field(self, git_repo, enrich_backend): + """Replace the branches field with the contents of branches_aux with + the processed branches in the enriched index. + + :param git_repo: GitRepository object + :param enrich_backend: the enrich backend + """ + fltr = """ + "filter": [ + { + "term": { + "origin": "%s" + } + } + ] + """ % anonymize_url(git_repo.uri) + + es_query = """ + { + "script": { + "source": "ctx._source.branches = ctx._source.branches_aux; ctx._source.remove('branches_aux');", + "lang": "painless" + }, + "query": { + "bool": { + %s + } + } + } + """ % fltr + + index = enrich_backend.elastic.index_url + r = self.requests.post(index + "/_update_by_query?refresh", data=es_query, headers=HEADER_JSON, verify=False) + try: + r.raise_for_status() + except requests.exceptions.HTTPError: + logger.error("[git] Error updating branches field for {}".format(anonymize_url(index))) + logger.error(r.text) + return + + logger.debug("[git] Update branches field {}, index {}".format(r.text, anonymize_url(index))) + def __process_commits_in_branch(self, enrich_backend, repo_origin, branch_name, commits): commits_str = ",".join(['"%s"' % c for c in commits]) @@ -1076,7 +1124,7 @@ def __process_commits_in_branch(self, enrich_backend, repo_origin, branch_name, es_query = """ { "script": { - "source": "if(!ctx._source.branches.contains(params.branch)){ctx._source.branches.add(params.branch);}", + "source": "if(!ctx._source.branches_aux.contains(params.branch)){ctx._source.branches_aux.add(params.branch);}", "lang": "painless", "params": { "branch": "'%s'" diff --git a/releases/unreleased/git-branches-study-improved.yml b/releases/unreleased/git-branches-study-improved.yml new file mode 100644 index 000000000..4391bb165 --- /dev/null +++ 
b/releases/unreleased/git-branches-study-improved.yml @@ -0,0 +1,12 @@ +--- +title: Git branches study improved +category: fixed +author: null +issue: null +notes: > + Previously, while the branches study was running, + the `branches` field remained empty or partially filled + until the study was completed, leading to incorrect data + being displayed on the dashboard. + With this change, the `branches` field is updated only + after the study has finished.