From 134c47fffe660afb6b0a6840db8e99c984e21ee7 Mon Sep 17 00:00:00 2001 From: Matthias Balke Date: Wed, 19 May 2021 08:43:54 +0200 Subject: [PATCH] implement HTTPS support for cloning (#225) --- README.md | 43 +++++++++++++++++++++++------- dockerize.nix | 3 ++- marge/app.py | 18 ++++++++++--- marge/bot.py | 27 ++++++++++++------- marge/project.py | 4 +++ marge/store.py | 64 ++++++++++++++++++++++++++++++++++++--------- tests/test_store.py | 2 +- 7 files changed, 125 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 89e76b9b..32f975f3 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ optional arguments: [env var: MARGE_AUTH_TOKEN_FILE] (default: None) --gitlab-url URL Your GitLab instance, e.g. "https://gitlab.example.com". [env var: MARGE_GITLAB_URL] (default: None) + --use-https use HTTP(S) instead of SSH for GIT repository access + [env var: MARGE_USE_HTTPS] (default: False) --ssh-key KEY The private ssh key for marge so it can clone/push. DISABLED because passing credentials on the command line is insecure: You can still set it via ENV variable or config file, or use "--ssh-key-file" flag. @@ -79,20 +81,21 @@ optional arguments: --ssh-key-file FILE Path to the private ssh key for marge so it can clone/push. [env var: MARGE_SSH_KEY_FILE] (default: None) --embargo INTERVAL[,..] - Time(s) during which no merging is to take place, e.g. "Friday 1pm - Monday 9am" - or "Fri 12:30 Europe/London - Mon 08:00 Europe/London" + Time(s) during which no merging is to take place, e.g. "Friday 1pm - Monday 9am". [env var: MARGE_EMBARGO] (default: None) --use-merge-strategy Use git merge instead of git rebase to update the *source* branch (EXPERIMENTAL) If you need to use a strict no-rebase workflow (in most cases you don't want this, even if you configured gitlab to use merge requests to use merge commits on the *target* branch (the default).) 
[env var: MARGE_USE_MERGE_STRATEGY] (default: False) + --rebase-remotely Instead of rebasing in a local clone of the repository, use GitLab's + built-in rebase functionality, via their API. Note that Marge can't add + information in the commits in this case. + [env var: MARGE_REBASE_REMOTELY] (default: False) --add-tested Add "Tested: marge-bot <$MR_URL>" for the final commit on branch after it passed CI. [env var: MARGE_ADD_TESTED] (default: False) --batch Enable processing MRs in batches [env var: MARGE_BATCH] (default: False) - --use-no-ff-batches Disable fast forwarding when merging MR batches. - [env var: MARGE_USE_NO_FF_BATCHES] (default: False) --add-part-of Add "Part-of: <$MR_URL>" to each commit in MR. [env var: MARGE_ADD_PART_OF] (default: False) --add-reviewers Add "Reviewed-by: $approver" for each approver of MR to each commit in MR. @@ -100,10 +103,9 @@ optional arguments: --impersonate-approvers Marge-bot pushes effectively don't change approval status. [env var: MARGE_IMPERSONATE_APPROVERS] (default: False) - --merge-order The order you want marge to merge its requests. - As of earliest merge request creation time (created_at), update time (updated_at) - or assigned to 'marge-bot' user time (assigned_at) - [env var: MARGE_MERGE_ORDER] (default: created_at) + --merge-order {created_at,updated_at,assigned_at} + Order marge merges assigned requests. created_at (default), updated_at or assigned_at. + [env var: MARGE_MERGE_ORDER] (default: created_at) --approval-reset-timeout APPROVAL_RESET_TIMEOUT How long to wait for approvals to reset after pushing. Only useful with the "new commits remove all approvals" option in a project's settings. @@ -136,6 +138,11 @@ optional arguments: --cli Run marge-bot as a single CLI command, not as a long-running service. This may be used to run marge-bot in scheduled CI pipelines or cronjobs. 
[env var: MARGE_CLI] (default: False) + --use-no-ff-batches Disable fast forwarding when merging MR batches [env var: MARGE_USE_NO_FF_BATCHES] (default: False) + --use-merge-commit-batches + Use merge commit when creating batches, so that the commits in the batch MR will be the same with in individual MRs. Requires sudo scope in the access token. + [env var: MARGE_USE_MERGE_COMMIT_BATCHES] (default: False) + --skip-ci-batches Skip CI when updating individual MRs when using batches [env var: MARGE_SKIP_CI_BATCHES] (default: False) ``` Here is a config file example ```yaml @@ -156,6 +163,8 @@ project-regexp: .* # choose one way of specifying the SSH key #ssh-key: KEY ssh-key-file: token.FILE +# OR use HTTPS instead of SSH +#use-https: true ``` For more information about configuring marge-bot see `--help` @@ -203,7 +212,7 @@ ssh-keygen -t ed25519 -C marge-bot@invalid -f marge-bot-ssh-key -P '' Add the public key (`marge-bot-ssh-key.pub`) to the user's `SSH Keys` in GitLab and keep the private one handy. -### Running marge-bot in docker (what we do) +### Running marge-bot in docker using SSH (what we do) Assuming you have already got docker installed, the quickest and most minimal way to run marge is like so (*but see note about passing secrets on the @@ -256,6 +265,22 @@ may contain bugs. You can also specify a particular version as a tag, e.g. `smarkets/marge-bot:0.7.0`. +### Running marge-bot in docker using HTTPS + +It is also possible to use Git over HTTPS instead of Git over SSH. To use HTTPS instead of SSH, +add the `--use-https` flag and do not provide any SSH keys. Alternatively you can set the +environment variable `MARGE_USE_HTTPS` or the config file property `use-https`. 
+ +```bash +docker run --restart=on-failure \ # restart if marge crashes because GitLab is flaky + -e MARGE_AUTH_TOKEN="$(cat marge-bot.token)" \ + smarkets/marge-bot \ + --use-https \ + --gitlab-url='http://your.gitlab.instance.com' +``` + +HTTPS can be used with any other deployment technique as well. + ### Running marge-bot in kubernetes It's also possible to run marge in kubernetes, e.g. here's how you use a ktmpl template: diff --git a/dockerize.nix b/dockerize.nix index cafe9f1f..c622a6de 100644 --- a/dockerize.nix +++ b/dockerize.nix @@ -31,9 +31,10 @@ in busybox gitMinimal openssh + cacert ] ++ [ marge ]; config = { Entrypoint = [ "/bin/marge.app" ]; - Env = ["LANG=en_US.UTF-8" ''LOCALE_ARCHIVE=/lib/locale/locale-archive'']; + Env = ["LANG=en_US.UTF-8" ''LOCALE_ARCHIVE=/lib/locale/locale-archive'' "GIT_SSL_CAINFO=/etc/ssl/certs/ca-bundle.crt" "SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt"]; }; } diff --git a/marge/app.py b/marge/app.py index c87fabdc..49402d7b 100644 --- a/marge/app.py +++ b/marge/app.py @@ -78,8 +78,14 @@ def regexp(str_regex): metavar='URL', help='Your GitLab instance, e.g.
"https://gitlab.example.com".\n', ) - ssh_key_group = parser.add_mutually_exclusive_group(required=True) - ssh_key_group.add_argument( + repo_access = parser.add_mutually_exclusive_group(required=True) + repo_access.add_argument( + '--use-https', + env_var='MARGE_USE_HTTPS', + action='store_true', + help='use HTTP(S) instead of SSH for GIT repository access\n', + ) + repo_access.add_argument( '--ssh-key', type=str, metavar='KEY', @@ -89,7 +95,7 @@ def regexp(str_regex): 'You can still set it via ENV variable or config file, or use "--ssh-key-file" flag.\n' ), ) - ssh_key_group.add_argument( + repo_access.add_argument( '--ssh-key-file', type=str, # because we want a file location, not the content metavar='FILE', @@ -261,7 +267,9 @@ def regexp(str_regex): @contextlib.contextmanager def _secret_auth_token_and_ssh_key(options): auth_token = options.auth_token or options.auth_token_file.readline().strip() - if options.ssh_key_file: + if options.use_https: + yield auth_token, None + elif options.ssh_key_file: yield auth_token, options.ssh_key_file else: with tempfile.NamedTemporaryFile(mode='w', prefix='ssh-key-') as tmp_ssh_key_file: @@ -313,6 +321,8 @@ def main(args=None): config = bot.BotConfig( user=user, + use_https=options.use_https, + auth_token=auth_token, ssh_key_file=ssh_key_file, project_regexp=options.project_regexp, git_timeout=options.git_timeout, diff --git a/marge/bot.py b/marge/bot.py index f5f3de91..09969627 100644 --- a/marge/bot.py +++ b/marge/bot.py @@ -32,13 +32,22 @@ def __init__(self, *, api, config): def start(self): with TemporaryDirectory() as root_dir: - repo_manager = store.RepoManager( - user=self.user, - root_dir=root_dir, - ssh_key_file=self._config.ssh_key_file, - timeout=self._config.git_timeout, - reference=self._config.git_reference_repo, - ) + if self._config.use_https: + repo_manager = store.HttpsRepoManager( + user=self.user, + root_dir=root_dir, + auth_token=self._config.auth_token, + timeout=self._config.git_timeout, + 
reference=self._config.git_reference_repo, + ) + else: + repo_manager = store.SshRepoManager( + user=self.user, + root_dir=root_dir, + ssh_key_file=self._config.ssh_key_file, + timeout=self._config.git_timeout, + reference=self._config.git_reference_repo, + ) self._run(repo_manager) @property @@ -189,8 +198,8 @@ def _get_single_job(self, project, merge_request, repo, options): class BotConfig(namedtuple('BotConfig', - 'user ssh_key_file project_regexp merge_order merge_opts git_timeout ' + - 'git_reference_repo branch_regexp source_branch_regexp batch cli')): + 'user use_https auth_token ssh_key_file project_regexp merge_order merge_opts ' + + 'git_timeout git_reference_repo branch_regexp source_branch_regexp batch cli')): pass diff --git a/marge/project.py b/marge/project.py index 9f5e810c..0ca5b63a 100644 --- a/marge/project.py +++ b/marge/project.py @@ -77,6 +77,10 @@ def path_with_namespace(self): def ssh_url_to_repo(self): return self.info['ssh_url_to_repo'] + @property + def http_url_to_repo(self): + return self.info['http_url_to_repo'] + @property def merge_requests_enabled(self): return self.info['merge_requests_enabled'] diff --git a/marge/store.py b/marge/store.py index 5f819673..78017cac 100644 --- a/marge/store.py +++ b/marge/store.py @@ -1,3 +1,4 @@ +import re import tempfile from . 
import git @@ -5,14 +6,31 @@ class RepoManager: - def __init__(self, user, root_dir, ssh_key_file=None, timeout=None, reference=None): + def __init__(self, user, root_dir, timeout=None, reference=None): self._root_dir = root_dir self._user = user - self._ssh_key_file = ssh_key_file self._repos = {} self._timeout = timeout self._reference = reference + def forget_repo(self, project): + self._repos.pop(project.id, None) + + @property + def user(self): + return self._user + + @property + def root_dir(self): + return self._root_dir + + +class SshRepoManager(RepoManager): + + def __init__(self, user, root_dir, ssh_key_file=None, timeout=None, reference=None): + super().__init__(user, root_dir, timeout, reference) + self._ssh_key_file = ssh_key_file + def repo_for_project(self, project): repo = self._repos.get(project.id) if not repo or repo.remote_url != project.ssh_url_to_repo: @@ -31,17 +49,39 @@ def repo_for_project(self, project): return repo - def forget_repo(self, project): - self._repos.pop(project.id, None) - @property - def user(self): - return self._user + def ssh_key_file(self): + return self._ssh_key_file - @property - def root_dir(self): - return self._root_dir + +class HttpsRepoManager(RepoManager): + + def __init__(self, user, root_dir, auth_token=None, timeout=None, reference=None): + super().__init__(user, root_dir, timeout, reference) + self._auth_token = auth_token + + def repo_for_project(self, project): + repo = self._repos.get(project.id) + if not repo or repo.remote_url != project.http_url_to_repo: + credentials = "oauth2:" + self._auth_token + # insert token auth "oauth2:@" + pattern = "(http(s)?://)" + replacement = r"\1" + credentials + "@" + repo_url = re.sub(pattern, replacement, project.http_url_to_repo, 1) + local_repo_dir = tempfile.mkdtemp(dir=self._root_dir) + + repo = git.Repo(repo_url, local_repo_dir, ssh_key_file=None, + timeout=self._timeout, reference=self._reference) + repo.clone() + repo.config_user_info( + 
user_email=self._user.email, + user_name=self._user.name, + ) + + self._repos[project.id] = repo + + return repo @property - def ssh_key_file(self): - return self._ssh_key_file + def auth_token(self): + return self._auth_token diff --git a/tests/test_store.py b/tests/test_store.py index a9f3a698..d0459c5e 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -18,7 +18,7 @@ class TestRepoManager: def setup_method(self, _method): user = marge.user.User(api=None, info=dict(USER_INFO, name='Peter Parker', email='pparker@bugle.com')) self.root_dir = tempfile.TemporaryDirectory() - self.repo_manager = marge.store.RepoManager( + self.repo_manager = marge.store.SshRepoManager( user=user, root_dir=self.root_dir.name, ssh_key_file='/ssh/key', )