Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

repospec: support ssh urls with ssh certificates #4741

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 100 additions & 56 deletions api/internal/git/repospec.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"fmt"
"net/url"
"path/filepath"
"regexp"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -86,17 +87,17 @@ func NewRepoSpecFromURL(n string) (*RepoSpec, error) {
if filepath.IsAbs(n) {
return nil, fmt.Errorf("uri looks like abs path: %s", n)
}
host, orgRepo, path, gitRef, gitSubmodules, suffix, gitTimeout := parseGitURL(n)
if orgRepo == "" {
rs, err := parseGitURL(n)
if err != nil {
return nil, err
}
KnVerey marked this conversation as resolved.
Show resolved Hide resolved
if rs.OrgRepo == "" {
return nil, fmt.Errorf("url lacks orgRepo: %s", n)
}
if host == "" {
if rs.Host == "" {
return nil, fmt.Errorf("url lacks host: %s", n)
}
return &RepoSpec{
raw: n, Host: host, OrgRepo: orgRepo,
Dir: notCloned, Path: path, Ref: gitRef, GitSuffix: suffix,
Submodules: gitSubmodules, Timeout: gitTimeout}, nil
return rs, nil
}

const (
Expand All @@ -108,61 +109,68 @@ const (
// From strings like git@github.com:someOrg/someRepo.git or
// https://github.com/someOrg/someRepo?ref=someHash, extract
// the parts.
func parseGitURL(n string) (
host string, orgRepo string, path string, gitRef string, gitSubmodules bool, gitSuff string, gitTimeout time.Duration) {
func parseGitURL(n string) (*RepoSpec, error) {
var err error
rs := &RepoSpec{raw: n, Dir: notCloned}
if strings.Contains(n, gitDelimiter) {
index := strings.Index(n, gitDelimiter)
// Adding _git/ to host
host = normalizeGitHostSpec(n[:index+len(gitDelimiter)])
orgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0]
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[index+len(gitDelimiter)+len(orgRepo):])
return
rs.Host, err = normalizeGitHostSpec(n[:index+len(gitDelimiter)])
if err != nil {
return nil, err
}
rs.OrgRepo = strings.Split(strings.Split(n[index+len(gitDelimiter):], "/")[0], "?")[0]
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n[index+len(gitDelimiter)+len(rs.OrgRepo):])
return rs, nil
}
host, n = parseHostSpec(n)
isLocal := strings.HasPrefix(host, "file://")
rs.Host, n, err = parseHostSpec(n)
if err != nil {
return nil, err
}
isLocal := strings.HasPrefix(rs.Host, "file://")
if !isLocal {
gitSuff = gitSuffix
rs.GitSuffix = gitSuffix
}
if strings.Contains(n, gitSuffix) {
gitSuff = gitSuffix
rs.GitSuffix = gitSuffix
index := strings.Index(n, gitSuffix)
orgRepo = n[0:index]
rs.OrgRepo = n[0:index]
n = n[index+len(gitSuffix):]
if len(n) > 0 && n[0] == '/' {
n = n[1:]
}
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n)
return rs, nil
}

if isLocal {
if idx := strings.Index(n, "//"); idx > 0 {
orgRepo = n[:idx]
rs.OrgRepo = n[:idx]
n = n[idx+2:]
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n)
return rs, nil
}
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
orgRepo = path
path = ""
return
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n)
rs.OrgRepo = rs.Path
rs.Path = ""
return rs, nil
}

i := strings.Index(n, "/")
if i < 1 {
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n)
return rs, nil
}
j := strings.Index(n[i+1:], "/")
if j >= 0 {
j += i + 1
orgRepo = n[:j]
path, gitRef, gitTimeout, gitSubmodules = peelQuery(n[j+1:])
return
rs.OrgRepo = n[:j]
rs.Path, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n[j+1:])
return rs, nil
}
path = ""
orgRepo, gitRef, gitTimeout, gitSubmodules = peelQuery(n)
return host, orgRepo, path, gitRef, gitSubmodules, gitSuff, gitTimeout
rs.Path = ""
rs.OrgRepo, rs.Ref, rs.Timeout, rs.Submodules = peelQuery(n)
return rs, nil
}

// Clone git submodules by default.
Expand Down Expand Up @@ -212,18 +220,26 @@ func peelQuery(arg string) (string, string, time.Duration, bool) {
return parsed.Path, ref, duration, submodules
}

func parseHostSpec(n string) (string, string) {
var userRegexp = regexp.MustCompile(`^([a-zA-Z][a-zA-Z0-9-]*)@`)

func parseHostSpec(n string) (string, string, error) {
KnVerey marked this conversation as resolved.
Show resolved Hide resolved
var host string
// Start accumulating the host part.
for _, p := range []string{
// Order matters here.
"git::", "gh:", "ssh://", "https://", "http://", "file://",
"git@", "github.com:", "github.com/"} {
if len(p) < len(n) && strings.ToLower(n[:len(p)]) == p {
n = n[len(p):]
host += p
consumeHostStrings := func(parts []string) {
for _, p := range parts {
if len(p) < len(n) && strings.ToLower(n[:len(p)]) == p {
n = n[len(p):]
host += p
}
}
}
// Start accumulating the host part.
// Order matters here.
consumeHostStrings([]string{"git::", "gh:", "ssh://", "https://", "http://", "file://"})
if p := userRegexp.FindString(n); p != "" {
n = n[len(p):]
host += p
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm right about the simplification from the git protocol not being valid with GitHub, then I think the remaining source of correctness risk is for any non-GitHub URLs that contain usernames and work today. Notably, I found an interaction between this code and the non-GitHub handling at L261, which expects the protocol to have been the last thing parsed. For example, the following smoke test passes on master and fails (with poor host parsing) on this branch:

		{
			name:      "t71",
			input:     "ssh://[email protected]/ourteamname/ourrepositoryname.git//path?ref=branch",
			cloneSpec: "ssh://[email protected]/ourteamname/ourrepositoryname.git",
			absPath:   notCloned.Join("path"),
			repoSpec: RepoSpec{
				Host:      "ssh://[email protected]/",
				OrgRepo:   "ourteamname/ourrepositoryname",
				Path:      "/path",
				Ref:       "branch",
				GitSuffix: ".git",
			},
		},

I suspect we need to make more changes to this method to both make this work and leave the method in a state that makes some sense.

}
consumeHostStrings([]string{"github.com:", "github.com/"})
if host == "git@" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Playing devil's advocate with my own comment above: arguably this conditional here is our current username handling, and we could reduce special casing in the implementation by changing it to apply whenever we've found any username, and allow it to consume any domain, rather than just github.com above. That would add risk in the form of additional supported permutations we can't specifically integration test... but in another way also reduce it by making use cases like the new one in this PR use the code paths we already have integration coverage for (not to mention reducing the difficulty of understanding this code). What do you think?

cc @natasha41575 for another opinion on this tradeoff

Copy link
Contributor Author

@mightyguava mightyguava Sep 26, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Acknowledging I read this comment but don't have an opinion. I do like the idea! I'd prefer not to include that change in this PR though, since it would probably be much bigger.

i := strings.Index(n, "/")
if i > -1 {
Expand All @@ -236,7 +252,7 @@ func parseHostSpec(n string) (string, string) {
n = n[i+1:]
}
}
return host, n
return host, n, nil
}

// If host is a http(s) or ssh URL, grab the domain part.
Expand All @@ -252,22 +268,50 @@ func parseHostSpec(n string) (string, string) {
}
}

return normalizeGitHostSpec(host), n
host, err := normalizeGitHostSpec(host)
return host, n, err
}

func normalizeGitHostSpec(host string) string {
var githubRegexp = regexp.MustCompile(`^(?:ssh://)?([a-zA-Z][a-zA-Z0-9-]*)@(github.com[:/]?)`)

func normalizeGitHostSpec(host string) (string, error) {
s := strings.ToLower(host)
if strings.Contains(s, "github.com") {
if strings.Contains(s, "git@") || strings.Contains(s, "ssh:") {
host = "[email protected]:"
} else {
host = "https://github.com/"
}
}
if strings.HasPrefix(s, "git::") {

// The git:: syntax is meant to force the Git protocol (separate from SSH
// and HTTPS), but we drop it here, to preserve past behavior.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Worth noting that GitHub actually does not support the git protocol? I just discovered this myself. https://blog.readthedocs.com/github-git-protocol-deprecation/

isGitProtocol := strings.HasPrefix(s, "git::")
if isGitProtocol {
host = strings.TrimPrefix(s, "git::")
}
return host

// Special treatment for github.com
if strings.Contains(host, "github.com") {
m := githubRegexp.FindStringSubmatch(host)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this is matching the various inputs we normalize to an SSH GitHub URL, right? Let's make the regex var name more specific.

I think it would also be helpful to have more comments in this section. For example, on L291, we are returning a normalized HTTPS URL when we've concluded that it is not SSH.

if m == nil {
return "https://github.com/", nil
}
userName, realHost := m[1], m[2]

if realHost == "github.com/" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only other possibility is that it already ends in a colon, right? Can we force the colon directly in the return result and avoid needing to capture the host in the regex at all?

realHost = "github.com:"
}

const gitUser = "git"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The git user is referenced several other times in this file, so I'm thinking this should be introduced external to this method (possibly as git@ instead).

isGitUser := userName == gitUser || userName == ""
if userName == "" {
userName = gitUser
}

switch {
case isGitProtocol && !isGitUser:
return "", fmt.Errorf("git protocol on github.com only allows git@ user")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I just discovered and mentioned above re: git protocol not being supported at all on GitHub changes my mind about this error. In light of that, at least at the current point in history, removal of git:// is essentially auto-correcting a removed protocol in favor of one that will still work. And when we're talking SSH, AFAIK we have no way to distinguish between the public GitHub case where only the git username is valid and the GHE case you are creating this PR for where anything can be valid.

I think this realization lets us simplify back much closer to the original implementation.

case isGitProtocol:
return "[email protected]:", nil
default:
return fmt.Sprintf("%s@%s", userName, realHost), nil
}
}
return host, nil
}

// The format of Azure repo URL is documented
Expand Down
65 changes: 58 additions & 7 deletions api/internal/git/repospec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ func TestNewRepoSpecFromUrl_Permute(t *testing.T) {
{"git::https://git.example.com/", "https://git.example.com/"},
KnVerey marked this conversation as resolved.
Show resolved Hide resolved
{"[email protected]:", "[email protected]:"},
{"[email protected]/", "[email protected]:"},
{"[email protected]:", "[email protected]:"},
{"[email protected]/", "[email protected]:"},
KnVerey marked this conversation as resolved.
Show resolved Hide resolved
{"git::[email protected]:", "[email protected]:"},
}
var orgRepos = []string{"someOrg/someRepo", "kubernetes/website"}
var pathNames = []string{"README.md", "foo/krusty.txt", ""}
Expand Down Expand Up @@ -99,16 +102,19 @@ func TestNewRepoSpecFromUrlErrors(t *testing.T) {
{"htxxxtp://github.com/", "url lacks host"},
{"ssh://git.example.com", "url lacks orgRepo"},
{"git::___", "url lacks orgRepo"},
{"git::[email protected]:kubernetes-sigs/kustomize", "git protocol on github.com only allows git@ user"},
}

for _, testCase := range badData {
_, err := NewRepoSpecFromURL(testCase.url)
if err == nil {
t.Error("expected error")
}
if !strings.Contains(err.Error(), testCase.error) {
t.Errorf("unexpected error: %s", err)
}
t.Run(testCase.error, func(t *testing.T) {
_, err := NewRepoSpecFromURL(testCase.url)
if err == nil {
t.Fatal("expected error")
}
if !strings.Contains(err.Error(), testCase.error) {
t.Errorf("unexpected error: %s", err)
}
})
}
}

Expand Down Expand Up @@ -407,6 +413,51 @@ func TestNewRepoSpecFromUrl_Smoke(t *testing.T) {
GitSuffix: ".git",
},
},
{
name: "t25",
input: "https://[email protected]/kubernetes-sigs/kustomize",
cloneSpec: "https://github.com/kubernetes-sigs/kustomize.git",
absPath: notCloned.String(),
repoSpec: RepoSpec{
Host: "https://github.com/",
OrgRepo: "kubernetes-sigs/kustomize",
GitSuffix: ".git",
},
},
{
name: "t26",
input: "ssh://[email protected]/kubernetes-sigs/kustomize",
cloneSpec: "[email protected]:kubernetes-sigs/kustomize.git",
absPath: notCloned.String(),
repoSpec: RepoSpec{
Host: "[email protected]:",
OrgRepo: "kubernetes-sigs/kustomize",
GitSuffix: ".git",
},
},
{
name: "t27",
input: "[email protected]/kubernetes-sigs/kustomize",
cloneSpec: "[email protected]:kubernetes-sigs/kustomize.git",
absPath: notCloned.String(),
repoSpec: RepoSpec{
Host: "[email protected]:",
OrgRepo: "kubernetes-sigs/kustomize",
GitSuffix: ".git",
},
},
{
name: "t28",
input: "[email protected]/someorg/somerepo/somepath",
cloneSpec: "[email protected]:someorg/somerepo.git",
absPath: notCloned.Join("somepath"),
repoSpec: RepoSpec{
Host: "[email protected]:",
OrgRepo: "someorg/somerepo",
Path: "somepath",
GitSuffix: ".git",
},
},
}
for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
Expand Down