From 7fe147409b8d1f8104c7a63653177f16eab50dc7 Mon Sep 17 00:00:00 2001
From: Jurre Stender <jurre@github.com>
Date: Wed, 20 Oct 2021 16:45:59 +0200
Subject: [PATCH] Treat GHES hosted sources as github sources

Previously, a dependency hosted on GitHub Enterprise Server (GHES) was not
treated as a GitHub source, meaning that we would not check for releases,
changelogs, etc. when requesting metadata for the PR.

This fixes that by first parsing the URL, then making a request to
`<host>/status` and checking for an `X-GitHub-Request-Id` header, which
GitHub Enterprise Server returns.
---
 .../bundler/metadata_finder_spec.rb           |  14 +++
 .../dependabot/cargo/metadata_finder_spec.rb  |   8 ++
 common/lib/dependabot/source.rb               |  38 ++++++-
 common/spec/dependabot/source_spec.rb         | 102 ++++++++++++++++++
 .../composer/metadata_finder_spec.rb          |   8 ++
 .../git_submodules/metadata_finder_spec.rb    |   9 ++
 .../go_modules/metadata_finder_spec.rb        |   8 ++
 .../dependabot/gradle/metadata_finder_spec.rb |   6 ++
 .../dependabot/hex/metadata_finder_spec.rb    |   6 ++
 .../dependabot/maven/metadata_finder_spec.rb  |   6 ++
 .../npm_and_yarn/metadata_finder_spec.rb      |   6 ++
 .../dependabot/nuget/metadata_finder_spec.rb  |   6 ++
 .../dependabot/python/metadata_finder_spec.rb |  10 ++
 13 files changed, 226 insertions(+), 1 deletion(-)

diff --git a/bundler/spec/dependabot/bundler/metadata_finder_spec.rb b/bundler/spec/dependabot/bundler/metadata_finder_spec.rb
index 6c0936d5e9..fc69ac8307 100644
--- a/bundler/spec/dependabot/bundler/metadata_finder_spec.rb
+++ b/bundler/spec/dependabot/bundler/metadata_finder_spec.rb
@@ -33,6 +33,20 @@
   end
   let(:dependency_name) { "business" }
 
+  before do
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+
+    stub_request(:get, "https://www.rubydoc.info/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
 
diff --git a/cargo/spec/dependabot/cargo/metadata_finder_spec.rb b/cargo/spec/dependabot/cargo/metadata_finder_spec.rb
index aea6f007a8..4f23d4b349 100644
--- a/cargo/spec/dependabot/cargo/metadata_finder_spec.rb
+++ b/cargo/spec/dependabot/cargo/metadata_finder_spec.rb
@@ -35,6 +35,14 @@
   let(:dependency_name) { "bitflags" }
   let(:dependency_source) { nil }
 
+  before do
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
     let(:crates_url) { "https://crates.io/api/v1/crates/bitflags" }
diff --git a/common/lib/dependabot/source.rb b/common/lib/dependabot/source.rb
index 6fd0f8f9a2..52b71d3cca 100644
--- a/common/lib/dependabot/source.rb
+++ b/common/lib/dependabot/source.rb
@@ -9,6 +9,15 @@ class Source
       (?:(?:/tree|/blob)/(?<branch>[^/]+)/(?<directory>.*)[\#|/])?
     }x.freeze
 
+    GITHUB_ENTERPRISE_SOURCE = %r{
+      (?<protocol>(http://|https://|git://|ssh://))*
+      (?<username>[^@]+@)*
+      (?<host>[^/]+)
+      [/:]
+      (?<repo>[\w.-]+/(?:(?!\.git|\.\s)[\w.-])+)
+      (?:(?:/tree|/blob)/(?<branch>[^/]+)/(?<directory>.*)[\#|/])?
+    }x.freeze # any-host form; captures protocol, username, host, repo, branch, directory
+
     GITLAB_SOURCE = %r{
       (?<provider>gitlab)
       (?:\.com)[/:]
@@ -40,7 +49,7 @@ class Source
                   :hostname, :api_endpoint
 
     def self.from_url(url_string)
-      return unless url_string&.match?(SOURCE_REGEX)
+      return github_enterprise_from_url(url_string) unless url_string&.match?(SOURCE_REGEX)
 
       captures = url_string.match(SOURCE_REGEX).named_captures
 
@@ -52,6 +61,33 @@ def self.from_url(url_string)
       )
     end
 
+    # Builds a "github" Source for a repository URL on a GitHub Enterprise
+    # Server host. Returns nil if the URL doesn't match
+    # GITHUB_ENTERPRISE_SOURCE or the host fails the github_enterprise? probe.
+    def self.github_enterprise_from_url(url_string)
+      parts = url_string&.match(GITHUB_ENTERPRISE_SOURCE)&.named_captures
+      return if parts.nil?
+
+      host = parts.fetch("host")
+      root = "https://#{host}" # probe and API endpoint both use https
+      return unless github_enterprise?(root)
+
+      new(
+        provider: "github", repo: parts.fetch("repo"),
+        directory: parts.fetch("directory"), branch: parts.fetch("branch"),
+        hostname: host, api_endpoint: File.join(root, "api", "v3")
+      )
+    end
+
+    def self.github_enterprise?(base_url)
+      # GHES answers <host>/status with an X-GitHub-Request-Id header.
+      # (A `Server: GitHub.com` check doesn't work with development envs.)
+      resp = Excon.get(File.join(base_url, "status"))
+      resp.status == 200 && !resp.headers["X-GitHub-Request-Id"].to_s.empty?
+    rescue StandardError
+      false # unreachable/invalid host: not GHES, so don't abort the lookup
+    end
+
     def initialize(provider:, repo:, directory: nil, branch: nil, commit: nil,
                    hostname: nil, api_endpoint: nil)
       if (hostname.nil? ^ api_endpoint.nil?) && (provider != "codecommit")
diff --git a/common/spec/dependabot/source_spec.rb b/common/spec/dependabot/source_spec.rb
index e7e65b9ded..3360f5b575 100644
--- a/common/spec/dependabot/source_spec.rb
+++ b/common/spec/dependabot/source_spec.rb
@@ -133,6 +133,108 @@
       end
     end
 
+    context "with a GitHub Enterprise URL" do
+      before do
+        stub_request(:get, "https://ghes.mycorp.com/status").to_return(
+          status: 200,
+          body: "GitHub lives!",
+          headers: {
+            Server: "GitHub.com",
+            "X-GitHub-Request-Id": "24e4e058-fdab-5ff4-8d79-be3493b7fa8e"
+          }
+        )
+      end
+      let(:url) { "https://ghes.mycorp.com/org/abc" }
+      its(:provider) { is_expected.to eq("github") }
+      its(:repo) { is_expected.to eq("org/abc") }
+      its(:directory) { is_expected.to be_nil }
+      its(:branch) { is_expected.to be_nil }
+
+      context "with a git protocol" do
+        let(:url) { "ssh://git@ghes.mycorp.com:org/abc" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a trailing .git" do
+        let(:url) { "https://ghes.mycorp.com/org/abc.git" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a trailing ." do
+        let(:url) { "https://ghes.mycorp.com/org/abc. " }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a trailing space" do
+        let(:url) { "https://ghes.mycorp.com/org/abc " }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a trailing /" do
+        let(:url) { "https://ghes.mycorp.com/org/abc/" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a trailing quote" do
+        let(:url) { "<a href=\"https://ghes.mycorp.com/org/abc\">" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with no directory" do
+        let(:url) { "https://ghes.mycorp.com/org/abc/tree/master/readme.md" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to be_nil }
+      end
+
+      context "with a directory" do
+        let(:url) { "https://ghes.mycorp.com/org/abc/tree/master/dir/readme.md" }
+        its(:provider) { is_expected.to eq("github") }
+        its(:repo) { is_expected.to eq("org/abc") }
+        its(:directory) { is_expected.to eq("dir") }
+        its(:branch) { is_expected.to eq("master") }
+
+        context "with the filename specified by a #" do
+          let(:url) { "https://ghes.mycorp.com/org/abc/tree/master/dir#readme.md" }
+          its(:provider) { is_expected.to eq("github") }
+          its(:repo) { is_expected.to eq("org/abc") }
+          its(:directory) { is_expected.to eq("dir") }
+        end
+
+        context "when not looking at the master branch" do
+          let(:url) { "https://ghes.mycorp.com/org/abc/tree/custom/dir/readme.md" }
+          its(:provider) { is_expected.to eq("github") }
+          its(:repo) { is_expected.to eq("org/abc") }
+          its(:directory) { is_expected.to eq("dir") }
+          its(:branch) { is_expected.to eq("custom") }
+        end
+      end
+
+      context "when the source is not GHES" do
+        before do
+          stub_request(:get, "https://not-ghes.mycorp.com/status").to_return(
+            status: 200,
+            body: "This is not GHES!",
+            headers: { Server: "nginx" }
+          )
+        end
+        let(:url) { "https://not-ghes.mycorp.com/org/abc" }
+        it { is_expected.to be_nil }
+      end
+    end
+
     context "with a Bitbucket URL" do
       let(:url) do
         "https://bitbucket.org/org/abc/src/master/dir/readme.md?at=default"
diff --git a/composer/spec/dependabot/composer/metadata_finder_spec.rb b/composer/spec/dependabot/composer/metadata_finder_spec.rb
index 16c1a1bfcc..1488dfe62d 100644
--- a/composer/spec/dependabot/composer/metadata_finder_spec.rb
+++ b/composer/spec/dependabot/composer/metadata_finder_spec.rb
@@ -33,6 +33,14 @@
   end
   let(:dependency_name) { "monolog/monolog" }
 
+  before do
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
     let(:packagist_url) { "https://packagist.org/p/monolog/monolog.json" }
diff --git a/git_submodules/spec/dependabot/git_submodules/metadata_finder_spec.rb b/git_submodules/spec/dependabot/git_submodules/metadata_finder_spec.rb
index e3662d3df3..4c088e1c29 100644
--- a/git_submodules/spec/dependabot/git_submodules/metadata_finder_spec.rb
+++ b/git_submodules/spec/dependabot/git_submodules/metadata_finder_spec.rb
@@ -42,6 +42,15 @@
     }]
   end
 
+  before do
+    # Not hosted on GitHub Enterprise Server
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
 
diff --git a/go_modules/spec/dependabot/go_modules/metadata_finder_spec.rb b/go_modules/spec/dependabot/go_modules/metadata_finder_spec.rb
index a7274522b3..0ba444db42 100644
--- a/go_modules/spec/dependabot/go_modules/metadata_finder_spec.rb
+++ b/go_modules/spec/dependabot/go_modules/metadata_finder_spec.rb
@@ -38,6 +38,14 @@
   let(:dependency_name) { "github.com/satori/go.uuid" }
   let(:source) { nil }
 
+  before do
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
 
diff --git a/gradle/spec/dependabot/gradle/metadata_finder_spec.rb b/gradle/spec/dependabot/gradle/metadata_finder_spec.rb
index 2333d9dc1b..a160734f56 100644
--- a/gradle/spec/dependabot/gradle/metadata_finder_spec.rb
+++ b/gradle/spec/dependabot/gradle/metadata_finder_spec.rb
@@ -49,6 +49,12 @@
 
     before do
       stub_request(:get, maven_url).to_return(status: 200, body: maven_response)
+
+      stub_request(:get, "https://example.com/status").to_return(
+        status: 200,
+        body: "Not GHES",
+        headers: {}
+      )
     end
 
     context "when there is a github link in the maven response" do
diff --git a/hex/spec/dependabot/hex/metadata_finder_spec.rb b/hex/spec/dependabot/hex/metadata_finder_spec.rb
index bbd4a91527..5a62219de4 100644
--- a/hex/spec/dependabot/hex/metadata_finder_spec.rb
+++ b/hex/spec/dependabot/hex/metadata_finder_spec.rb
@@ -41,6 +41,12 @@
 
     before do
       stub_request(:get, hex_url).to_return(status: 200, body: hex_response)
+
+      stub_request(:get, "https://example.com/status").to_return(
+        status: 200,
+        body: "Not GHES",
+        headers: {}
+      )
     end
 
     context "when there is a github link in the hex.pm response" do
diff --git a/maven/spec/dependabot/maven/metadata_finder_spec.rb b/maven/spec/dependabot/maven/metadata_finder_spec.rb
index 57c16c10a2..9ec92da48b 100644
--- a/maven/spec/dependabot/maven/metadata_finder_spec.rb
+++ b/maven/spec/dependabot/maven/metadata_finder_spec.rb
@@ -54,6 +54,12 @@
     before do
       stub_request(:get, maven_url).to_return(status: 200, body: maven_response)
       stub_request(:get, mockk_url).to_return(status: 200, body: mockk_response)
+
+      stub_request(:get, "https://example.com/status").to_return(
+        status: 200,
+        body: "Not GHES",
+        headers: {}
+      )
     end
 
     context "when the dependency name has a classifier" do
diff --git a/npm_and_yarn/spec/dependabot/npm_and_yarn/metadata_finder_spec.rb b/npm_and_yarn/spec/dependabot/npm_and_yarn/metadata_finder_spec.rb
index bfcc970d00..e336278a65 100644
--- a/npm_and_yarn/spec/dependabot/npm_and_yarn/metadata_finder_spec.rb
+++ b/npm_and_yarn/spec/dependabot/npm_and_yarn/metadata_finder_spec.rb
@@ -41,6 +41,12 @@
         to_return(status: 200, body: npm_latest_version_response)
       stub_request(:get, npm_url).
         to_return(status: 200, body: npm_all_versions_response)
+      stub_request(:get, "https://example.com/status").to_return(
+        status: 200,
+        body: "Not GHES",
+        headers: {}
+      )
+      stub_request(:get, "https://jshttp/status").to_return(status: 404)
     end
 
     context "for a git dependency" do
diff --git a/nuget/spec/dependabot/nuget/metadata_finder_spec.rb b/nuget/spec/dependabot/nuget/metadata_finder_spec.rb
index 6190614470..ec185f4d94 100644
--- a/nuget/spec/dependabot/nuget/metadata_finder_spec.rb
+++ b/nuget/spec/dependabot/nuget/metadata_finder_spec.rb
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 
+require "spec_helper"
 require "dependabot/dependency"
 require "dependabot/nuget/metadata_finder"
 require_common_spec "metadata_finders/shared_examples_for_metadata_finders"
@@ -51,6 +52,11 @@
 
     before do
       stub_request(:get, nuget_url).to_return(status: 200, body: nuget_response)
+      stub_request(:get, "https://example.com/status").to_return(
+        status: 200,
+        body: "Not GHES",
+        headers: {}
+      )
     end
 
     context "with a github link in the nuspec" do
diff --git a/python/spec/dependabot/python/metadata_finder_spec.rb b/python/spec/dependabot/python/metadata_finder_spec.rb
index 00d616ef7e..9f5f3565e8 100644
--- a/python/spec/dependabot/python/metadata_finder_spec.rb
+++ b/python/spec/dependabot/python/metadata_finder_spec.rb
@@ -36,6 +36,16 @@
   let(:dependency_name) { "luigi" }
   let(:version) { "1.0" }
 
+  before do
+    stub_request(:get, "https://example.com/status").to_return(
+      status: 200,
+      body: "Not GHES",
+      headers: {}
+    )
+    stub_request(:get, "https://initd.org/status").to_return(status: 404)
+    stub_request(:get, "https://pypi.org/status").to_return(status: 404)
+  end
+
   describe "#source_url" do
     subject(:source_url) { finder.source_url }
     let(:pypi_url) { "https://pypi.org/pypi/#{dependency_name}/json" }