diff --git a/Gemfile b/Gemfile index 7bfb39856e..7825dcd02b 100644 --- a/Gemfile +++ b/Gemfile @@ -39,3 +39,4 @@ gem "webrick", "~> 1.7" # Link checker gem "typhoeus" gem "ruby-link-checker" +gem "ruby-enum" diff --git a/_plugins/link-checker.rb b/_plugins/link-checker.rb index 5dfd53c9f1..340e13f51e 100644 --- a/_plugins/link-checker.rb +++ b/_plugins/link-checker.rb @@ -12,12 +12,13 @@ require 'pathname' require 'typhoeus' require 'ruby-link-checker' +require 'ruby-enum' ## # This singleton checks links during build to warn or fail upon finding dead links. # # `JEKYLL_LINK_CHECKER`, set on the environment, will cause verification of external links -# Valid values: internal, forced, all. +# Valid values: internal, all. # Usage: `JEKYLL_LINK_CHECKER=internal bundle exec jekyll build --trace` # # `JEKYLL_FATAL_LINK_CHECKER`, set on the environment, is the same as `JEKYLL_LINK_CHECKER` @@ -25,6 +26,14 @@ # Usage: `JEKYLL_FATAL_LINK_CHECKER=internal bundle exec jekyll build --trace` module Jekyll::LinkChecker + class CheckTypes + include Ruby::Enum + + define :INTERNAL, 'internal' + define :EXTERNAL, 'external' + define :ALL, 'all' + end + ## # The collection that will get stores as the output @@ -44,7 +53,6 @@ module Jekyll::LinkChecker # Pattern to check for external URLs @external_matcher = %r{^https?://}.freeze - @forced_external_matcher = %r{^https?://.*(?=opensearch\.org/)}.freeze ## # List of domains to ignore @@ -55,7 +63,7 @@ module Jekyll::LinkChecker 'playground.opensearch.org', # inifite redirect, https://github.com/opensearch-project/dashboards-anywhere/issues/172 'crates.io', # 404s on bots 'www.cloudflare.com', # 403s on bots - 'example.issue.link', # a fake example link from the template + 'example.issue.link' # a fake example link from the template ] ## @@ -68,11 +76,9 @@ module Jekyll::LinkChecker ## # Build flags driven by environment variables - @@LINK_CHECKER_STATES = %w[internal forced all] - @check_links # Enables the link checker - @check_forced_external # Enables checking internal links marked as external e.g. /docs + @check_internal_links # Enables checking internal links @check_external_links # Enables checking external links - @should_build_fatally # indicates the need to fail the build for dead links + @fail_on_error # Indicates the need to fail the build for dead links ## # Defines the priority of the plugin @@ -81,6 +87,22 @@ def self.priority 10 end + def self.check_links? + check_external_links? || check_internal_links? + end + + def self.check_external_links? + !!@check_external_links + end + + def self.check_internal_links? + !!@check_internal_links + end + + def self.fail_on_error? + !!@fail_on_error + end + ## # Initializes the singleton by recording the site def self.init(site) @@ -89,15 +111,15 @@ def self.init(site) @failures = [] begin - @should_build_fatally = true if ENV.key?('JEKYLL_FATAL_LINK_CHECKER') - check_flag = @should_build_fatally ? ENV['JEKYLL_FATAL_LINK_CHECKER'] : ENV['JEKYLL_LINK_CHECKER'] + @fail_on_error = true if ENV.key?('JEKYLL_FATAL_LINK_CHECKER') + check_flag = fail_on_error? ? ENV['JEKYLL_FATAL_LINK_CHECKER'] : ENV['JEKYLL_LINK_CHECKER'] unless check_flag return Jekyll.logger.info 'LinkChecker:', 'disabled. Enable with JEKYLL_LINK_CHECKER on the environment' end - unless @@LINK_CHECKER_STATES.include?(check_flag) - Jekyll.logger.info "LinkChecker: [Notice] Could not initialize, Valid values for #{@should_build_fatally ? 'JEKYLL_FATAL_LINK_CHECKER' : 'JEKYLL_LINK_CHECKER'} are #{@@LINK_CHECKER_STATES}" + unless CheckTypes.values.include?(check_flag) + Jekyll.logger.info "LinkChecker: [Notice] Could not initialize, Valid values for #{fail_on_error? ? 'JEKYLL_FATAL_LINK_CHECKER' : 'JEKYLL_LINK_CHECKER'} are #{CheckTypes.values}" return end @@ -112,15 +134,8 @@ def self.init(site) @failures << "#{result}, linked to in #{result.options[:location]}" end - @check_links = true if @@LINK_CHECKER_STATES.include?(check_flag) - @check_forced_external = true if @@LINK_CHECKER_STATES[1..3].include?(check_flag) - @check_external_links = true if @@LINK_CHECKER_STATES[2..3].include?(check_flag) - - msg = { - 'internal' => 'internal links', - 'forced' => 'internal and forced external links', - 'all' => 'all links' - } + @check_external_links = [CheckTypes::EXTERNAL, CheckTypes::ALL].include?(check_flag) + @check_internal_links = [CheckTypes::INTERNAL, CheckTypes::ALL].include?(check_flag) # Process a Page as soon as its content is ready Jekyll::Hooks.register :pages, :post_convert, priority: priority do |page| @@ -137,10 +152,10 @@ def self.init(site) verify(site) end - if @check_links - Jekyll.logger.info "LinkChecker: [Notice] Initialized successfully and will check #{msg[check_flag]}" + if check_links? + Jekyll.logger.info "LinkChecker: [Notice] Initialized successfully and will check #{check_flag} links" end - Jekyll.logger.info 'LinkChecker: [Notice] The build will fail if a dead link is found' if @should_build_fatally + Jekyll.logger.info 'LinkChecker: [Notice] The build will fail if a dead link is found' if fail_on_error? rescue StandardError => e Jekyll.logger.error 'LinkChecker: [Error] Failed to initialize Link Checker' raise @@ -152,7 +167,7 @@ def self.init(site) # It also checks for anchors to parts of the same page/doc def self.process(page) - return unless @check_links + return unless check_links? return if @excluded_paths.match(page.path) hrefs = page.content.scan(@href_matcher) @@ -177,11 +192,11 @@ def self.process(page) # Saves the collection as a JSON file def self.verify(_site) - return unless @check_links + return unless check_links? @base_url_matcher = %r{^#{@site.config["url"]}#{@site.baseurl}(/.*)$}.freeze - @urls.sort_by { |url, pages| rand }.each do |url, pages| + @urls.sort_by { |_url, _pages| rand }.each do |url, pages| location = "./#{pages.to_a.join(', ./')}" @failures << "#{url}, linked to in #{location}" unless check(url, location) end @@ -193,7 +208,7 @@ def self.verify(_site) end if !@failures.empty? - if @should_build_fatally + if fail_on_error? Jekyll.logger.error "\nLinkChecker: [Error] #{msg}\n".red raise msg else @@ -213,19 +228,15 @@ def self.check(url, location) url = @site.config['url'] + url if url.start_with? '/docs/' - if @forced_external_matcher =~ url - return true unless @check_forced_external - - return check_external(url, location) - end - if @external_matcher =~ url - return true unless @check_external_links + return true unless check_external_links? - return check_external(url, location) - end + check_external(url, location) + else + return true unless check_internal_links? - check_internal(url, location) + check_internal(url, location) + end end ##