Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removed forced options, cleaned up link checker args. #4726

Merged
merged 1 commit into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ gem "webrick", "~> 1.7"
# Link checker
gem "typhoeus"
gem "ruby-link-checker"
gem "ruby-enum"
85 changes: 48 additions & 37 deletions _plugins/link-checker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,28 @@
require 'pathname'
require 'typhoeus'
require 'ruby-link-checker'
require 'ruby-enum'

##
# This singleton checks links during build to warn or fail upon finding dead links.
#
# `JEKYLL_LINK_CHECKER`, set on the environment, will cause verification of external links
# Valid values: internal, forced, all.
# Valid values: internal, external, all.
# Usage: `JEKYLL_LINK_CHECKER=internal bundle exec jekyll build --trace`
#
# `JEKYLL_FATAL_LINK_CHECKER`, set on the environment, is the same as `JEKYLL_LINK_CHECKER`
# except that it fails the build if there are broken links. It takes the same valid values.
# Usage: `JEKYLL_FATAL_LINK_CHECKER=internal bundle exec jekyll build --trace`

module Jekyll::LinkChecker
##
# Enumeration of the link-check modes accepted by the environment flags.
# Each entry pairs a constant with the literal string compared against the flag value.
class CheckTypes
  include Ruby::Enum

  define(:INTERNAL, 'internal')
  define(:EXTERNAL, 'external')
  define(:ALL, 'all')
end

##
# The collection that will get stored as the output

Expand All @@ -44,7 +53,6 @@ module Jekyll::LinkChecker
# Pattern to check for external URLs

@external_matcher = %r{^https?://}.freeze
@forced_external_matcher = %r{^https?://.*(?=opensearch\.org/)}.freeze

##
# List of domains to ignore
Expand All @@ -55,7 +63,7 @@ module Jekyll::LinkChecker
'playground.opensearch.org', # inifite redirect, https://github.com/opensearch-project/dashboards-anywhere/issues/172
'crates.io', # 404s on bots
'www.cloudflare.com', # 403s on bots
'example.issue.link', # a fake example link from the template
'example.issue.link' # a fake example link from the template
]

##
Expand All @@ -68,11 +76,9 @@ module Jekyll::LinkChecker

##
# Build flags driven by environment variables
@@LINK_CHECKER_STATES = %w[internal forced all]
@check_links # Enables the link checker
@check_forced_external # Enables checking internal links marked as external e.g. /docs
@check_internal_links # Enables checking internal links
@check_external_links # Enables checking external links
@should_build_fatally # indicates the need to fail the build for dead links
@fail_on_error # Indicates the need to fail the build for dead links

##
# Defines the priority of the plugin
Expand All @@ -81,6 +87,22 @@ def self.priority
10
end

##
# Reports whether any kind of link checking (external or internal) is enabled.
def self.check_links?
  return true if check_external_links?

  check_internal_links?
end

##
# Reports whether external link checking is enabled.
# Always returns a strict boolean, even when the flag is unset (nil).
def self.check_external_links?
  @check_external_links ? true : false
end

##
# Reports whether internal link checking is enabled.
# Always returns a strict boolean, even when the flag is unset (nil).
def self.check_internal_links?
  @check_internal_links ? true : false
end

##
# Reports whether the fail-on-error flag is set.
# Always returns a strict boolean, even when the flag is unset (nil).
def self.fail_on_error?
  @fail_on_error ? true : false
end

##
# Initializes the singleton by recording the site
def self.init(site)
Expand All @@ -89,15 +111,15 @@ def self.init(site)
@failures = []

begin
@should_build_fatally = true if ENV.key?('JEKYLL_FATAL_LINK_CHECKER')
check_flag = @should_build_fatally ? ENV['JEKYLL_FATAL_LINK_CHECKER'] : ENV['JEKYLL_LINK_CHECKER']
@fail_on_error = true if ENV.key?('JEKYLL_FATAL_LINK_CHECKER')
check_flag = fail_on_error? ? ENV['JEKYLL_FATAL_LINK_CHECKER'] : ENV['JEKYLL_LINK_CHECKER']

unless check_flag
return Jekyll.logger.info 'LinkChecker:', 'disabled. Enable with JEKYLL_LINK_CHECKER on the environment'
end

unless @@LINK_CHECKER_STATES.include?(check_flag)
Jekyll.logger.info "LinkChecker: [Notice] Could not initialize, Valid values for #{@should_build_fatally ? 'JEKYLL_FATAL_LINK_CHECKER' : 'JEKYLL_LINK_CHECKER'} are #{@@LINK_CHECKER_STATES}"
unless CheckTypes.values.include?(check_flag)
Jekyll.logger.info "LinkChecker: [Notice] Could not initialize, Valid values for #{fail_on_error? ? 'JEKYLL_FATAL_LINK_CHECKER' : 'JEKYLL_LINK_CHECKER'} are #{CheckTypes.values}"
return
end

Expand All @@ -112,15 +134,8 @@ def self.init(site)
@failures << "#{result}, linked to in #{result.options[:location]}"
end

@check_links = true if @@LINK_CHECKER_STATES.include?(check_flag)
@check_forced_external = true if @@LINK_CHECKER_STATES[1..3].include?(check_flag)
@check_external_links = true if @@LINK_CHECKER_STATES[2..3].include?(check_flag)

msg = {
'internal' => 'internal links',
'forced' => 'internal and forced external links',
'all' => 'all links'
}
@check_external_links = [CheckTypes::EXTERNAL, CheckTypes::ALL].include?(check_flag)
@check_internal_links = [CheckTypes::INTERNAL, CheckTypes::ALL].include?(check_flag)

# Process a Page as soon as its content is ready
Jekyll::Hooks.register :pages, :post_convert, priority: priority do |page|
Expand All @@ -137,10 +152,10 @@ def self.init(site)
verify(site)
end

if @check_links
Jekyll.logger.info "LinkChecker: [Notice] Initialized successfully and will check #{msg[check_flag]}"
if check_links?
Jekyll.logger.info "LinkChecker: [Notice] Initialized successfully and will check #{check_flag} links"
end
Jekyll.logger.info 'LinkChecker: [Notice] The build will fail if a dead link is found' if @should_build_fatally
Jekyll.logger.info 'LinkChecker: [Notice] The build will fail if a dead link is found' if fail_on_error?
rescue StandardError => e
Jekyll.logger.error 'LinkChecker: [Error] Failed to initialize Link Checker'
raise
Expand All @@ -152,7 +167,7 @@ def self.init(site)
# It also checks for anchors to parts of the same page/doc

def self.process(page)
return unless @check_links
return unless check_links?
return if @excluded_paths.match(page.path)

hrefs = page.content.scan(@href_matcher)
Expand All @@ -177,11 +192,11 @@ def self.process(page)
# Saves the collection as a JSON file

def self.verify(_site)
return unless @check_links
return unless check_links?

@base_url_matcher = %r{^#{@site.config["url"]}#{@site.baseurl}(/.*)$}.freeze

@urls.sort_by { |url, pages| rand }.each do |url, pages|
@urls.sort_by { |_url, _pages| rand }.each do |url, pages|
location = "./#{pages.to_a.join(', ./')}"
@failures << "#{url}, linked to in #{location}" unless check(url, location)
end
Expand All @@ -193,7 +208,7 @@ def self.verify(_site)
end

if [email protected]?
if @should_build_fatally
if fail_on_error?
Jekyll.logger.error "\nLinkChecker: [Error] #{msg}\n".red
raise msg
else
Expand All @@ -213,19 +228,15 @@ def self.check(url, location)

url = @site.config['url'] + url if url.start_with? '/docs/'

if @forced_external_matcher =~ url
return true unless @check_forced_external

return check_external(url, location)
end

if @external_matcher =~ url
return true unless @check_external_links
return true unless check_external_links?

return check_external(url, location)
end
check_external(url, location)
else
return true unless check_internal_links?

check_internal(url, location)
check_internal(url, location)
end
end

##
Expand Down