Skip to content

Commit

Permalink
Implement issue aim42#276 exclude certain URLs from checking, also im…
Browse files Browse the repository at this point in the history
…plement hosts to exclude
  • Loading branch information
elebeida committed Jan 14, 2025
1 parent dcb9be0 commit 1c53adf
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 0 deletions.
2 changes: 2 additions & 0 deletions docToolchainConfig.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ exportEA.with {
htmlSanityCheck.with {
sourceDir = 'microsite/output'
resultsFolder = 'html-sanity-check'
//urlsToExclude = ['http://example.com/excluded', 'http://example.com/excluded2']
//hostsToExclude = ['example2.com', 'example3', 'example4']
}
//end::htmlSanityCheckConfig[]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ public class Configuration {
@Getter(AccessLevel.NONE)
@Builder.Default
Boolean ignoreIPAddresses = false;
Set<String> urlsToExclude;
Set<String> hostsToExclude;

/*
* Explanation for configuring http status codes:
* The standard http status codes are defined in class {@link NetUtil} and can
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class BrokenHttpLinksChecker extends Checker {
// need that to calculate "nrOfOccurrences"
// the pure http/https hrefs as a set; duplicates are removed here
private Set<String> hrefSet;
private Set<String> urlsToExclude;
private Set<String> hostsToExclude;


BrokenHttpLinksChecker(Configuration pConfig) {
Expand All @@ -45,6 +47,8 @@ class BrokenHttpLinksChecker extends Checker {
errorCodes = getMyConfig().getHttpErrorCodes();
warningCodes = getMyConfig().getHttpWarningCodes();
successCodes = getMyConfig().getHttpSuccessCodes();
urlsToExclude = getMyConfig().getUrlsToExclude();
hostsToExclude = getMyConfig().getHostsToExclude();
}

@Override
Expand Down Expand Up @@ -101,6 +105,25 @@ private void checkAllHttpLinks() {


protected void doubleCheckSingleHttpLink(String href) {
if (urlsToExclude != null && urlsToExclude.contains(href)) {
// Skip checking this URL
return;
}

// Check if the host of the URL is in the hostsToExclude list
try {
URL url = new URL(href);
String host = url.getHost();
if (hostsToExclude != null && hostsToExclude.contains(host)) {
// Skip checking this URL
return;
}
} catch (MalformedURLException e) {
// Handle the exception if the URL is malformed
Finding malformedURLFinding = new Finding("malformed URL exception with href=" + href);
getCheckingResults().addFinding(malformedURLFinding);
return;
}
// bookkeeping:
getCheckingResults().incNrOfChecks();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ class HtmlSanityCheckTask extends DefaultTask {
@Optional
@Input
Set<Integer> httpSuccessCodes
@Optional
@Input
Set<String> urlsToExclude
@Optional
@Input
Set<String> hostsToExclude

@Input
List<Class<? extends Checker>> checkerClasses = AllCheckers.CHECKER_CLASSES
Expand Down Expand Up @@ -187,6 +193,8 @@ See ${checkingResultsDir} for a detailed report."""
.ignoreIPAddresses(ignoreIPAddresses)

.checksToExecute(checkerClasses)
.urlsToExclude(urlsToExclude)
.hostsToExclude(hostsToExclude)
.build()

// in case we have configured specific interpretations of http status codes
Expand All @@ -212,6 +220,8 @@ See ${checkingResultsDir} for a detailed report."""
logger.info "Results dir : $checkingResultsDir"
logger.info "JUnit dir : $junitResultsDir"
logger.info "Fail on errors : $failOnErrors"
logger.info "Urls to Exclude : $urlsToExclude"
logger.info "Hosts to Exclude: $hostsToExclude"
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,22 @@ import spock.lang.Specification
class HtmlSanityCheckBaseSpec extends Specification {
// Minimal HTML 4.01 document with an empty head and body.
// NOTE(review): the document ends with an opening <html> tag instead of </html> — confirm this is intentional.
final static VALID_HTML = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"><html><head></head><body></body><html>"""
// HTML fragment containing two <span> elements that share the same id "id".
// Presumably the duplicate id is what makes this fixture fail the checks — verify against the checkers.
final static INVALID_HTML = """<body><span id="id"/><span id="id"/></body> """
// Page with two links on the same host (example.com): one to /excluded and one to /included.
// The functional spec pairs this fixture with urlsToExclude = ['http://example.com/excluded'],
// so only the first link matches the configured exclusion.
final static VALID_HTML_WITH_EXCLUDED_URL = """
<html>
<body>
<a href="http://example.com/excluded">Excluded URL</a>
<a href="http://example.com/included">Included URL</a>
</body>
</html>
"""
// Page with two links on different hosts: excluded.com and included.com.
// The functional spec pairs this fixture with hostsToExclude = ['excluded.com'],
// so only the first link's host matches the configured exclusion.
final static VALID_HTML_WITH_EXCLUDED_HOST = """
<html>
<body>
<a href="http://excluded.com/page">Excluded Host</a>
<a href="http://included.com/page">Included Host</a>
</body>
</html>
"""

@Rule
TemporaryFolder testProjectDir = new TemporaryFolder()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,44 @@ class HtmlSanityCheckTaskFunctionalSpec extends HtmlSanityCheckBaseSpec {
gradleVersion << GRADLE_VERSIONS
}

// Functional test: with urlsToExclude configured, the htmlSanityCheck task succeeds and the
// excluded URL does not appear in the build output. Runs once per entry in GRADLE_VERSIONS.
@Unroll
def "can exclude specific URLs with urlsToExclude and Gradle version #gradleVersion"() {
given:
// HTML fixture containing both the excluded and a non-excluded link
htmlFile << VALID_HTML_WITH_EXCLUDED_URL
// build file snippet that excludes exactly one of the two URLs in the fixture
createBuildFile("""
urlsToExclude = ['http://example.com/excluded']
""")

when:
// build() is used (rather than a failing-build variant), so the Gradle run is expected to succeed
def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build()

then:
result.task(":htmlSanityCheck").outcome == SUCCESS
// the excluded URL must not be mentioned anywhere in the captured build output
!result.output.contains("http://example.com/excluded")

where:
gradleVersion << GRADLE_VERSIONS
}

// Functional test: with hostsToExclude configured, the htmlSanityCheck task succeeds and links
// on the excluded host do not appear in the build output. Runs once per entry in GRADLE_VERSIONS.
// The feature name uses the actual property name "hostsToExclude" (plural), matching the
// configuration written to the build file below.
@Unroll
def "can exclude specific hosts with hostsToExclude and Gradle version #gradleVersion"() {
given:
// HTML fixture containing one link on the excluded host and one on another host
htmlFile << VALID_HTML_WITH_EXCLUDED_HOST
// build file snippet that excludes the host of the first link in the fixture
createBuildFile("""
hostsToExclude = ['excluded.com']
""")

when:
// build() is used (rather than a failing-build variant), so the Gradle run is expected to succeed
def result = runnerForHtmlSanityCheckTask(gradleVersion as String).build()

then:
result.task(":htmlSanityCheck").outcome == SUCCESS
// no URL on the excluded host may be mentioned anywhere in the captured build output
!result.output.contains("http://excluded.com")

where:
gradleVersion << GRADLE_VERSIONS
}

private GradleRunner runnerForHtmlSanityCheckTask(String gradleVersion) {
GradleRunner.create()
.withGradleVersion(gradleVersion)
Expand Down
3 changes: 3 additions & 0 deletions self-check/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ htmlSanityCheck {

failOnErrors = true

urlsToExclude = [ "https://www.aim42.org/"]
hostsToExclude = [ "www.aim42.org" ]

logger.quiet "HSC version: ${htmlSanityCheckVersion}"
logger.quiet "HSC sourceDir: ${sourceDir.absolutePath}"
logger.quiet "HSC checkingResultsDir: ${checkingResultsDir.absolutePath}"
Expand Down

0 comments on commit 1c53adf

Please sign in to comment.