From a4fed675b6840ed3c96a4f1314e5e83c029c70aa Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Mon, 28 Nov 2022 10:50:35 +0100 Subject: [PATCH] feat: check that container-relative URLs have no query EPUB relative URLs must be valid-relative-ocf-URL-with-fragment strings, which must not have query components. This commit adds a new check that reports `RSC-033` (error) when a query is found in a container-relative URL. It also slightly improves the URL checks in single-file validation mode, to ensure a URL is not always considered remote in that mode. --- .../epubcheck/messages/DefaultSeverities.java | 1 + .../adobe/epubcheck/messages/MessageId.java | 1 + .../epubcheck/opf/ValidationContext.java | 2 +- .../epubcheck/core/references/URLChecker.java | 24 ++++++++++---- .../messages/MessageBundle.properties | 3 +- .../04-ocf/files/url-in-xhtml-valid.xhtml | 15 +++++++++ .../files/url-query-in-package-item-error.opf | 16 ++++++++++ .../files/url-query-in-package-link-error.opf | 17 ++++++++++ .../files/url-query-in-xhtml-a-error.xhtml | 11 +++++++ src/test/resources/epub3/04-ocf/ocf.feature | 31 +++++++++++++++++++ 10 files changed, 113 insertions(+), 8 deletions(-) create mode 100644 src/test/resources/epub3/04-ocf/files/url-in-xhtml-valid.xhtml create mode 100644 src/test/resources/epub3/04-ocf/files/url-query-in-package-item-error.opf create mode 100644 src/test/resources/epub3/04-ocf/files/url-query-in-package-link-error.opf create mode 100644 src/test/resources/epub3/04-ocf/files/url-query-in-xhtml-a-error.xhtml diff --git a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java index 7c2292ecb..0419bbe1a 100644 --- a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java +++ b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java @@ -349,6 +349,7 @@ private void initialize() severities.put(MessageId.RSC_030, Severity.ERROR); severities.put(MessageId.RSC_031, 
Severity.WARNING); severities.put(MessageId.RSC_032, Severity.ERROR); + severities.put(MessageId.RSC_033, Severity.ERROR); // Scripting severities.put(MessageId.SCP_001, Severity.SUPPRESSED); // checking scripts is out of scope diff --git a/src/main/java/com/adobe/epubcheck/messages/MessageId.java b/src/main/java/com/adobe/epubcheck/messages/MessageId.java index 6e427ee59..8c35915d1 100644 --- a/src/main/java/com/adobe/epubcheck/messages/MessageId.java +++ b/src/main/java/com/adobe/epubcheck/messages/MessageId.java @@ -343,6 +343,7 @@ public enum MessageId implements Comparable RSC_030("RSC-030"), RSC_031("RSC-031"), RSC_032("RSC-032"), + RSC_033("RSC-033"), // Messages relating to scripting SCP_001("SCP-001"), diff --git a/src/main/java/com/adobe/epubcheck/opf/ValidationContext.java b/src/main/java/com/adobe/epubcheck/opf/ValidationContext.java index 7e65da8e3..1ed5a52db 100644 --- a/src/main/java/com/adobe/epubcheck/opf/ValidationContext.java +++ b/src/main/java/com/adobe/epubcheck/opf/ValidationContext.java @@ -205,7 +205,7 @@ public String relativize(URL url) } else { - return url.toString(); + return this.url.relativize(url); } } diff --git a/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java b/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java index c1527b24f..080e6f03d 100644 --- a/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java +++ b/src/main/java/org/w3c/epubcheck/core/references/URLChecker.java @@ -29,6 +29,7 @@ public class URLChecker private URL baseURLTestB; private boolean isRemoteBase; private final Report report; + private final ValidationContext context; public URLChecker(ValidationContext context) { @@ -37,7 +38,8 @@ public URLChecker(ValidationContext context) public URLChecker(ValidationContext context, URL baseURL) { - this.report = Preconditions.checkNotNull(context).report; + this.context = Preconditions.checkNotNull(context); + this.report = context.report; this.baseURL = 
Preconditions.checkNotNull(baseURL); this.isRemoteBase = false; try @@ -117,12 +119,22 @@ private URL resolveURL(String string, boolean isBase, EPUBLocation location) isRemoteBase = true; return url; } - // if relative URL "leaks" outside the container, report and continue - else if (!isBase && !testA.toString().startsWith(TEST_BASE_A_FULL) - || !testB.toString().startsWith(TEST_BASE_B_FULL)) + else { - // FIXME !!! this is broken, base s/b taken into account - report.message(MessageId.RSC_026, location, string); + // if URL has a query string, report and continue + if (url.query() != null) + { + report.message(MessageId.RSC_033, location, string); + url = url.withQuery(null); + } + // if relative URL "leaks" outside the container, report and continue + // this check only makes sense when the container is present + if (context.container.isPresent() && !isBase + && (!testA.toString().startsWith(TEST_BASE_A_FULL) + || !testB.toString().startsWith(TEST_BASE_B_FULL))) + { + report.message(MessageId.RSC_026, location, string); + } } return url; } catch (GalimatiasParseException e) diff --git a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties index 59a597b54..ec9336ba8 100644 --- a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties +++ b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties @@ -260,7 +260,7 @@ RSC_015=A fragment identifier is required for svg use tag references. RSC_016=Fatal Error while parsing file: %1$s RSC_017=Warning while parsing file: %1$s RSC_019=EPUBs with Multiple Renditions should contain a META-INF/metadata.xml file. -RSC_020="%1$s" is not a valid URI (%2$s) +RSC_020="%1$s" is not a valid URL (%2$s) RSC_021=A Search Key Map Document must point to Content Documents ("%1$s" was not found in the spine). RSC_022=Cannot check image details (requires Java version 7 or higher). 
RSC_024=Informative parsing warning: %1$s @@ -272,3 +272,4 @@ RSC_029=Data URL is not allowed in this context. RSC_030=File URLs are not allowed in EPUB, but found "%1$s". RSC_031=Remote resource references should use HTTPS, but found "%1$s". RSC_032=Fallback must be provided for foreign resources, but found none for resource "%1$s" of type "%2$s". +RSC_033=Relative URL strings must not have a query component, but found one in "%1$s". diff --git a/src/test/resources/epub3/04-ocf/files/url-in-xhtml-valid.xhtml b/src/test/resources/epub3/04-ocf/files/url-in-xhtml-valid.xhtml new file mode 100644 index 000000000..ad80e0e36 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-in-xhtml-valid.xhtml @@ -0,0 +1,15 @@ + + + + + Minimal EPUB + + +

Loomings

+ absolute URL + relative URL + relative URL, one directory up + fragment URL + fragment-only URL + + diff --git a/src/test/resources/epub3/04-ocf/files/url-query-in-package-item-error.opf b/src/test/resources/epub3/04-ocf/files/url-query-in-package-item-error.opf new file mode 100644 index 000000000..f78912333 --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-query-in-package-item-error.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + diff --git a/src/test/resources/epub3/04-ocf/files/url-query-in-package-link-error.opf b/src/test/resources/epub3/04-ocf/files/url-query-in-package-link-error.opf new file mode 100644 index 000000000..67410605d --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-query-in-package-link-error.opf @@ -0,0 +1,17 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/04-ocf/files/url-query-in-xhtml-a-error.xhtml b/src/test/resources/epub3/04-ocf/files/url-query-in-xhtml-a-error.xhtml new file mode 100644 index 000000000..37c6db09d --- /dev/null +++ b/src/test/resources/epub3/04-ocf/files/url-query-in-xhtml-a-error.xhtml @@ -0,0 +1,11 @@ + + + + + Minimal EPUB + + +

Loomings

+ Call me Ishmael. + + diff --git a/src/test/resources/epub3/04-ocf/ocf.feature b/src/test/resources/epub3/04-ocf/ocf.feature index b386bb618..ede13d575 100644 --- a/src/test/resources/epub3/04-ocf/ocf.feature +++ b/src/test/resources/epub3/04-ocf/ocf.feature @@ -105,6 +105,15 @@ Feature: EPUB 3 — Open Container Format ### 4.1.5 URLs in the OCF abstract container + #### Valid container URLs + + @spec @xref:sec-container-iri + Scenario: Allow valid container URLs in XHTML + When checking EPUB 'url-in-xhtml-valid.xhtml' + Then no errors or warnings are reported + + #### Invalid container URLs + @spec @xref:sec-container-iri Scenario: Report leaking URLs in the package document When checking EPUB 'ocf-url-leaking-in-opf-error' @@ -117,6 +126,28 @@ Feature: EPUB 3 — Open Container Format Then error RSC-026 is reported And no other errors or warnings are reported + #### URL query checks: + + @spec @xref:sec-container-iri + Scenario: Report a URL query string found in a manifest item + When checking EPUB 'url-query-in-package-item-error.opf' + Then error RSC-033 is reported + And no other errors or warnings are reported + + @spec @xref:sec-container-iri + Scenario: Report a URL query string found in a package link + When checking EPUB 'url-query-in-package-link-error.opf' + Then error RSC-033 is reported + And no other errors or warnings are reported + + @spec @xref:sec-container-iri + Scenario: Report a URL query string found in an XHTML `a` element + When checking EPUB 'url-query-in-xhtml-a-error.xhtml' + Then error RSC-033 is reported + And no other errors or warnings are reported + + #### resource existence checks: + @spec @xref:sec-container-iri Scenario: Report a reference from an XHTML `cite` attribute not declared in the manifest When checking EPUB 'url-xhtml-cite-missing-resource-error'