diff --git a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java index deedf5ddc..cd25d4799 100644 --- a/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java +++ b/src/main/java/com/adobe/epubcheck/messages/DefaultSeverities.java @@ -307,6 +307,7 @@ private void initialize() severities.put(MessageId.PKG_024, Severity.INFO); severities.put(MessageId.PKG_025, Severity.ERROR); severities.put(MessageId.PKG_026, Severity.ERROR); + severities.put(MessageId.PKG_027, Severity.FATAL); // Resources severities.put(MessageId.RSC_001, Severity.ERROR); diff --git a/src/main/java/com/adobe/epubcheck/messages/MessageId.java b/src/main/java/com/adobe/epubcheck/messages/MessageId.java index 340db9aaf..dd8486823 100644 --- a/src/main/java/com/adobe/epubcheck/messages/MessageId.java +++ b/src/main/java/com/adobe/epubcheck/messages/MessageId.java @@ -301,6 +301,7 @@ public enum MessageId implements Comparable PKG_024("PKG-024"), PKG_025("PKG-025"), PKG_026("PKG-026"), + PKG_027("PKG-027"), // Messages relating to resources RSC_001("RSC-001"), diff --git a/src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java b/src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java index d8cdde80e..04960f4b4 100755 --- a/src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java +++ b/src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java @@ -85,7 +85,10 @@ public void check() // Check the OCF Container file structure // -------------------------------------- // - checkContainerStructure(state); + if (!checkContainerStructure(state)) + { + return; + } OCFContainer container = state.getContainer(); // @@ -270,83 +273,92 @@ private boolean checkContainerFile(OCFCheckerState state) return true; } - private void checkContainerStructure(OCFCheckerState state) + private boolean checkContainerStructure(OCFCheckerState state) { - // Get a container - Iterable resourcesProvider; try { // FIXME 2022 build resourcesProvider 
depending on MIME type - resourcesProvider = new OCFZipResources(context.url); - } catch (IOException e) - { - // FIXME 2022 see how to propagate fatal IOError - report.message(MessageId.PKG_008, EPUBLocation.of(context), e.getLocalizedMessage()); - return; - } - // Map to store the container resource files - Map resources = new HashMap<>(); - // List to store the container resource directories - List directories = new LinkedList<>(); - - // Loop through the entries - OCFFilenameChecker filenameChecker = new OCFFilenameChecker(state.context().build()); - for (OCFResource resource : resourcesProvider) - { - Preconditions.checkNotNull(resource.getPath()); - Preconditions.checkNotNull(resource.getProperties()); + // Get a container + Iterable resourcesProvider = new OCFZipResources(context.url); + // Map to store the container resource files + Map resources = new HashMap<>(); + // List to store the container resource directories + List directories = new LinkedList<>(); + + // Loop through the entries + OCFFilenameChecker filenameChecker = new OCFFilenameChecker(state.context().build()); + // Malformed (non UTF-8) entry names surface as exceptions, handled in the catch block below + for (OCFResource resource : resourcesProvider) + { + Preconditions.checkNotNull(resource.getPath()); + Preconditions.checkNotNull(resource.getProperties()); - // FIXME 2022 report symbolic links and continue + // FIXME 2022 report symbolic links and continue - // Check duplicate entries - if (resources.containsKey(resource.getPath().toLowerCase(Locale.ROOT))) - { - context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath()); - } - // Check duplicate entries after NFC normalization - else if (resources.containsKey( - Normalizer.normalize(resource.getPath().toLowerCase(Locale.ROOT), Normalizer.Form.NFC))) - { - context.report.message(MessageId.OPF_061, EPUBLocation.of(context), resource.getPath()); - } + // Check duplicate entries + if (resources.containsKey(resource.getPath().toLowerCase(Locale.ROOT))) + { + 
context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath()); + } + // Check duplicate entries after NFC normalization + else if (resources.containsKey( + Normalizer.normalize(resource.getPath().toLowerCase(Locale.ROOT), Normalizer.Form.NFC))) + { + context.report.message(MessageId.OPF_061, EPUBLocation.of(context), resource.getPath()); + } - // Store the resource in the data structure - if (resource.isDirectory()) - { - // the container resource is a directory, - // store it for later checking of empty directories - directories.add(resource.getPath()); - } - else - { - // Check file name requirements - filenameChecker.checkCompatiblyEscaped(resource.getPath()); - - // report entry metadata - reportFeatures(resource.getProperties()); - // the container resource is a file, - // add the resource to the container model - resources.put(resource.getPath().toLowerCase(Locale.ROOT), resource); - state.addResource(resource); + // Store the resource in the data structure + if (resource.isDirectory()) + { + // the container resource is a directory, + // store it for later checking of empty directories + directories.add(resource.getPath()); + } + else + { + // Check file name requirements + filenameChecker.checkCompatiblyEscaped(resource.getPath()); + + // report entry metadata + reportFeatures(resource.getProperties()); + // the container resource is a file, + // add the resource to the container model + resources.put(resource.getPath().toLowerCase(Locale.ROOT), resource); + state.addResource(resource); + } } - } - // Report empty directories - for (String directory : directories) - { - boolean hasContents = false; - for (OCFResource resource : resources.values()) + // Report empty directories + for (String directory : directories) { - if (resource.getPath().startsWith(directory)) + boolean hasContents = false; + for (OCFResource resource : resources.values()) + { + if (resource.getPath().startsWith(directory)) + { + hasContents = true; + break; + 
} + } + if (!hasContents) { - hasContents = true; - break; + report.message(MessageId.PKG_014, EPUBLocation.of(context), directory); } } - if (!hasContents) + return true; + } catch (Exception e) + { + switch (String.valueOf(e.getMessage())) // getMessage() may be null, and switching on a null String throws NullPointerException { - report.message(MessageId.PKG_014, EPUBLocation.of(context), directory); + case "invalid CEN header (bad entry name)": // reported by OpenJDK + case "MALFORMED": // reported by Oracle JDK 1.8 + report.message(MessageId.PKG_027, EPUBLocation.of(context), e.getLocalizedMessage()); + break; + default: + report.message(MessageId.PKG_008, EPUBLocation.of(context), e.getLocalizedMessage()); + break; } + return false; } } diff --git a/src/main/java/com/adobe/epubcheck/ocf/OCFZipResources.java b/src/main/java/com/adobe/epubcheck/ocf/OCFZipResources.java index f31f6c26b..8a719fb04 100644 --- a/src/main/java/com/adobe/epubcheck/ocf/OCFZipResources.java +++ b/src/main/java/com/adobe/epubcheck/ocf/OCFZipResources.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.util.Enumeration; import java.util.Iterator; @@ -32,7 +33,7 @@ public OCFZipResources(URL url) throws IOException { new IllegalArgumentException("Not a file URL: " + url); } - this.zip = new ZipFile(file); + this.zip = new ZipFile(file, StandardCharsets.UTF_8); } @Override diff --git a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties index 1f6abd970..a09ca0378 100644 --- a/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties +++ b/src/main/resources/com/adobe/epubcheck/messages/MessageBundle.properties @@ -316,7 +316,8 @@ PKG_023=Validating the EPUB against version 2.0, default validation profile will PKG_024=Uncommon EPUB file extension. PKG_024_SUG=For maximum compatibility, use ".epub". 
PKG_025=Publication resource must not be located in the META-INF directory -PKG_026=Obfuscated resource must be a Font Core Media Type (was declared as "%1$s" in "%2$s"). +PKG_026=Obfuscated resource must be a Font Core Media Type (was declared as "%1$s" in "%2$s"). +PKG_027=Could not extract EPUB ZIP content, probably due to file names not encoded in UTF-8. #Resources RSC_001=File "%1$s" could not be found. diff --git a/src/test/resources/epub3/04-ocf/files/ocf-filename-not-utf8-error.epub b/src/test/resources/epub3/04-ocf/files/ocf-filename-not-utf8-error.epub new file mode 100644 index 000000000..dab8ad697 Binary files /dev/null and b/src/test/resources/epub3/04-ocf/files/ocf-filename-not-utf8-error.epub differ diff --git a/src/test/resources/epub3/04-ocf/files/ocf-filename-utf8-valid.epub b/src/test/resources/epub3/04-ocf/files/ocf-filename-utf8-valid.epub new file mode 100644 index 000000000..3eb95b7c2 Binary files /dev/null and b/src/test/resources/epub3/04-ocf/files/ocf-filename-utf8-valid.epub differ diff --git a/src/test/resources/epub3/04-ocf/files/ocf-filepath-not-utf8-error.epub b/src/test/resources/epub3/04-ocf/files/ocf-filepath-not-utf8-error.epub new file mode 100755 index 000000000..fcf20c9fa Binary files /dev/null and b/src/test/resources/epub3/04-ocf/files/ocf-filepath-not-utf8-error.epub differ diff --git a/src/test/resources/epub3/04-ocf/files/ocf-filepath-utf8-valid.epub b/src/test/resources/epub3/04-ocf/files/ocf-filepath-utf8-valid.epub new file mode 100644 index 000000000..2117b3f14 Binary files /dev/null and b/src/test/resources/epub3/04-ocf/files/ocf-filepath-utf8-valid.epub differ diff --git a/src/test/resources/epub3/04-ocf/ocf.feature b/src/test/resources/epub3/04-ocf/ocf.feature index 36a757376..001c2adbe 100644 --- a/src/test/resources/epub3/04-ocf/ocf.feature +++ b/src/test/resources/epub3/04-ocf/ocf.feature @@ -181,6 +181,28 @@ Feature: EPUB 3 — Open Container Format Then error OPF-060 is reported And no other errors or warnings 
are reported + @spec @xref:sec-zip-container-zipreqs + Scenario: Verify file names with non-ASCII UTF-8-encoded characters are allowed + When checking EPUB 'ocf-filename-utf8-valid.epub' + Then no errors or warnings are reported + + @spec @xref:sec-zip-container-zipreqs + Scenario: Report file names that are not encoded as UTF-8 + When checking EPUB 'ocf-filename-not-utf8-error.epub' + Then fatal error PKG-027 is reported + And no other errors or warnings are reported + + @spec @xref:sec-zip-container-zipreqs + Scenario: Verify path names with non-ASCII UTF-8-encoded characters are allowed + When checking EPUB 'ocf-filepath-utf8-valid.epub' + Then no errors or warnings are reported + + @spec @xref:sec-zip-container-zipreqs + Scenario: Report path names that are not encoded as UTF-8 + When checking EPUB 'ocf-filepath-not-utf8-error.epub' + Then fatal error PKG-027 is reported + And no other errors or warnings are reported + ### 4.2.3 OCF ZIP container media type idenfication