Skip to content

Commit

Permalink
unit tests for GenericReportExcelServlet and BrokenLinksReport
Browse files Browse the repository at this point in the history
  • Loading branch information
Yegor Kozlov committed Oct 3, 2017
1 parent 6f5a2c5 commit c0aa075
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 37 deletions.
12 changes: 11 additions & 1 deletion bundle/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
com.day.cq.wcm.workflow.process;version="[6.0,8)", <!-- using a wider version range for forward compatibility -->
com.day.cq.mailer;version="[5.9,7)", <!-- using a wider version range for forward compatibility -->
com.adobe.cq.sightly;version="[2.5,4)",
javax.annotation;resolution:=optional, <!-- pulled by the tika-parsers dependency -->
*
</Import-Package>
<Embed-Dependency>guava</Embed-Dependency>
Expand Down Expand Up @@ -283,7 +284,16 @@
<version>3.16</version>
<type>jar</type>
</dependency>

<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.14</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.14</version>
</dependency>
<!-- Test Dependencies -->
<dependency>
<groupId>org.mockito</groupId>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,25 +1,46 @@
/*
* Copyright 2017 Adobe.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.adobe.acs.commons.mcp.impl.processes;

import com.adobe.acs.commons.fam.ActionManager;
import com.adobe.acs.commons.mcp.ProcessDefinition;
import com.adobe.acs.commons.mcp.ProcessInstance;
import com.adobe.acs.commons.mcp.form.CheckboxComponent;
import com.adobe.acs.commons.mcp.form.FormField;
import com.adobe.acs.commons.mcp.form.PathfieldComponent;
import com.adobe.acs.commons.mcp.model.GenericReport;
import com.adobe.acs.commons.util.visitors.TreeFilteringResourceVisitor;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceUtil;
import org.apache.sling.api.resource.*;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.html.HtmlParser;
import org.apache.tika.sax.Link;
import org.apache.tika.sax.LinkContentHandler;

import javax.jcr.RepositoryException;
import java.io.ByteArrayInputStream;
import java.io.Serializable;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Broken Links Checker MCP task
*
* @author Yegor Kozlov
*/
public class BrokenLinksReport extends ProcessDefinition implements Serializable {
Expand All @@ -44,12 +65,29 @@ public class BrokenLinksReport extends ProcessDefinition implements Serializable
options = {"default=cq:allowedTemplates"})
private String excludeProperties;

// Form toggle: when set, the properties named in htmlFields are parsed as html to extract links.
@FormField(name = "Deep check in html",
    description = "If checked, links will be extracted from html field",
    component = CheckboxComponent.class,
    options = {"checked"})
private boolean deepCheck = false;

// Comma-separated names of the properties that hold html; split into deepCheckList by init().
@FormField(
    name = "Fields containing html",
    description = "Properties containing html to extract links",
    required = false,
    options = {"default=text"}
)
private String htmlFields;

// Derived lookup structures. They are transient and are (re)built by init() from the form values.
private transient Set<String> excludeList;   // property names that are not inspected
private transient Set<String> deepCheckList; // property names whose values are parsed as html
private transient Pattern regex;             // compiled form of propertyRegex

/**
 * Builds the lookup structures used while scanning: the set of excluded property names, the set of
 * property names to parse as html (left empty unless the deep check is enabled) and the compiled
 * reference pattern.
 *
 * @throws RepositoryException declared by the overridden method; not thrown by this implementation
 */
@Override
public void init() throws RepositoryException {
    excludeList = splitToSet(excludeProperties);
    // html fields are only consulted when the deep check is switched on
    deepCheckList = deepCheck ? splitToSet(htmlFields) : new HashSet<>();
    regex = Pattern.compile(propertyRegex);
}

/**
 * Splits a comma-separated list into a set of trimmed, non-empty tokens.
 *
 * @param csv the comma-separated value, may be {@code null} (e.g. an optional form field left unset)
 * @return a mutable set of tokens; empty when {@code csv} is {@code null} or contains no tokens
 */
private static Set<String> splitToSet(String csv) {
    if (csv == null) {
        return new HashSet<>();
    }
    return Arrays.stream(csv.split(","))
            .map(String::trim)
            // an empty token can never match a property name, so drop it
            .filter(token -> !token.isEmpty())
            .collect(Collectors.toSet());
}

Expand All @@ -64,7 +102,7 @@ enum REPORT {
@Override
public void buildProcess(ProcessInstance instance, ResourceResolver rr) throws LoginException, RepositoryException {
report.setName(instance.getName());
instance.defineAction("Collect Broken References", rr, this::collectBrokenLinks);
instance.defineAction("Collect Broken References", rr, this::buildReport);
instance.getInfo().setDescription(sourcePath);

}
Expand All @@ -77,43 +115,97 @@ public void storeReport(ProcessInstance instance, ResourceResolver rr) throws Re

}

public void collectBrokenLinks(ActionManager manager) {
public void buildReport(ActionManager manager) {
TreeFilteringResourceVisitor visitor = new TreeFilteringResourceVisitor();
visitor.setBreadthFirstMode();
visitor.setTraversalFilter(null);
visitor.setResourceVisitor((resource, depth) -> {
ResourceResolver resolver = resource.getResourceResolver();
resource.getValueMap().entrySet().stream()
.filter(entry -> !excludeList.contains(entry.getKey()))
.filter(entry -> entry.getValue() instanceof String || entry.getValue() instanceof String[])
.forEach(entry -> {

List<String> paths = collectPaths(entry.getValue())
.filter(path -> ResourceUtil.isNonExistingResource(resolver.resolve(path)))
.collect(Collectors.toList());
if (!paths.isEmpty()) {
String propertyPath = resource.getPath() + "/" + entry.getKey();
reportData.put(propertyPath, new EnumMap<>(REPORT.class));
reportData.get(propertyPath).put(REPORT.reference, paths.stream().collect(Collectors.joining(",")));
}

});
Map<String, List<String>> brokenRefs = collectBrokenReferences(resource, regex, excludeList, deepCheckList);
for(Map.Entry<String, List<String>> ref : brokenRefs.entrySet()){
String propertyPath = ref.getKey();
List<String> refs = ref.getValue();
reportData.put(propertyPath, new EnumMap<>(REPORT.class));
reportData.get(propertyPath).put(REPORT.reference, refs.stream().collect(Collectors.joining(",")));

}
});
manager.deferredWithResolver(rr -> visitor.accept(rr.getResource(sourcePath)));
}

Stream<String> collectPaths(Object p) {
/**
* Collect references from a JCR property.
* A property can be one of:
* <ol>
* <li>A string containing a reference, e.g, fileReference=/content/dam/image.png. </li>
* <li>An array of strings, e.g, fileReference=[/content/dam/image1.png, /content/dam/image2.png]</li>
* <li>An html fragment containing links , e.g,
* <pre>
* &lt;p&gt;
* &lt;a href="/content/site/page.html"&gt;hello&lt;/a&gt;
* &lt;img src="/content/dam/image1.png"&gt;
* &lt;/p&gt;
* </pre>
* </li>
* </ol>
*
* @param property an entry from a ValueMap
* @param htmlFields list of properties containing html
* @return stream containing extracted references
*/
static Stream<String> collectPaths(Map.Entry<String, Object> property, Set<String> htmlFields) {
Object p = property.getValue();

Stream<String> stream;
if (p.getClass().isArray()) {
if (p.getClass() == String[].class) {
stream = Arrays.stream((String[]) p);
} else if (p.getClass() == String.class){
stream = Stream.of((String) p);
} else {
stream = Stream.of(p.toString());
stream = Stream.empty();
}
if (htmlFields.contains(property.getKey())) {
stream = stream.flatMap(val -> {
try {
// parse html and extract links via underlying tagsoup library
LinkContentHandler linkHandler = new LinkContentHandler();
HtmlParser parser = new HtmlParser();
parser.parse(new ByteArrayInputStream(val.getBytes("utf-8")), linkHandler, new Metadata(), new ParseContext());
return linkHandler.getLinks().stream().map(Link::getUri);
} catch (Exception e) {
return Stream.empty();
}
});
}
return stream.filter(val -> regex.matcher(val).matches());
return stream;
}

// access from unit tsts
Map<String, EnumMap<REPORT, Object>> getReportData(){
/**
 * Collects broken references from the properties of the given resource.
 * <p>
 * Each property that is not in {@code skipList} is turned into candidate references via
 * {@code collectPaths}. A candidate is reported when it matches {@code regex} and the resource
 * resolver resolves it to a non-existing resource.
 *
 * @param resource   the resource to check
 * @param regex      pattern a candidate must fully match to be treated as a reference. Set from @FormField
 * @param skipList   names of properties to ignore. Set from @FormField
 * @param htmlFields names of properties containing html
 * @return broken references keyed by property path ({@code <resource path>/<property name>}). The value is a
 * List because a property can contain multiple links, e.g. if it is multivalued or it is html containing
 * multiple links. Properties without broken references are not included.
 */
static Map<String, List<String>> collectBrokenReferences(Resource resource, Pattern regex, Set<String> skipList, Set<String> htmlFields) {
    // loop-invariant lookups, hoisted out of the per-property / per-path work
    ResourceResolver resolver = resource.getResourceResolver();
    String resourcePath = resource.getPath();

    Map<String, List<String>> brokenRefs = new HashMap<>();
    for (Map.Entry<String, Object> entry : resource.getValueMap().entrySet()) {
        if (skipList.contains(entry.getKey())) {
            continue;
        }
        List<String> brokenPaths = collectPaths(entry, htmlFields)
                .filter(href -> regex.matcher(href).matches())
                .filter(path -> ResourceUtil.isNonExistingResource(resolver.resolve(path)))
                .collect(Collectors.toList());
        // only properties that actually contain broken references are reported
        if (!brokenPaths.isEmpty()) {
            brokenRefs.put(resourcePath + "/" + entry.getKey(), brokenPaths);
        }
    }
    return brokenRefs;
}
/**
 * Returns the report rows collected so far. Not public: it is here so that unit tests can access them.
 *
 * @return the {@code reportData} map of this process definition (the map itself, not a copy)
 */
Map<String, EnumMap<REPORT, Object>> getReportData() {
return reportData;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public void testReport() throws Exception {
slingContext.addModelsForClasses(GenericReport.class);

GenericReportExcelServlet servlet = new GenericReportExcelServlet();

servlet.doGet(request, response);

assertEquals("application/vnd.ms-excel", response.getContentType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,12 @@
import com.adobe.acs.commons.mcp.ControlledProcessManager;
import com.adobe.acs.commons.mcp.impl.AbstractResourceImpl;
import com.adobe.acs.commons.mcp.impl.ProcessInstanceImpl;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.NonExistingResource;
import org.apache.sling.api.resource.ResourceMetadata;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.*;
import org.apache.sling.testing.mock.sling.ResourceResolverType;
import org.apache.sling.testing.mock.sling.junit.SlingContext;
import static com.adobe.acs.commons.mcp.impl.processes.BrokenLinksReport.REPORT;
import static com.adobe.acs.commons.mcp.impl.processes.BrokenLinksReport.collectBrokenReferences;
import static com.adobe.acs.commons.mcp.impl.processes.BrokenLinksReport.collectPaths;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand All @@ -36,9 +35,9 @@
import javax.jcr.Session;
import javax.jcr.security.AccessControlManager;
import javax.jcr.security.Privilege;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static com.adobe.acs.commons.fam.impl.ActionManagerTest.*;
import static org.junit.Assert.assertEquals;
Expand All @@ -55,7 +54,7 @@ public class BrokenLinksTest {
@Rule
public final SlingContext slingContext = new SlingContext(ResourceResolverType.RESOURCERESOLVER_MOCK);

BrokenLinksReport tool;
private BrokenLinksReport tool;

@Before
public void setup() {
Expand Down Expand Up @@ -151,4 +150,63 @@ private ControlledProcessManager getControlledProcessManager() throws LoginExcep
when(cpm.getServiceResourceResolver()).thenReturn(getMockResolver());
return cpm;
}

/**
 * Checks the three property shapes handled by {@code collectPaths}: a single string, a string array and
 * an html fragment stored in a property listed as an html field.
 */
@Test
public void testCollectPaths() {
    Set<String> htmlProps = new HashSet<>(Arrays.asList("text"));

    // single-valued property: the value itself is the reference
    assertEquals(
            Arrays.asList("/ref1"),
            collectPaths(property("fileReference", "/ref1"), htmlProps).collect(Collectors.toList()));

    // multi-valued property: every element is a reference
    assertEquals(
            Arrays.asList("/ref1", "/ref2"),
            collectPaths(property("fileReference", new String[]{"/ref1", "/ref2"}), htmlProps).collect(Collectors.toList()));

    // html property: the link target is extracted from the markup
    assertEquals(
            Arrays.asList("/ref1"),
            collectPaths(property("text", "<p><a href='/ref1'>hello</p>"), htmlProps).collect(Collectors.toList()));
}

/**
 * Builds a read-only map entry standing in for a single property of a value map.
 * <p>
 * The entry is fully typed so that callers keep generic type information, and it is immutable:
 * {@code setValue} throws {@link UnsupportedOperationException} instead of silently doing nothing.
 *
 * @param key   the property name
 * @param value the property value
 * @return an immutable entry holding {@code key} and {@code value}
 */
private Map.Entry<String, Object> property(String key, Object value) {
    return new AbstractMap.SimpleImmutableEntry<>(key, value);
}

/**
 * Builds two resources whose properties point at one existing target ({@code /content/ref1}) and two
 * missing ones, then checks that only the missing targets are reported and that skipped properties are
 * left out.
 */
@Test
public void testCollectBrokenReferences() {
    Pattern contentRefs = Pattern.compile("/content/.+");
    Set<String> ignoredProps = new HashSet<>(Arrays.asList("skip1", "skip2"));
    Set<String> htmlProps = new HashSet<>(Arrays.asList("text"));
    String html = "<p><a href='/content/ref2'>hello</a><img src='/content/ref3'>hello</img></p>";

    slingContext.build()
            .resource("/test1",
                    "p1", "/content/ref1",
                    "p2", "/content/ref2",
                    "p3", new String[]{"/content/ref1"},
                    "p4", new String[]{"/content/ref1", "/content/ref2"},
                    "skip1", "/content/ref2")
            .resource("/test2",
                    "text", html,
                    "skip2", html)
            .resource("/content/ref1")
            .commit();

    // plain and multi-valued references
    Resource plainRefs = slingContext.resourceResolver().getResource("/test1");
    Map<String, List<String>> brokenPlain = collectBrokenReferences(plainRefs, contentRefs, ignoredProps, htmlProps);
    assertEquals(2, brokenPlain.size());
    assertEquals(Arrays.asList("/content/ref2"), brokenPlain.get("/test1/p2"));
    assertEquals(Arrays.asList("/content/ref2"), brokenPlain.get("/test1/p4"));

    // references embedded in html
    Resource htmlRefs = slingContext.resourceResolver().getResource("/test2");
    Map<String, List<String>> brokenHtml = collectBrokenReferences(htmlRefs, contentRefs, ignoredProps, htmlProps);
    assertEquals(1, brokenHtml.size());
    assertEquals(Arrays.asList("/content/ref2", "/content/ref3"), brokenHtml.get("/test2/text"));
}


}

0 comments on commit c0aa075

Please sign in to comment.