Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Robots.txt generation to enable different file by path #3415

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com)

## Unreleased ([details][unreleased changes details])

### Added

- #3415 - Allow Robots.txt generation to serve a different file per requested resource path

## 6.6.4 - 2024-08-14

### Fixed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
Expand Down Expand Up @@ -79,6 +80,8 @@ public final class RobotsServlet extends SlingSafeMethodsServlet {

private String robotsContentsPropertyPath;

private final Map<String, String> robotsContentsPropertyPathMap = new HashMap<>();

private boolean printGroupingComments;

private int crawlDelay;
Expand All @@ -95,6 +98,16 @@ public final class RobotsServlet extends SlingSafeMethodsServlet {
protected void activate(RobotsServletConfig config) {
externalizerDomain = config.externalizer_domain();
robotsContentsPropertyPath = config.robots_content_property_path();
for (String mapping : config.robots_content_property_pathMappings()) {
if (StringUtils.isNotBlank(mapping)) {
String[] mappingParts = mapping.split("=");
if (mappingParts.length == 2) {
robotsContentsPropertyPathMap.put(mappingParts[0], mappingParts[1]);
} else {
log.warn("Invalid robots_content_property_path mapping: {}", mapping);
}
}
}
printGroupingComments = config.print_grouping_comments();
crawlDelay = config.crawl_delay();

Expand All @@ -111,7 +124,7 @@ protected void doGet(SlingHttpServletRequest request, SlingHttpServletResponse r
}

private void write(SlingHttpServletRequest request, SlingHttpServletResponse response) throws IOException {
if (StringUtils.isNotBlank(robotsContentsPropertyPath)) {
if (StringUtils.isNotBlank(robotsContentsPropertyPath) || !robotsContentsPropertyPathMap.isEmpty()) {
writeFromBinaryProperty(request, response);
} else {
writeFromOsgiConfig(request, response);
Expand Down Expand Up @@ -211,9 +224,18 @@ private void addRuleForPageHavingBooleanProperty(Page page, List<String> propNam
}

private void writeFromBinaryProperty(SlingHttpServletRequest request, SlingHttpServletResponse response) throws IOException {
String absoluteRobotsContentsPropertyPath = robotsContentsPropertyPath;
String absoluteRobotsContentsPropertyPath = robotsContentsPropertyPathMap.get(request.getResource().getPath());
if (StringUtils.isBlank(absoluteRobotsContentsPropertyPath)) {
if (StringUtils.isNotBlank(robotsContentsPropertyPath)) {
absoluteRobotsContentsPropertyPath = robotsContentsPropertyPath;
} else {
log.error("robots file requested but resource path {} not found in mappings", request.getResource().getPath());
response.sendError(HttpServletResponse.SC_NOT_FOUND);
return;
}
}
if (!absoluteRobotsContentsPropertyPath.startsWith("/")) {
absoluteRobotsContentsPropertyPath = request.getResource().getPath() + "/" + robotsContentsPropertyPath;
absoluteRobotsContentsPropertyPath = request.getResource().getPath() + "/" + absoluteRobotsContentsPropertyPath;
}

boolean written = false;
Expand Down Expand Up @@ -334,6 +356,9 @@ private String buildAllowedOrDisallowedDirective(boolean isAllowed, String allow
@AttributeDefinition(name = "Robots Content Property", description = "Path (either relative or absolute) to a String or Binary property containing the entire robots.txt contents. This could be a page property (e.g. robotsTxtContents) or the contents of a file within the DAM (e.g. /content/dam/my-site/seo/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data). If this is specified, all other configurations are effectively ignored.")
String robots_content_property_path();

@AttributeDefinition(name = "Robots Content Property Map", description = "Overrides Robots Content Property with mappings of <resource path>=<Robots Content Property> such that only a resource matching the exact <resource path> will honor the Robots Content Property. If a valid mapping is not found for the current resource path, Robots Content Property will be used as a fallback if specified.")
String[] robots_content_property_pathMappings() default {};

@AttributeDefinition(name = "User Agent Directives", description = "A set of User-agent directives to add to the robots file. Each directive is optionally pre-fixed with a ruleGroupName. Syntax: [<ruleGroupName>:]<user agent name>")
String[] user_agent_directives() default {};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,40 @@ public void testWriteFromPageProperty() throws IOException, ServletException {
assertResponse("RobotsServlet_testWriteFromPageProperty.txt", response);
}

@Test
public void testWriteFromPagePropertyFromPathMappings() throws IOException, ServletException {
Map<String, Object> props = new HashMap<>();
props.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
props.put("robots.content.property.pathMappings", new String[] {"/content/geometrixx/es/jcr:content=thisPropDoesntExist",
"/content/geometrixx/en/jcr:content=robotsContents"});
RobotsServlet robotsServlet = context.registerInjectActivateService(new RobotsServlet(), props);
robotsServlet.doGet(request, response);
assertEquals("servlet returned an error", 200, response.getStatus());
assertResponse("RobotsServlet_testWriteFromPageProperty.txt", response);
}

@Test
public void testWriteFromAsset() throws ServletException, IOException {
context.create().asset("/content/dam/geometrixx/robots.txt", getClass().getResourceAsStream("RobotsServlet_testWriteFromAsset.txt"), "text/plain");

Map<String, Object> props = new HashMap<>();
props.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
props.put("robots.content.property.path", "/content/dam/geometrixx/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data");
props.put("robots.content.property.pathMappings", new String[] {"/content/some/other/page/jcr:content=/some/other/path/jcr:data"});
RobotsServlet robotsServlet = context.registerInjectActivateService(new RobotsServlet(), props);
robotsServlet.doGet(request, response);
assertEquals("servlet returned an error", 200, response.getStatus());
assertResponse("RobotsServlet_testWriteFromAsset.txt", response);
}

@Test
public void testWriteFromAssetFromMappings() throws ServletException, IOException {
context.create().asset("/content/dam/geometrixx/robots.txt", getClass().getResourceAsStream("RobotsServlet_testWriteFromAsset.txt"), "text/plain");

Map<String, Object> props = new HashMap<>();
props.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
props.put("robots.content.property.pathMappings", new String[] {"/content/some/other/page/jcr:content=/some/other/path/jcr:data",
"/content/geometrixx/en/jcr:content=/content/dam/geometrixx/robots.txt/jcr:content/renditions/original/jcr:content/jcr:data"});
RobotsServlet robotsServlet = context.registerInjectActivateService(new RobotsServlet(), props);
robotsServlet.doGet(request, response);
assertEquals("servlet returned an error", 200, response.getStatus());
Expand Down Expand Up @@ -245,6 +272,17 @@ public void testWriteFromNonExistentPropertyRelative() throws ServletException,
assertEquals("servlet did not return the expected error", 404, response.getStatus());
}

@Test
public void testWriteFromNonExistentPathMapping() throws ServletException, IOException {
Map<String, Object> props = new HashMap<>();
props.put("sling.servlet.resourceTypes", "geometrixx/components/structure/page");
props.put("robots.content.property.pathMappings", new String[] {"/content/some/other/page/jcr:content=/non/existent/path",
"", "/content/geometrixx/en/jcr:content"});
RobotsServlet robotsServlet = context.registerInjectActivateService(new RobotsServlet(), props);
robotsServlet.doGet(request, response);
assertEquals("servlet did not return the expected error", 404, response.getStatus());
}

private void assertResponse(String expectedResponseOutputResourceName, MockSlingHttpServletResponse response) throws IOException {
try (InputStream resourceAsStream = getClass().getResourceAsStream(expectedResponseOutputResourceName)) {
// both response and input stream contains OS dependent line endings (no need to normalize)
Expand Down
Loading