Skip to content
This repository has been archived by the owner on Dec 5, 2020. It is now read-only.

Commit

Permalink
Index sitemap action
Browse files Browse the repository at this point in the history
  • Loading branch information
amihaiemil committed May 24, 2017
1 parent c2bc40b commit 9039331
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 15 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
A webapp (``.war``) - the codebase behind the [Charles Michaels](https://www.github.com/charlesmike) chatbot. It receives the notifications read by this [ejb checker](https://github.com/opencharles/mention-notifications-ejb) and takes
an action for each of them. Any GitHub account can be used with this project; the account is determined by the GitHub auth token that is used.

Say ``@charlesmike hello`` in a Github issue comment and see what happens.
Check out the [website](http://charles.amihaiemil.com) for more details on how to use this service.
Say ``@charlesmike hello`` in a Github issue comment and see what happens.
[This](http://www.amihaiemil.com/2017/05/23/meet-charles-michael.html) blog post and the [website](http://charles.amihaiemil.com) provide more details on how to use this service.

## Contribute

Expand Down
4 changes: 3 additions & 1 deletion src/main/java/com/amihaiemil/charles/filters/CorsFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,9 @@ public void init(FilterConfig filterConfig) throws ServletException {}

@Override
public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException {
((HttpServletResponse) response).addHeader("Access-Control-Allow-Origin", "*");
((HttpServletResponse) response).addHeader(
"Access-Control-Allow-Origin", "*"
);
chain.doFilter(request, response);
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/amihaiemil/charles/github/Action.java
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ private void setupLog4jForAction() throws IOException {
if(logRoot == null) {
logRoot = ".";
}
String logFilePath = logRoot + "/Charles-Github-Ejb/ActionsLogs/" + this.id + ".log";
String logFilePath = logRoot + "/charles-rest/ActionsLogs/" + this.id + ".log";

File logFile = new File(logFilePath);
logFile.getParentFile().mkdirs();
Expand Down
47 changes: 46 additions & 1 deletion src/main/java/com/amihaiemil/charles/github/Brain.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,20 @@ public Steps understand(Command com) throws IOException {
this.finalCommentStep(com, category.language(), "denied.badlink.comment", com.authorLogin())
);
break;
case "indexsitemap":
steps = new PageHostedOnGithubCheck(
com, this.logger,
this.withCommonChecks(
com, category.language(),
this.indexSitemapStep(com, category.language())
),
this.finalCommentStep(
com, category.language(),
"denied.badlink.comment",
com.authorLogin()
)
);
break;
case "deleteindex":
steps = new DeleteIndexCommandCheck(
com, this.logger,
Expand Down Expand Up @@ -202,6 +216,37 @@ public Step indexPageStep(Command com, Language lang) throws IOException {
)
);
}

/**
 * Builds the chain of steps run for an indexsitemap command.
 * The chain is, in order: a {@link SendReply} carrying the
 * "index.start.comment" text, an {@link IndexSitemap}, a {@link StarRepo}
 * and finally the step returned by {@code finalCommentStep} for
 * "index.finished.comment".
 * @param com Command that triggered the action.
 * @param lang Language used to look up the response texts.
 * @return The first step of the chain.
 * @throws IOException If building any part of the chain fails with it.
 */
public Step indexSitemapStep(Command com, Language lang) throws IOException {
    // Reply announcing the start; formatted with the author's login
    // and the address of the logs.
    final TextReply started = new TextReply(
        com,
        String.format(
            lang.response("index.start.comment"),
            com.authorLogin(),
            this.logsLoc.address()
        )
    );
    // Tail of the chain: StarRepo followed by the final comment step.
    final StarRepo starThenComment = new StarRepo(
        com.issue().repo(), this.logger,
        this.finalCommentStep(
            com, lang, "index.finished.comment",
            com.authorLogin(),
            this.logsLoc.address()
        )
    );
    final IndexSitemap indexing = new IndexSitemap(
        com, this.logger, starThenComment
    );
    return new SendReply(started, this.logger, indexing);
}

/**
* Steps for indexsite action
Expand All @@ -221,7 +266,7 @@ public Step indexSiteStep(Command com, Language lang) throws IOException {
)
), this.logger,
new IndexSite(
com, logger,
com, this.logger,
new StarRepo(
com.issue().repo(), this.logger,
this.finalCommentStep(
Expand Down
10 changes: 5 additions & 5 deletions src/main/java/com/amihaiemil/charles/github/IndexPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ public void perform() {
logger.info("Crawling the page...");
WebDriver driver = this.phantomJsDriver();
driver.get(link);
WebPage snapshot = new SnapshotWebPage(new LiveWebPage(driver));
WebPage snapshot = new SnapshotWebPage(new LiveWebPage(driver));
logger.info("Page crawled. Sending to aws...");
new AmazonEsRepository(this.com.indexName()).export(
Arrays.asList(snapshot)
);
new AmazonEsRepository(this.com.indexName()).export(
Arrays.asList(snapshot)
);
logger.info("Page successfully sent to aws!");
} catch (
final DataExportException | IOException | RuntimeException e
Expand All @@ -99,7 +99,7 @@ public void perform() {
* @return String link.
*/
private String getLink() {
String body = this.com.json().getString("body");
String body = this.com.json().getString("body");
return body.substring(body.indexOf('(') + 1, body.indexOf(')'));
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/amihaiemil/charles/github/IndexSite.java
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public WebCrawl graphCrawl() throws IOException {
siteIndexUrl, this.phantomJsDriver(), new IgnoredPatterns(),
new AmazonEsRepository(this.com.indexName()), 20
);
return new RetriableCrawl(siteCrawl);
return new RetriableCrawl(siteCrawl, 5);
}

}
105 changes: 105 additions & 0 deletions src/main/java/com/amihaiemil/charles/github/IndexSitemap.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/**
* Copyright (c) 2016-2017, Mihai Emil Andronache
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1)Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2)Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3)Neither the name of charles-rest nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package com.amihaiemil.charles.github;

import java.io.IOException;
import org.slf4j.Logger;
import com.amihaiemil.charles.DataExportException;
import com.amihaiemil.charles.RetriableCrawl;
import com.amihaiemil.charles.SitemapXmlCrawl;
import com.amihaiemil.charles.WebCrawl;
import com.amihaiemil.charles.aws.AmazonEsRepository;
import com.amihaiemil.charles.sitemap.SitemapXmlOnline;

/**
* Step to index a website represented by a sitemap.xml file.
* @author Mihai Andronache ([email protected])
* @version $Id$
* @since 1.0.0
*
*/
public class IndexSitemap extends IndexStep {

    /**
     * Command.
     */
    private final Command com;

    /**
     * Action's logger.
     */
    private final Logger logger;

    /**
     * Constructor.
     * @param com Command
     * @param logger The action's logger
     * @param next The next step to take
     */
    public IndexSitemap(Command com, Logger logger, Step next) {
        super(next);
        this.com = com;
        this.logger = logger;
    }

    /**
     * Crawls the sitemap whose link is given in the command's text,
     * exporting to an {@link AmazonEsRepository} built for the command's
     * index name, then performs the next step.
     * @throws IllegalStateException If no link can be extracted from the
     *  command's text or if the crawl fails; in the latter case the
     *  original exception is kept as the cause.
     */
    @Override
    public void perform() {
        String link = this.getLink();
        try {
            this.logger.info("Indexing sitemap " + link + " ...");
            // NOTE(review): 20 and 5 mirror the values used in IndexSite;
            // presumably export batch size and number of retries - confirm
            // against SitemapXmlCrawl and RetriableCrawl.
            WebCrawl sitemap = new RetriableCrawl(
                new SitemapXmlCrawl(
                    this.phantomJsDriver(),
                    new SitemapXmlOnline(link),
                    new AmazonEsRepository(this.com.indexName()),
                    20
                ),
                5
            );
            sitemap.crawl();
            this.logger.info("Sitemap indexed successfully!");
        } catch (
            final DataExportException | IOException | RuntimeException e
        ) {
            // One message for both the log and the exception, so the link
            // is always separated from the text by a space.
            final String message = "Exception while indexing the page " + link;
            this.logger.error(message, e);
            throw new IllegalStateException(message, e);
        }
        this.next().perform();
    }

    /**
     * Get the sitemap's link from the command's text which should
     * be in markdown format, with a link like
     * [this](http://link.com/here/the/link) .
     * The link is the text between the first '(' and the first ')'
     * that follows it.
     * @return String link.
     * @throws IllegalStateException If the text has no such pair of
     *  parentheses.
     */
    private String getLink() {
        String body = this.com.json().getString("body");
        int open = body.indexOf('(');
        // Look for the closing parenthesis only after the opening one, so
        // a stray ')' earlier in the comment does not break the extraction.
        int close = body.indexOf(')', open + 1);
        if (open < 0 || close < 0) {
            throw new IllegalStateException(
                "No Markdown link found in the command's text: " + body
            );
        }
        return body.substring(open + 1, close);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class LogsResource {
public Response getActionLogs(@PathParam("name") String name) {
String logroot = System.getProperty("LOG_ROOT");
if(logroot != null) {
File log = new File(logroot + "/Charles-Github-Ejb/ActionsLogs/" + name);
File log = new File(logroot + "/charles-rest/ActionsLogs/" + name);
if(log.exists()) {
return Response.ok()
.entity(log)
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/commands_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ deleteindex.command=delete

indexsite.command=index^site

indexsitemap.command=index^sitemap

indexpage.command=index^page

hello.command=hello
7 changes: 4 additions & 3 deletions src/main/resources/responses_en.properties
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ denied.fork.comment=@%s the repository must not be a fork, you must own this rep
denied.name.comment=@%s the repository's name must match the format ``owner.github.io`` or it must have a project website on branch ``gh-pages``
denied.badlink.comment=@%s the given page does not seem to belong to the website hosted in this repo!\n\n\
denied.badlink.comment=@%s the given page or sitemap does not seem to belong to the website hosted in this repo!\n\n\
Make sure that the link starts with ``http://`` or ``https://`` followed by\
``yourusername.github.io/...`` Do not use the CNAME or any other link, even if it redirects to\
a page from this repo. \n\n \ If this condition is met and it still doesn't work please open an issue [here]\
``yourusername.github.io/...``. Do not use the CNAME or any other domain, even if it redirects to\
a page from this repo. Also, make sure that the link is specified in Markdown format. \n\n \
If this condition is met and it still doesn't work please open an issue [here]\
(https://github.com/opencharles/charles-rest/issues/new).

denied.deleteindex.comment=@%s the repository's name in a delete command has to be between single back apostrophes.\n\
Expand Down

0 comments on commit 9039331

Please sign in to comment.