diff --git a/README.md b/README.md index e1187a7..5f7f58d 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,8 @@ A webapp ``.war`` - codebase behind [Charles Michaels](https://www.github.com/charlesmike) chatbot. It receives notifications read by this [ejb checker](https://github.com/opencharles/mention-notifications-ejb) and takes actions according to each of them. Any Github account can be used with this project; it's all dictated by the Github auth token used. -Say ``@charlesmike hello`` in a Github issue comment and see what happens. -Check out the [website](http://charles.amihaiemil.com) for more details on how to use this service. +Say ``@charlesmike hello`` in a Github issue comment and see what happens. +[This](http://www.amihaiemil.com/2017/05/23/meet-charles-michael.html) blog post and the [website](http://charles.amihaiemil.com) provide more details on how to use this service. ## Contribute diff --git a/src/main/java/com/amihaiemil/charles/filters/CorsFilter.java b/src/main/java/com/amihaiemil/charles/filters/CorsFilter.java index 0a7c00a..b38a682 100644 --- a/src/main/java/com/amihaiemil/charles/filters/CorsFilter.java +++ b/src/main/java/com/amihaiemil/charles/filters/CorsFilter.java @@ -49,7 +49,9 @@ public void init(FilterConfig filterConfig) throws ServletException {} @Override public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { - ((HttpServletResponse) response).addHeader("Access-Control-Allow-Origin", "*"); + ((HttpServletResponse) response).addHeader( + "Access-Control-Allow-Origin", "*" + ); chain.doFilter(request, response); } diff --git a/src/main/java/com/amihaiemil/charles/github/Action.java b/src/main/java/com/amihaiemil/charles/github/Action.java index 4e0f33f..631a24c 100644 --- a/src/main/java/com/amihaiemil/charles/github/Action.java +++ b/src/main/java/com/amihaiemil/charles/github/Action.java @@ -133,7 +133,7 @@ private void setupLog4jForAction() throws 
IOException { if(logRoot == null) { logRoot = "."; } - String logFilePath = logRoot + "/Charles-Github-Ejb/ActionsLogs/" + this.id + ".log"; + String logFilePath = logRoot + "/charles-rest/ActionsLogs/" + this.id + ".log"; File logFile = new File(logFilePath); logFile.getParentFile().mkdirs(); diff --git a/src/main/java/com/amihaiemil/charles/github/Brain.java b/src/main/java/com/amihaiemil/charles/github/Brain.java index 880efa3..d25e943 100644 --- a/src/main/java/com/amihaiemil/charles/github/Brain.java +++ b/src/main/java/com/amihaiemil/charles/github/Brain.java @@ -111,6 +111,20 @@ public Steps understand(Command com) throws IOException { this.finalCommentStep(com, category.language(), "denied.badlink.comment", com.authorLogin()) ); break; + case "indexsitemap": + steps = new PageHostedOnGithubCheck( + com, this.logger, + this.withCommonChecks( + com, category.language(), + this.indexSitemapStep(com, category.language()) + ), + this.finalCommentStep( + com, category.language(), + "denied.badlink.comment", + com.authorLogin() + ) + ); + break; case "deleteindex": steps = new DeleteIndexCommandCheck( com, this.logger, @@ -202,6 +216,37 @@ public Step indexPageStep(Command com, Language lang) throws IOException { ) ); } + + /** + * Steps for indexsitemap step. 
+ * @param com Command + * @param lang Language + * @return Step + * @throws IOException + */ + public Step indexSitemapStep(Command com, Language lang) throws IOException { + return new SendReply( + new TextReply( + com, + String.format( + lang.response("index.start.comment"), + com.authorLogin(), + this.logsLoc.address() + ) + ), this.logger, + new IndexSitemap( + com, this.logger, + new StarRepo( + com.issue().repo(), this.logger, + this.finalCommentStep( + com, lang, "index.finished.comment", + com.authorLogin(), + this.logsLoc.address() + ) + ) + ) + ); + } /** * Steps for indexsite action @@ -221,7 +266,7 @@ public Step indexSiteStep(Command com, Language lang) throws IOException { ) ), this.logger, new IndexSite( - com, logger, + com, this.logger, new StarRepo( com.issue().repo(), this.logger, this.finalCommentStep( diff --git a/src/main/java/com/amihaiemil/charles/github/IndexPage.java b/src/main/java/com/amihaiemil/charles/github/IndexPage.java index ed6fe3a..c483b39 100644 --- a/src/main/java/com/amihaiemil/charles/github/IndexPage.java +++ b/src/main/java/com/amihaiemil/charles/github/IndexPage.java @@ -76,11 +76,11 @@ public void perform() { logger.info("Crawling the page..."); WebDriver driver = this.phantomJsDriver(); driver.get(link); - WebPage snapshot = new SnapshotWebPage(new LiveWebPage(driver)); + WebPage snapshot = new SnapshotWebPage(new LiveWebPage(driver)); logger.info("Page crawled. Sending to aws..."); - new AmazonEsRepository(this.com.indexName()).export( - Arrays.asList(snapshot) - ); + new AmazonEsRepository(this.com.indexName()).export( + Arrays.asList(snapshot) + ); logger.info("Page successfully sent to aws!"); } catch ( final DataExportException | IOException | RuntimeException e @@ -99,7 +99,7 @@ public void perform() { * @return String link. 
*/ private String getLink() { - String body = this.com.json().getString("body"); + String body = this.com.json().getString("body"); return body.substring(body.indexOf('(') + 1, body.indexOf(')')); } diff --git a/src/main/java/com/amihaiemil/charles/github/IndexSite.java b/src/main/java/com/amihaiemil/charles/github/IndexSite.java index 3fd9d6b..d7302d7 100644 --- a/src/main/java/com/amihaiemil/charles/github/IndexSite.java +++ b/src/main/java/com/amihaiemil/charles/github/IndexSite.java @@ -104,7 +104,7 @@ public WebCrawl graphCrawl() throws IOException { siteIndexUrl, this.phantomJsDriver(), new IgnoredPatterns(), new AmazonEsRepository(this.com.indexName()), 20 ); - return new RetriableCrawl(siteCrawl); + return new RetriableCrawl(siteCrawl, 5); } } diff --git a/src/main/java/com/amihaiemil/charles/github/IndexSitemap.java b/src/main/java/com/amihaiemil/charles/github/IndexSitemap.java new file mode 100644 index 0000000..53543f5 --- /dev/null +++ b/src/main/java/com/amihaiemil/charles/github/IndexSitemap.java @@ -0,0 +1,105 @@ +/** + * Copyright (c) 2016-2017, Mihai Emil Andronache + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1)Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * 2)Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * 3)Neither the name of charles-rest nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +package com.amihaiemil.charles.github; + +import java.io.IOException; +import org.slf4j.Logger; +import com.amihaiemil.charles.DataExportException; +import com.amihaiemil.charles.RetriableCrawl; +import com.amihaiemil.charles.SitemapXmlCrawl; +import com.amihaiemil.charles.WebCrawl; +import com.amihaiemil.charles.aws.AmazonEsRepository; +import com.amihaiemil.charles.sitemap.SitemapXmlOnline; + +/** + * Step to index a website represented by a sitemap.xml file. + * @author Mihai Andronache (amihaiemil@gmail.com) + * @version $Id$ + * @since 1.0.0 + * + */ +public class IndexSitemap extends IndexStep { + + /** + * Command. + */ + private Command com; + + /** + * Action's logger. + */ + private Logger logger; + + /** + * Constructor. 
+ * @param com Command + * @param logger The action's logger + * @param next The next step to take + */ + public IndexSitemap(Command com, Logger logger, Step next) { + super(next); + this.com = com; + this.logger = logger; + } + + @Override + public void perform() { + String link = this.getLink(); + try { + logger.info("Indexing sitemap " + link + " ..."); + WebCrawl sitemap = new RetriableCrawl( + new SitemapXmlCrawl( + this.phantomJsDriver(), + new SitemapXmlOnline(link), + new AmazonEsRepository(this.com.indexName()), + 20 + ), + 5 + ); + sitemap.crawl(); + logger.info("Sitemap indexed successfully!"); + } catch ( + final DataExportException | IOException | RuntimeException e + ) { + logger.error("Exception while indexing the page " + link, e); + throw new IllegalStateException( + "Exception while indexing the page" + link, e + ); + } + this.next().perform(); + } + + /** + * Get the sitemap's link from the command's text which should + * be in markdown format, with a link like + * [this](http://link.com/here/the/link) . + * @return String link. 
+ */ + private String getLink() { + String body = this.com.json().getString("body"); + return body.substring(body.indexOf('(') + 1, body.indexOf(')')); + } +} diff --git a/src/main/java/com/amihaiemil/charles/rest/LogsResource.java b/src/main/java/com/amihaiemil/charles/rest/LogsResource.java index 81950a8..b3bd6e2 100644 --- a/src/main/java/com/amihaiemil/charles/rest/LogsResource.java +++ b/src/main/java/com/amihaiemil/charles/rest/LogsResource.java @@ -49,7 +49,7 @@ public class LogsResource { public Response getActionLogs(@PathParam("name") String name) { String logroot = System.getProperty("LOG_ROOT"); if(logroot != null) { - File log = new File(logroot + "/Charles-Github-Ejb/ActionsLogs/" + name); + File log = new File(logroot + "/charles-rest/ActionsLogs/" + name); if(log.exists()) { return Response.ok() .entity(log) diff --git a/src/main/resources/commands_en.properties b/src/main/resources/commands_en.properties index b0d550b..df8445d 100644 --- a/src/main/resources/commands_en.properties +++ b/src/main/resources/commands_en.properties @@ -5,6 +5,8 @@ deleteindex.command=delete indexsite.command=index^site +indexsitemap.command=index^sitemap + indexpage.command=index^page hello.command=hello diff --git a/src/main/resources/responses_en.properties b/src/main/resources/responses_en.properties index bbe6012..04961be 100644 --- a/src/main/resources/responses_en.properties +++ b/src/main/resources/responses_en.properties @@ -14,10 +14,11 @@ denied.fork.comment=@%s the repository must not be a fork, you must own this rep denied.name.comment=@%s the repository's name must match the format ``owner.github.io`` or it must have a project website on branch ``gh-pages`` -denied.badlink.comment=@%s the given page does not seem to belong to the website hosted in this repo!\n\n\ +denied.badlink.comment=@%s the given page or sitemap does not seem to belong to the website hosted in this repo!\n\n\ Make sure that the link starts with ``http://`` or ``https://`` followed by\ 
- ``yourusername.github.io/...`` Do not use the CNAME or any other link, even if it redirects to\ - a page from this repo. \n\n \ If this condition is met and it still doesn't work please open an issue [here]\ + ``yourusername.github.io/...``. Do not use the CNAME or any other domain, even if it redirects to\ + a page from this repo. Also, make sure that the link is specified in Markdown format. \n\n \ + If this condition is met and it still doesn't work please open an issue [here]\ (https://github.com/opencharles/charles-rest/issues/new). denied.deleteindex.comment=@%s the repository's name in a delete command has to be between single back apostrophes.\n\