Skip to content

Commit

Permalink
fixes #3477: apoc.load.html does not always report href
Browse files Browse the repository at this point in the history
  • Loading branch information
conker84 committed Feb 28, 2023
1 parent 490c6b5 commit 4fa02eb
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 2 deletions.
15 changes: 13 additions & 2 deletions full/src/main/java/apoc/load/LoadHtml.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import apoc.util.MissingDependencyException;
import apoc.util.FileUtils;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
Expand Down Expand Up @@ -74,7 +76,7 @@ private Stream<MapResult> readHtmlPage(String url, Map<String, String> query, Ma
}

return Stream.of(new MapResult(output));
} catch ( UnsupportedCharsetException e) {
} catch (UnsupportedCharsetException e) {
throw new RuntimeException("Unsupported charset: " + config.getCharset());
} catch (IllegalArgumentException | ClassCastException e) {
throw new RuntimeException("Invalid config: " + config);
Expand Down Expand Up @@ -137,7 +139,16 @@ private static Map<String, String> getAttributes(Element element) {
final String key = attribute.getKey();
// for href/src attributes we resolve the value against the baseUri; if that yields a blank result, we fall back to the raw attribute value
final boolean attributeHasLink = key.equals("href") || key.equals("src");
attributes.put(key, attributeHasLink ? element.absUrl(key) : attribute.getValue());
String attr = null;
if (attributeHasLink) {
attr = element.absUrl(key);
if (StringUtils.isBlank(attr)) {
attr = attribute.getValue();
}
} else {
attr = attribute.getValue();
}
attributes.put(key, attr);
}
}

Expand Down
17 changes: 17 additions & 0 deletions full/src/test/java/apoc/load/LoadHtmlTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import apoc.ApocSettings;
import apoc.util.TestUtil;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -353,6 +354,22 @@ public void testQueryWithFailsSilentlyWithLog() {
});
}

/**
 * Regression test for issue #3477: checks that {@code apoc.load.html} reports the
 * {@code href} attribute of the matched anchor, here expected as a relative path.
 *
 * <p>NOTE(review): this test loads a live third-party page (https://www.amazon.it/),
 * so it needs network access and will break if the page markup or the link changes.
 * Consider replacing the URL with a local HTML fixture.
 */
@Test
public void testHref() {
    String query = "CALL apoc.load.html('https://www.amazon.it/', { href: 'div#nav-xshop > a[class=\"nav-a \"]'}) YIELD value\n" +
            "UNWIND value.href AS row\n" +
            "WITH row\n" +
            "WHERE row.text = 'Bestseller'\n" +
            "RETURN row ";

    testResult(db, query, Map.of(),
            result -> {
                // Fail with a readable message instead of a NoSuchElementException
                // when the selector/filter matches nothing.
                Assert.assertTrue("expected at least one 'Bestseller' link in the result", result.hasNext());

                @SuppressWarnings("unchecked") // procedure output is an untyped map structure
                Map<String, Object> row = (Map<String, Object>) result.next().get("row");
                Assert.assertNotNull("expected a 'row' column in the result", row);

                @SuppressWarnings("unchecked") // same untyped structure, one level down
                Map<String, Object> attributes = (Map<String, Object>) row.get("attributes");
                Assert.assertNotNull("expected the matched element to expose its attributes", attributes);

                Assert.assertEquals("/gp/bestsellers/?ref_=nav_cs_bestsellers", attributes.get("href"));
            });
}

@Test
public void testQueryWithFailsSilentlyWithList() {
Map<String, Object> query = map("a", "a", "invalid", "invalid", "h6", "h6");
Expand Down

0 comments on commit 4fa02eb

Please sign in to comment.