Skip to content

Commit

Permalink
[NOID] fixes #3477: apoc.load.html does not always report href (#3478) (#3505)
Browse files Browse the repository at this point in the history

Co-authored-by: Andrea Santurbano <[email protected]>
  • Loading branch information
vga91 and conker84 committed Apr 28, 2023
1 parent a03dc62 commit 3d6f4e5
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
15 changes: 13 additions & 2 deletions extended/src/main/java/apoc/load/LoadHtml.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import apoc.util.MissingDependencyException;
import apoc.util.FileUtils;
import java.nio.charset.UnsupportedCharsetException;

import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
Expand Down Expand Up @@ -76,7 +78,7 @@ private Stream<MapResult> readHtmlPage(String url, Map<String, String> query, Ma
}

return Stream.of(new MapResult(output));
} catch ( UnsupportedCharsetException e) {
} catch (UnsupportedCharsetException e) {
throw new RuntimeException(UNSUPPORTED_CHARSET_ERR + config.getCharset());
} catch (IllegalArgumentException | ClassCastException e) {
throw new RuntimeException(INVALID_CONFIG_ERR + config);
Expand Down Expand Up @@ -139,7 +141,16 @@ private static Map<String, String> getAttributes(Element element) {
final String key = attribute.getKey();
// with href/src attribute we prepend baseUri path
final boolean attributeHasLink = key.equals("href") || key.equals("src");
attributes.put(key, attributeHasLink ? element.absUrl(key) : attribute.getValue());
String attr = null;
if (attributeHasLink) {
attr = element.absUrl(key);
if (StringUtils.isBlank(attr)) {
attr = attribute.getValue();
}
} else {
attr = attribute.getValue();
}
attributes.put(key, attr);
}
}

Expand Down
14 changes: 14 additions & 0 deletions extended/src/test/java/apoc/load/LoadHtmlTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import apoc.util.TestUtil;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
Expand Down Expand Up @@ -365,6 +366,19 @@ public void testQueryWithFailsSilentlyWithLog() {
});
}

/**
 * Checks that {@code apoc.load.html} reports the {@code href} attribute of an
 * {@code a.image} link when the page is loaded from a local file URI.
 *
 * <p>The expected value is the relative link {@code /wiki/File:Aap_Kaa_Hak_titles.jpg};
 * only the first row produced by the query is inspected.
 */
@Test
public void testHref() {
    // The fixture is addressed through a file: URI built from its relative path.
    final String pageUrl = new File("src/test/resources/wikipedia.html").toURI().toString();
    // Result key "a" is mapped to the CSS selector "a.image".
    final Map<String, Object> selectors = Map.of("a", "a.image");

    testResult(
            db,
            "CALL apoc.load.html($url, $query) YIELD value UNWIND value.a AS row RETURN row",
            map("url", pageUrl, "query", selectors),
            rows -> {
                final Object firstElement = rows.next().get("row");
                final Object elementAttributes = ((Map<String, Object>) firstElement).get("attributes");
                final Object href = ((Map<String, Object>) elementAttributes).get("href");
                Assert.assertEquals("/wiki/File:Aap_Kaa_Hak_titles.jpg", href);
            });
}

@Test
public void testQueryWithFailsSilentlyWithList() {
Map<String, Object> query = map("a", "a", "invalid", "invalid", "h6", "h6");
Expand Down

0 comments on commit 3d6f4e5

Please sign in to comment.