From 3d6f4e5cce510fe69ac0c2104fc96d59fc46ae61 Mon Sep 17 00:00:00 2001 From: Giuseppe Villani Date: Tue, 28 Mar 2023 11:40:11 +0200 Subject: [PATCH] [NOID] fixes #3477: apoc.load.html does not always report href (#3478) (#3505) Co-authored-by: Andrea Santurbano --- extended/src/main/java/apoc/load/LoadHtml.java | 15 +++++++++++++-- .../src/test/java/apoc/load/LoadHtmlTest.java | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/extended/src/main/java/apoc/load/LoadHtml.java b/extended/src/main/java/apoc/load/LoadHtml.java index 6ed0cef637..6716b11b6c 100644 --- a/extended/src/main/java/apoc/load/LoadHtml.java +++ b/extended/src/main/java/apoc/load/LoadHtml.java @@ -5,6 +5,8 @@ import apoc.util.MissingDependencyException; import apoc.util.FileUtils; import java.nio.charset.UnsupportedCharsetException; + +import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Attribute; import org.jsoup.nodes.Document; @@ -76,7 +78,7 @@ private Stream readHtmlPage(String url, Map query, Ma } return Stream.of(new MapResult(output)); - } catch ( UnsupportedCharsetException e) { + } catch (UnsupportedCharsetException e) { throw new RuntimeException(UNSUPPORTED_CHARSET_ERR + config.getCharset()); } catch (IllegalArgumentException | ClassCastException e) { throw new RuntimeException(INVALID_CONFIG_ERR + config); @@ -139,7 +141,16 @@ private static Map getAttributes(Element element) { final String key = attribute.getKey(); // with href/src attribute we prepend baseUri path final boolean attributeHasLink = key.equals("href") || key.equals("src"); - attributes.put(key, attributeHasLink ? element.absUrl(key) : attribute.getValue()); + String attr = null; + if (attributeHasLink) { + attr = element.absUrl(key); + if (StringUtils.isBlank(attr)) { + attr = attribute.getValue(); + } + } else { + attr = attribute.getValue(); + } + attributes.put(key, attr); } } diff --git a/extended/src/test/java/apoc/load/LoadHtmlTest.java b/extended/src/test/java/apoc/load/LoadHtmlTest.java index 6ed8be5760..4c37852be4 100644 --- a/extended/src/test/java/apoc/load/LoadHtmlTest.java +++ b/extended/src/test/java/apoc/load/LoadHtmlTest.java @@ -2,6 +2,7 @@ import apoc.util.TestUtil; import org.apache.commons.lang.exception.ExceptionUtils; +import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; @@ -365,6 +366,19 @@ public void testQueryWithFailsSilentlyWithLog() { }); } + @Test + public void testHref() { + Map query = Map.of("a", "a.image"); + + testResult(db, "CALL apoc.load.html($url, $query) YIELD value UNWIND value.a AS row RETURN row", + map("url", new File("src/test/resources/wikipedia.html").toURI().toString(), "query", query), + result -> { + Map row = (Map) result.next().get("row"); + Map attributes = (Map) row.get("attributes"); + Assert.assertEquals("/wiki/File:Aap_Kaa_Hak_titles.jpg", attributes.get("href")); + }); + } + @Test public void testQueryWithFailsSilentlyWithList() { Map query = map("a", "a", "invalid", "invalid", "h6", "h6");