diff --git a/mf2py/parser.py b/mf2py/parser.py
index 68d1050..f38d6c2 100644
--- a/mf2py/parser.py
+++ b/mf2py/parser.py
@@ -165,8 +165,8 @@ def handle_microformat(root_class_names, el, value_property=None,
properties = self.dict_class()
children = []
self._default_date = None
- # flag for processing implied name
- do_implied_name = True
+ # for processing implied properties: records which property types (p, u, d, e) and nested microformats (h) were parsed
+ parsed_types_aggregation = set()
if backcompat_mode:
el = backcompat.apply_rules(el, self.__html_parser__)
@@ -174,13 +174,13 @@ def handle_microformat(root_class_names, el, value_property=None,
# parse for properties and children
for child in get_children(el):
- child_props, child_children, child_stops_implied_name = parse_props(child)
+ child_props, child_children, child_parsed_types_aggregation = parse_props(child)
for key, new_value in child_props.items():
prop_value = properties.get(key, [])
prop_value.extend(new_value)
properties[key] = prop_value
children.extend(child_children)
- do_implied_name = do_implied_name and not child_stops_implied_name
+ parsed_types_aggregation.update(child_parsed_types_aggregation)
# complex h-* objects can take their "value" from the
# first explicit property ("name" for p-* or "url" for u-*)
@@ -190,8 +190,7 @@ def handle_microformat(root_class_names, el, value_property=None,
# if some properties not already found find in implied ways unless in backcompat mode
if not backcompat_mode:
# stop implied name if any p-*, e-*, h-* is already found
- if "name" not in properties and do_implied_name:
-
+ if "name" not in properties and parsed_types_aggregation.isdisjoint("peh"):
properties["name"] = [implied_properties.name(el, base_url=self.__url__)]
if "photo" not in properties:
@@ -199,7 +198,8 @@ def handle_microformat(root_class_names, el, value_property=None,
if x is not None:
properties["photo"] = [x]
- if "url" not in properties:
+ # stop implied url if any u-* or h-* is already found
+ if "url" not in properties and parsed_types_aggregation.isdisjoint("uh"):
x = implied_properties.url(el, base_url=self.__url__)
if x is not None:
properties["url"] = [x]
@@ -241,8 +241,8 @@ def parse_props(el):
"""
props = self.dict_class()
children = []
- # Does this element stop implied name?
- stops_implied_name = False
+ # for processing implied properties: records which property types (p, u, d, e) and nested microformats (h) were parsed in this element or the children it recurses into
+ parsed_types_aggregation = set()
classes = el.get("class", [])
# Is this element a microformat2 root?
@@ -254,6 +254,9 @@ def parse_props(el):
root_class_names = backcompat.root(classes)
backcompat_mode = True
+ if root_class_names:
+ parsed_types_aggregation.add('h')
+
# Is this a property element (p-*, u-*, etc.) flag
# False is default
is_property_el = False
@@ -262,16 +265,14 @@ def parse_props(el):
p_value = None
for prop_name in mf2_classes.text(classes):
is_property_el = True
- stops_implied_name = True
+ parsed_types_aggregation.add('p')
prop_value = props.setdefault(prop_name, [])
# if value has not been parsed then parse it
if p_value is None:
p_value = text_type(parse_property.text(el, base_url=self.__url__))
-
if root_class_names:
- stops_implied_name = True
prop_value.append(handle_microformat(
root_class_names, el, value_property="name",
simple_value=p_value, backcompat_mode=backcompat_mode))
@@ -282,6 +283,7 @@ def parse_props(el):
u_value = None
for prop_name in mf2_classes.url(classes):
is_property_el = True
+ parsed_types_aggregation.add('u')
prop_value = props.setdefault(prop_name, [])
# if value has not been parsed then parse it
@@ -289,7 +291,6 @@ def parse_props(el):
u_value = parse_property.url(el, self.dict_class, self.__img_with_alt__, base_url=self.__url__)
if root_class_names:
- stops_implied_name = True
prop_value.append(handle_microformat(
root_class_names, el, value_property="url",
simple_value=u_value, backcompat_mode=backcompat_mode))
@@ -303,6 +304,7 @@ def parse_props(el):
dt_value = None
for prop_name in mf2_classes.datetime(classes):
is_property_el = True
+ parsed_types_aggregation.add('d')
prop_value = props.setdefault(prop_name, [])
# if value has not been parsed then parse it
@@ -326,7 +328,7 @@ def parse_props(el):
e_value = None
for prop_name in mf2_classes.embedded(classes):
is_property_el = True
- stops_implied_name = True
+ parsed_types_aggregation.add('e')
prop_value = props.setdefault(prop_name, [])
# if value has not been parsed then parse it
@@ -347,21 +349,18 @@ def parse_props(el):
# if this is not a property element, but it is a h-* microformat,
# add it to our list of children
if not is_property_el and root_class_names:
- stops_implied_name = True
children.append(handle_microformat(root_class_names, el, backcompat_mode=backcompat_mode))
-
# parse child tags, provided this isn't a microformat root-class
if not root_class_names:
for child in get_children(el):
- child_properties, child_microformats, child_stops_implied_name = parse_props(child)
+ child_properties, child_microformats, child_parsed_types_aggregation = parse_props(child)
for prop_name in child_properties:
v = props.get(prop_name, [])
v.extend(child_properties[prop_name])
props[prop_name] = v
children.extend(child_microformats)
- stops_implied_name = stops_implied_name or child_stops_implied_name
-
- return props, children, stops_implied_name
+ parsed_types_aggregation.update(child_parsed_types_aggregation)
+ return props, children, parsed_types_aggregation
def parse_rels(el):
"""Parse an element for rel microformats
diff --git a/test/examples/implied_properties/stop_implied_url.html b/test/examples/implied_properties/stop_implied_url.html
new file mode 100644
index 0000000..2250291
--- /dev/null
+++ b/test/examples/implied_properties/stop_implied_url.html
@@ -0,0 +1,59 @@
+
real world example
+
+
+
+
Er zijn van die momenten dat ik het liefste de hele dag ga zitten puzzelen hoe ik nu de webmentions op deze site in orde moet maken. Het loopt allemaal nog steeds niet lekker, maar ik weet niet goed welke kant ik op moet denken en werken voor een oplossing.
+
Waar loop ik nog tegen aan?
+
...
+
+
+
+
+synthetic test cases
+
+
+ u- on only link stops implied url
+
+
+
+
+
+
+
+
+
+ nested object in property stops u-url parsing
+
+
+
+
+ nested child object stops u-url parsing
+
+
+
+
+ deeper nested child object stops u-url parsing
+
+
+
+
+ p- property doesn't stop implied url parsing
+
+
+
+
+ e-property doesn't stop implied url parsing
+
+
+
+
+ implied u-photo does not stop implied u-url parsing
+
+
+
+
+
+ implied u-photo does not stop implied u-url parsing
+
+
+
\ No newline at end of file
diff --git a/test/test_parser.py b/test/test_parser.py
index 26b538f..d7f1d4f 100644
--- a/test/test_parser.py
+++ b/test/test_parser.py
@@ -470,6 +470,23 @@ def test_implied_url():
for i in range(12, 23):
assert_false("url" in result["items"][i]["properties"])
+def test_stop_implied_url():
+ """testing that explicit u-* properties and nested h-* objects cause implied url parsing to be aborted, while p-*/e-* properties and implied photo do not"""
+
+ result = parse_fixture("implied_properties/stop_implied_url.html")
+
+ assert_false("url" in result["items"][0]["properties"])
+ assert_false("url" in result["items"][1]["properties"])
+ assert_false("url" in result["items"][2]["properties"])
+ assert_false("url" in result["items"][3]["properties"])
+ assert_false("url" in result["items"][4]["properties"])
+ assert_false("url" in result["items"][5]["properties"])
+
+ assert_equal(result["items"][6]["properties"]["url"], ["http://example.com/"])
+ assert_equal(result["items"][7]["properties"]["url"], ["http://example.com/"])
+ assert_equal(result["items"][8]["properties"]["url"], ["http://example.com/"])
+ assert_equal(result["items"][9]["properties"]["url"], ["http://example.com/"])
+
def test_implied_nested_photo():
result = parse_fixture("implied_properties/implied_properties.html", url="http://bar.org")
assert_equal(result["items"][2]["properties"]["photo"][0],