-
Notifications
You must be signed in to change notification settings - Fork 28
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
reduced implied urls (fixes #117), added tests #119
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,22 +165,22 @@ def handle_microformat(root_class_names, el, value_property=None, | |
properties = self.dict_class() | ||
children = [] | ||
self._default_date = None | ||
# flag for processing implied name | ||
do_implied_name = True | ||
# for processing implied properties: collects if property types (p, e, u, d(t)) or children (h) have been processed | ||
parsed_types_aggregation = set() | ||
|
||
if backcompat_mode: | ||
el = backcompat.apply_rules(el, self.__html_parser__) | ||
root_class_names = mf2_classes.root(el.get('class',[])) | ||
|
||
# parse for properties and children | ||
for child in get_children(el): | ||
child_props, child_children, child_stops_implied_name = parse_props(child) | ||
child_props, child_children, child_parsed_types_aggregation = parse_props(child) | ||
for key, new_value in child_props.items(): | ||
prop_value = properties.get(key, []) | ||
prop_value.extend(new_value) | ||
properties[key] = prop_value | ||
children.extend(child_children) | ||
do_implied_name = do_implied_name and not child_stops_implied_name | ||
parsed_types_aggregation.update(child_parsed_types_aggregation) | ||
|
||
# complex h-* objects can take their "value" from the | ||
# first explicit property ("name" for p-* or "url" for u-*) | ||
|
@@ -190,16 +190,16 @@ def handle_microformat(root_class_names, el, value_property=None, | |
# if some properties not already found find in implied ways unless in backcompat mode | ||
if not backcompat_mode: | ||
# stop implied name if any p-*, e-*, h-* is already found | ||
if "name" not in properties and do_implied_name: | ||
|
||
if "name" not in properties and parsed_types_aggregation.isdisjoint("peh"): | ||
properties["name"] = [implied_properties.name(el, base_url=self.__url__)] | ||
|
||
if "photo" not in properties: | ||
x = implied_properties.photo(el, self.dict_class, self.__img_with_alt__, base_url=self.__url__) | ||
if x is not None: | ||
properties["photo"] = [x] | ||
|
||
if "url" not in properties: | ||
# stop implied url if any u-* or h-* is already found | ||
if "url" not in properties and parsed_types_aggregation.isdisjoint("uh"): | ||
x = implied_properties.url(el, base_url=self.__url__) | ||
if x is not None: | ||
properties["url"] = [x] | ||
|
@@ -241,8 +241,8 @@ def parse_props(el): | |
""" | ||
props = self.dict_class() | ||
children = [] | ||
# Does this element stop implied name? | ||
stops_implied_name = False | ||
# for processing implied properties: collects if property types (p, e, u, d(t)) or children (h) have been processed | ||
parsed_types_aggregation = set() | ||
|
||
classes = el.get("class", []) | ||
# Is this element a microformat2 root? | ||
|
@@ -254,6 +254,9 @@ def parse_props(el): | |
root_class_names = backcompat.root(classes) | ||
backcompat_mode = True | ||
|
||
if root_class_names: | ||
parsed_types_aggregation.add('h') | ||
|
||
# Is this a property element (p-*, u-*, etc.) flag | ||
# False is default | ||
is_property_el = False | ||
|
@@ -262,16 +265,14 @@ def parse_props(el): | |
p_value = None | ||
for prop_name in mf2_classes.text(classes): | ||
is_property_el = True | ||
stops_implied_name = True | ||
parsed_types_aggregation.add('p') | ||
prop_value = props.setdefault(prop_name, []) | ||
|
||
# if value has not been parsed then parse it | ||
if p_value is None: | ||
p_value = text_type(parse_property.text(el, base_url=self.__url__)) | ||
|
||
|
||
if root_class_names: | ||
stops_implied_name = True | ||
prop_value.append(handle_microformat( | ||
root_class_names, el, value_property="name", | ||
simple_value=p_value, backcompat_mode=backcompat_mode)) | ||
|
@@ -282,14 +283,14 @@ def parse_props(el): | |
u_value = None | ||
for prop_name in mf2_classes.url(classes): | ||
is_property_el = True | ||
parsed_types_aggregation.add('u') | ||
prop_value = props.setdefault(prop_name, []) | ||
|
||
# if value has not been parsed then parse it | ||
if u_value is None: | ||
u_value = parse_property.url(el, self.dict_class, self.__img_with_alt__, base_url=self.__url__) | ||
|
||
if root_class_names: | ||
stops_implied_name = True | ||
prop_value.append(handle_microformat( | ||
root_class_names, el, value_property="url", | ||
simple_value=u_value, backcompat_mode=backcompat_mode)) | ||
|
@@ -303,6 +304,7 @@ def parse_props(el): | |
dt_value = None | ||
for prop_name in mf2_classes.datetime(classes): | ||
is_property_el = True | ||
parsed_types_aggregation.add('d') | ||
prop_value = props.setdefault(prop_name, []) | ||
|
||
# if value has not been parsed then parse it | ||
|
@@ -326,7 +328,7 @@ def parse_props(el): | |
e_value = None | ||
for prop_name in mf2_classes.embedded(classes): | ||
is_property_el = True | ||
stops_implied_name = True | ||
parsed_types_aggregation.add('e') | ||
prop_value = props.setdefault(prop_name, []) | ||
|
||
# if value has not been parsed then parse it | ||
|
@@ -347,21 +349,18 @@ def parse_props(el): | |
# if this is not a property element, but it is a h-* microformat, | ||
# add it to our list of children | ||
if not is_property_el and root_class_names: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't this add `'h'` to `parsed_types_aggregation`? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should be covered in line 258: since it doesn't matter whether it is a child or a nested property, I already add it there. Unless I missed a code path? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah yes, I missed that change. |
||
stops_implied_name = True | ||
children.append(handle_microformat(root_class_names, el, backcompat_mode=backcompat_mode)) | ||
|
||
# parse child tags, provided this isn't a microformat root-class | ||
if not root_class_names: | ||
for child in get_children(el): | ||
child_properties, child_microformats, child_stops_implied_name = parse_props(child) | ||
child_properties, child_microformats, child_parsed_types_aggregation = parse_props(child) | ||
for prop_name in child_properties: | ||
v = props.get(prop_name, []) | ||
v.extend(child_properties[prop_name]) | ||
props[prop_name] = v | ||
children.extend(child_microformats) | ||
stops_implied_name = stops_implied_name or child_stops_implied_name | ||
|
||
return props, children, stops_implied_name | ||
parsed_types_aggregation.update(child_parsed_types_aggregation) | ||
return props, children, parsed_types_aggregation | ||
|
||
def parse_rels(el): | ||
"""Parse an element for rel microformats | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
<h1>real world example</h1> | ||
<article class="post h-entry"> | ||
<h1 class="p-name"><a href="/Waar-te-beginnen-met-Webmentions/">Waar te beginnen met Webmentions</a></h1> | ||
<div class="entry e-content"> | ||
<p>Er zijn van die momenten dat ik het liefste de hele dag ga zitten puzzelen hoe ik nu de <a href="/webmentions/">webmentions</a> op deze site in orde moet maken. Het loopt allemaal nog steeds niet lekker, maar ik weet niet goed welke kant ik op moet denken en werken voor een oplossing.</p> | ||
<p>Waar loop ik nog tegen aan?</p> | ||
<p>...</p> | ||
</div> | ||
<a href="https://news.indieweb.org/nl" class="u-syndication"></a> | ||
</article> | ||
|
||
<h1>synthetic test cases</h1> | ||
|
||
<article class="h-entry"> | ||
<h1>u- on only link stops implied url</h1> | ||
<a href="http://example.com/" class="u-property"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1><a href="http://example.com/post" class="u-property">u- on second link stops implied url</a></h1> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1 class="p-property h-entry">nested object in property stops u-url parsing</h1> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1 class="h-entry">nested child object stops u-url parsing</h1> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<div><span><h1 class="h-entry">deeper nested child object stops u-url parsing</h1></span></div> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1 class="p-name">p- property doesn't stop implied url parsing</h1> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<p class="e-content">e-property doesn't stop implied url parsing"</p> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1>implied u-photo does not stop implied u-url parsing</h1> | ||
<img src="http://example.com/avatar.png"> | ||
<a href="http://example.com/"></a> | ||
</article> | ||
|
||
<article class="h-entry"> | ||
<h1>implied u-photo does not stop implied u-url parsing</h1> | ||
<img src="http://example.com/avatar.png"> | ||
<a href="http://example.com/"></a> | ||
</article> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be good to add a comment here, like the one on the implied-name check.