Skip to content

Commit

Permalink
For implied name: replace img with its alt text but not with its src. Update tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Kartik Prabhu committed Jun 27, 2018
1 parent ab25aa8 commit 71997eb
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 9 deletions.
10 changes: 5 additions & 5 deletions mf2py/dom_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def get_descendents(node):
if isinstance(desc, bs4.Tag):
yield desc

def get_textContent(el, replace_img=False, base_url=''):
def get_textContent(el, replace_img=False, img_to_src=True, base_url=''):
""" Get the text content of an element, replacing images by alt or src
"""

Expand All @@ -78,7 +78,7 @@ def get_textContent(el, replace_img=False, base_url=''):
P_BREAK_BEFORE = 1
P_BREAK_AFTER = 0

def text_collection(el, replace_img=False, base_url=''):
def text_collection(el, replace_img=False, img_to_src=True, base_url=''):
# returns array of strings or integers

items = []
Expand All @@ -103,7 +103,7 @@ def text_collection(el, replace_img=False, base_url=''):

elif el.name == 'img' and replace_img:
value = el.get('alt')
if value is None:
if value is None and img_to_src:
value = el.get('src')
if value is not None:
value = urljoin(base_url, value)
Expand All @@ -117,7 +117,7 @@ def text_collection(el, replace_img=False, base_url=''):
else:
for child in el.children:

child_items = text_collection(child, replace_img, base_url)
child_items = text_collection(child, replace_img, img_to_src, base_url)
items.extend(child_items)

if el.name == 'p':
Expand All @@ -127,7 +127,7 @@ def text_collection(el, replace_img=False, base_url=''):

return items

results = [t for t in text_collection(el, replace_img, base_url) if t is not '']
results = [t for t in text_collection(el, replace_img, img_to_src, base_url) if t is not '']

if results:
# remove <space> if it is first and last or if it is preceded by a <space> or <int> or followed by a <int>
Expand Down
5 changes: 3 additions & 2 deletions mf2py/implied_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ def non_empty(val):
return text_type(prop_value)

# use text if all else fails
# don't replace images in implied name (https://github.com/microformats/microformats2-parsing/issues/35)
return get_textContent(el, base_url=base_url)
# replace images with alt but not with src in implied name
# proposal: https://github.com/microformats/microformats2-parsing/issues/35#issuecomment-393615508
return get_textContent(el, replace_img=True, img_to_src=False, base_url=base_url)


def photo(el, dict_class, img_with_alt, base_url=''):
Expand Down
2 changes: 2 additions & 0 deletions test/examples/implied_properties/implied_properties.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@

<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png" alt="" />Tom Morris</a>

<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png"/>Tom Morris</a>

<a class="h-card" href="http://tommorris.org/"><img src="http://tommorris.org/photo.png" alt="Tom Morris" /></a>

<img class="h-card" src="http://tommorris.org/photo.png" alt="Tom Morris" />
Expand Down
8 changes: 6 additions & 2 deletions test/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def test_nested_values():
def test_implied_name():
result = parse_fixture("implied_properties/implied_properties.html")

for i in range(6):
for i in range(7):
assert_equal(result["items"][i]["properties"]["name"][0], "Tom Morris")


Expand Down Expand Up @@ -474,8 +474,12 @@ def test_implied_nested_photo():
result = parse_fixture("implied_properties/implied_properties.html", url="http://bar.org")
assert_equal(result["items"][2]["properties"]["photo"][0],
"http://tommorris.org/photo.png")
assert_equal(result["items"][3]["properties"]["photo"][0],
"http://tommorris.org/photo.png")
assert_equal(result["items"][4]["properties"]["photo"][0],
"http://tommorris.org/photo.png")
# src="" is relative to the base url
assert_equal(result["items"][5]["properties"]["photo"][0],
assert_equal(result["items"][6]["properties"]["photo"][0],
"http://bar.org")


Expand Down

0 comments on commit 71997eb

Please sign in to comment.