Skip to content

Commit

Permalink
Merge pull request #106 from kartikprabhu/master
Browse files Browse the repository at this point in the history
new version 1.1.1 - looks good to me
  • Loading branch information
kevinmarks authored Jul 7, 2018
2 parents dda3a59 + 71997eb commit 9a84d56
Show file tree
Hide file tree
Showing 35 changed files with 1,387 additions and 263 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,26 @@
# Change Log
All notable changes to this project will be documented in this file.

## 1.1.1 - 2018-06-15

- streamline backcompat to use JSON only.
- fix multiple mf1 root rel-tag parsing
- correct url and photo for hreview.
- add rules for nested hreview. update backcompat to use multiple matches in old properties.
- fix `rel-tag` to `p-category` conversion so that other classes are not lost.
- use original authored html for `e-*` parsing in backcompat
- make classes and rels into deduplicated arrays whose source order is not significant (they are output in alphabetical order).
- only use class names for mf2 which follow the naming rules
- fix `parse` method to use default html parser.
- always use the first value of each attribute when parsing rels.
- correct AM/PM conversion in datetime value class pattern.
- add ordinal date parsing to the datetime value class pattern. ordinal dates are normalised to YYYY-MM-DD.
- remove hack for html tag classes since that is fixed in newer versions of BeautifulSoup
- better whitespace algorithm for `name` and `html.value` parsing
- experimental flag for including `alt` in `u-photo` parsing
- make a copy of the BeautifulSoup given by user to work on for parsing to prevent changes to original doc
- bump version to 1.1.1

## 1.1.0 - 2018-03-16

- bump version to 1.1.0 since it is a "major" change
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ Filter by microformat type

p.to_dict(filter_by_type="h-entry")
p.to_json(filter_by_type="h-entry")

Experimental features
---------------------
- Pass the optional argument `img_with_alt=True` to either the `Parser` object or the `parse` method to enable parsing of the `alt` attribute of `<img>` tags, as proposed in [issue: image alt text is lost during parsing](https://github.com/microformats/microformats2-parsing/issues/2). It defaults to `False` for backward compatibility.

Frontends
-------------
Expand All @@ -68,3 +72,4 @@ Contributions
We welcome contributions and bug reports via Github, and on the microformats wiki.

We try to follow the [IndieWebCamp code of conduct](http://indiewebcamp.com/code-of-conduct). Please be respectful of other contributors, and forge a spirit of positive co-operation without discrimination or disrespect.

10 changes: 9 additions & 1 deletion mf2py/backcompat-rules/hentry.json
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,13 @@
"longitude": [
"p-longitude"
]
},
"rels": {
"bookmark": [
"u-url"
],
"tag": [
"p-category"
]
}
}
}
7 changes: 6 additions & 1 deletion mf2py/backcompat-rules/hfeed.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,10 @@
"title": [
"p-name"
]
},
"rels": {
"tag": [
"p-category"
]
}
}
}
5 changes: 2 additions & 3 deletions mf2py/backcompat-rules/hproduct.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@
],
"review": [
"p-review",
"h-review",
"e-description"
"h-review"
],
"fn": [
"p-name"
]
}
}
}
10 changes: 9 additions & 1 deletion mf2py/backcompat-rules/hrecipe.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@
],
"ingredient": [
"p-ingredient"
],
"category": [
"p-category"
]
},
"rels": {
"tag": [
"p-category"
]
}
}
}
26 changes: 25 additions & 1 deletion mf2py/backcompat-rules/hreview.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@
"h-card"
],
"url": [
"p-item",
"h-item",
"u-url"
],
"photo": [
"p-item",
"h-item",
"u-photo"
],
"best": [
Expand All @@ -35,6 +39,26 @@
],
"summary": [
"p-name"
],
"item vcard": [
"p-item",
"vcard"
],
"item vevent": [
"p-item",
"vevent"
],
"item hproduct": [
"p-item",
"hproduct"
]
},
"rels": {
"self bookmark": [
"u-url"
],
"tag": [
"p-category"
]
}
}
}
8 changes: 7 additions & 1 deletion mf2py/backcompat-rules/vcard.json
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,12 @@
],
"organization-name": [
"p-organization-name"
],
"title": [
"p-job-title"
],
"role": [
"p-role"
]
}
}
}
141 changes: 73 additions & 68 deletions mf2py/backcompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"""

from __future__ import unicode_literals, print_function
from .dom_helpers import get_descendents
from .dom_helpers import get_children
from .mf_helpers import unordered_list
from . import mf2_classes
import bs4
import copy
Expand All @@ -18,11 +19,8 @@
else:
from urllib.parse import unquote

# Classic Root Classname map
CLASSIC_ROOT_MAP = {}

# Classic Root properties map
CLASSIC_PROPERTY_MAP = {}
# Classic map
_CLASSIC_MAP = {}

# populate backcompat rules from JSON files

Expand All @@ -34,86 +32,93 @@
with codecs.open(file_path, 'r', 'utf-8') as f:
rules = json.load(f)

CLASSIC_ROOT_MAP[root] = rules['type'][0]
CLASSIC_PROPERTY_MAP[root] = rules['properties']



def root(classes):
"""get all backcompat root classnames
"""
return [c for c in classes if c in CLASSIC_ROOT_MAP]
_CLASSIC_MAP[root] = rules


def make_classes_rule(old_class, new_classes):
def _make_classes_rule(old_classes, new_classes):
"""Builds a rule for augmenting an mf1 class with its mf2
equivalent(s).
"""
def f(child, **kwargs):
child_original = child.original or copy.copy(child)
child_classes = child.get('class', [])
if old_class in child_classes:
child_classes += [c for c in new_classes
if c not in child_classes]
if all(cl in child_classes for cl in old_classes):
child_classes.extend([cl for cl in new_classes if cl not in child_classes])
child['class'] = child_classes
return f


# The RULES map has a list of rules for each root class type.
# We'll build the vast majority of it from the CLASSIC_PROPERTY_MAP
RULES = dict(
(old_root, [make_classes_rule(old_class, new_classes)
for old_class, new_classes in properties.items()])
for old_root, properties in CLASSIC_PROPERTY_MAP.items())


def rel_bookmark_to_url_rule(child, **kwargs):
"""rel=bookmark gets augmented with class="u-url
"""
child_classes = child.get('class', [])
if ('bookmark' in child.get('rel', [])
and 'u-url' not in child_classes):
child_classes.append('u-url')
child['class'] = child_classes

# if any new class is e-* attach original to parse originally authored HTML
if mf2_classes.embedded(child_classes) and child.original is None:
child.original = child_original
return f

def rel_tag_to_category_rule(child, **kwargs):
def _rel_tag_to_category_rule(child, html_parser, **kwargs):
"""rel=tag converts to p-category using a special transformation (the
category becomes the tag href's last path segment). This rule adds a new
data tag so that
<a rel="tag" href="http://example.com/tags/cat"></a> gets augmented with
category becomes the tag href's last path segment). This rule adds a new data tag so that
<a rel="tag" href="http://example.com/tags/cat"></a> gets replaced with
<data class="p-category" value="cat"></data>
"""

href = child.get('href', '')
rels = child.get('rel', [])
classes = child.get('class', [])
if ('tag' in rels and child.get('href')
and 'p-category' not in classes
and 'u-category' not in classes):
segments = [seg for seg in child.get('href').split('/') if seg]
if 'tag' in rels and href:
segments = [seg for seg in href.split('/') if seg]
if segments:
data = bs4.BeautifulSoup('<data></data>').data
# use mf1 class here so it doesn't get removed later
data['class'] = ['category']
if html_parser:
soup = bs4.BeautifulSoup('', features=html_parser)
else:
soup = bs4.BeautifulSoup('')

data = soup.new_tag('data')
# this does not use what's given in the JSON
# but that is not a problem currently
# use mf1 class so it doesn't get removed later
data['class'] = ['p-category']
data['value'] = unquote(segments[-1])
child.parent.append(data)
child.insert_before(data)
# remove tag from rels to avoid repeat
child['rel'] = [r for r in rels if r != 'tag']


# Augment with special rules
RULES['hentry'] += [
rel_bookmark_to_url_rule,
rel_tag_to_category_rule,
]
def _make_rels_rule(old_rels, new_classes, html_parser):
"""Builds a rule for augmenting an mf1 rel with its mf2 class equivalent(s).
"""

# need to special case rel=tag as it operates differently

def f(child, **kwargs):
child_rels = child.get('rel', [])
child_classes = child.get('class', [])
if all(r in child_rels for r in old_rels):
if 'tag' in old_rels:
_rel_tag_to_category_rule(child, html_parser, **kwargs)
else:
child_classes.extend([cl for cl in new_classes if cl not in child_classes])
child['class'] = child_classes
return f

def apply_rules(el):
"""add modern classnames for older mf1 classnames

returns a copy of el and does not modify the original
def _get_rules(old_root, html_parser):
""" for given mf1 root get the rules as a list of functions to act on children """

class_rules = [_make_classes_rule(old_classes.split(), new_classes)
for old_classes, new_classes in _CLASSIC_MAP[old_root].get('properties', {}).items()]
rel_rules = [_make_rels_rule(old_rels.split(), new_classes, html_parser)
for old_rels, new_classes in _CLASSIC_MAP[old_root].get('rels', {}).items()]

return class_rules + rel_rules

def root(classes):
"""get all backcompat root classnames
"""
return unordered_list([c for c in classes if c in _CLASSIC_MAP])

el_copy = copy.copy(el)
def apply_rules(el, html_parser):
"""add modern classnames for older mf1 classnames
"""

def apply_prop_rules_to_children(parent, rules):

for child in (c for c in parent.children if isinstance(c, bs4.Tag)):
for child in get_children(parent):
classes = child.get('class',[])
# find existing mf2 properties if any and delete them
mf2_props = mf2_classes.property_classes(classes)
Expand All @@ -129,19 +134,19 @@ def apply_prop_rules_to_children(parent, rules):


# add mf2 root equivalent
classes = el_copy.get('class', [])
classes = el.get('class', [])
old_roots = root(classes)
for old_root in old_roots:
new_root = CLASSIC_ROOT_MAP[old_root]
if new_root not in classes:
el_copy['class'].append(new_root)
new_roots = _CLASSIC_MAP[old_root]['type']
classes.extend(new_roots)
el['class'] = classes


# add mf2 prop equivalent to descendents and remove existing mf2 props
rules = []
for old_root in old_roots:
rules.extend(RULES.get(old_root,[]))
rules.extend(_get_rules(old_root, html_parser))

apply_prop_rules_to_children(el_copy, rules)
apply_prop_rules_to_children(el, rules)

return el_copy
return el
Loading

0 comments on commit 9a84d56

Please sign in to comment.