Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correct support for parsing components and portions #70

Merged
merged 2 commits into from
Jun 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,13 @@ Cobalt is Copyright 2015-2020 AfricanLII.
Change Log
----------

6.0.0
-----

- Add support for portions, such as ``~chp_2``
- Remove non-standard support for expression component and subcomponent
- Remove non-standard legacy support for work components without ``!``

5.0.0
-----

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
5.0.0
6.0.0
25 changes: 25 additions & 0 deletions cobalt/akn.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ class StructuredDocument(AkomaNtosoDocument):
""" The name of the document type, corresponding to the primary document XML element.
"""

non_eid_portions = "arguments background conclusions decision header introduction motivation preamble" \
" preface remedies".split()
""" Portion names that are valid portions, but don't have eids, for use with get_portion_element.
"""

@classmethod
def for_document_type(cls, document_type):
""" Return the subclass for this document type.
Expand Down Expand Up @@ -441,6 +446,26 @@ def components(self):

return components

def get_portion_element(self, portion, component=None):
    """ Get a single portion of this document, or None if nothing matches.

    The `portion` is usually an eId, as specified by
    https://docs.oasis-open.org/legaldocml/akn-nc/v1.0/os/akn-nc-v1.0-os.html#_Toc531692279.
    Names listed in `non_eid_portions` are looked up by element name instead of by eId.

    The optional `component` is the ancestor element within which to look for the portion.
    When it is None, the search starts at `self.root`.

    Range portions (eg. `chp_1->chp_3`) are not supported by this function.
    """
    # Compare against None explicitly rather than using `component or self.root`:
    # an element without children can evaluate as false (lxml elements do), which
    # would silently widen the search to the whole document.
    root = self.root if component is None else component

    if portion in self.non_eid_portions:
        # these are valid portions that don't have eids
        xpath = f'.//a:{portion}'
    else:
        # strip double quotes so the value cannot break out of the quoted XPath literal
        portion = portion.replace('"', '')
        xpath = f'.//a:*[@eId="{portion}"]'

    # only the first match is of interest
    for match in root.xpath(xpath, namespaces={'a': self.namespace}):
        return match
    return None

def _ensure_lifecycle(self):
try:
after = self.meta.publication
Expand Down
63 changes: 27 additions & 36 deletions cobalt/uri.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,16 @@
(/(?P<actor>[^0-9][^/]*))? # actor (optional), cannot start with a number
/(?P<date>[0-9]{4}(-[0-9]{2}(-[0-9]{2})?)?) # date
/(?P<number>[^/]+) # number
(/
( # either a work component or expression details
( # optional expression details
(/ # optional expression language and date
(?P<language>[a-z]{3}) # language (eg. eng)
(?P<expression_date>[@:][^/]*)? # expression date (eg. @ or @2012-12-22 or :2012-12-22)
(/!? # optional expression component
# the ! is optional for backwards compatibility but won't be optional
# in a future version
(?P<expression_component>[^/]+?)? # expression component (eg. !main or !schedule1)
(/(?P<expression_subcomponent>[^.]+))? # expression subcomponent (eg. chapter/1 or section/20)
)? #
(\.(?P<format>[a-z0-9]+))? # format (eg. .xml, .akn, .html, .pdf)
)| #
!?(?P<work_component>.+) # work component
))?$""", re.X)
)?
(/
(!(?P<work_component>[^~.]+?))? # optional component (eg. !main or !schedule1)
(~(?P<portion>[^.]+))? # optional portion
)?
(\.(?P<format>[a-z0-9]+))? # optional format (eg. .xml, .akn, .html, .pdf)
$""", re.X)


class FrbrUri(object):
Expand All @@ -41,7 +36,7 @@ class FrbrUri(object):

Example::

>>> uri = FrbrUri.parse('/akn/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/!main/part/A.xml')
>>> uri = FrbrUri.parse('/akn/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/!main~part_1.xml')
>>> uri.prefix
'akn'
>>> uri.country
Expand All @@ -60,18 +55,18 @@ class FrbrUri(object):
'eng'
>>> uri.expression_date
':2015-01-01'
>>> uri.expression_component
>>> uri.work_component
'main'
>>> uri.expression_subcomponent
'part/A'
>>> uri.portion
'part_1'
>>> uri.format
'xml'
>>> uri.work_uri()
'/za-jhb/act/by-law/2003/public-health'
>>> uri.expression_uri()
'/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/main/part/A'
'/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/!main~part_1'
>>> uri.manifestation_uri()
'/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/main/part/A.xml'
'/za-jhb/act/by-law/2003/public-health/eng:2015-01-01/!main~part_1.xml'

:ivar prefix: optional `akn` prefix
:ivar country: two letter country code
Expand All @@ -84,16 +79,13 @@ class FrbrUri(object):
:ivar work_component: name of the work component, may be None
:ivar language: three-letter expression language code, may be None
:ivar expression_date: expression date (str), [@:]YYYY[-MM[-DD]], may be None
:ivar expression_component: name of the expression component, may be None
:ivar expression_subcomponent: name of the expression subcomponent, may be None
:ivar format: format extension, may be None
"""

default_language = 'eng'

def __init__(self, country, locality, doctype, subtype, actor, date, number,
work_component=None, language=None, expression_date=None, expression_component=None,
expression_subcomponent=None, format=None, prefix="akn"):
def __init__(self, country, locality, doctype, subtype, actor, date, number, work_component=None, language=None,
expression_date=None, format=None, portion=None, prefix="akn"):
self.prefix = prefix
self.country = country
self.locality = locality
Expand All @@ -103,11 +95,10 @@ def __init__(self, country, locality, doctype, subtype, actor, date, number,
self.date = date
self.number = number
self.work_component = work_component
self.portion = portion

self.language = language or self.default_language
self.expression_date = expression_date
self.expression_component = expression_component
self.expression_subcomponent = expression_subcomponent
self.format = format

def clone(self):
Expand All @@ -125,8 +116,7 @@ def clone(self):
work_component=self.work_component,
language=self.language,
expression_date=self.expression_date,
expression_component=self.expression_component,
expression_subcomponent=self.expression_subcomponent,
portion=self.portion,
format=self.format,
)

Expand Down Expand Up @@ -168,16 +158,17 @@ def expression_uri(self, work_component=True):
if self.expression_date is not None:
uri = uri + self.expression_date

# expression component is preferred over a work component
if self.expression_component:
uri = uri + "/!" + self.expression_component
if self.expression_subcomponent:
uri = uri + "/" + self.expression_subcomponent

# if we have a work component, use it
elif work_component and self.work_component:
slashed = False
if work_component and self.work_component:
slashed = True
uri = uri + "/!" + self.work_component

if self.portion:
if not slashed:
uri = uri + "/"
uri = uri + "~" + self.portion

return uri

def manifestation_uri(self, work_component=True):
Expand All @@ -190,7 +181,7 @@ def manifestation_uri(self, work_component=True):
def __str__(self):
if self.format:
return self.manifestation_uri()
if self.expression_date or self.expression_component:
if self.expression_date or self.work_component:
return self.expression_uri()
return self.work_uri()

Expand Down
Loading