Skip to content
This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

Commit

Permalink
test: Dependencies unit tests (#50)
Browse files Browse the repository at this point in the history
* correctly formatted models to data classes

* updated model Names and added new phrases

* #49 dependency extraction function unit tests

* style: apply automatic fixes of linters

* fix: possible fix for build error

* added json dump of dependencies to CLI flow

* mypy fix

* style: apply automatic fixes of linters

* do the versions make a difference on dependency understanding?

* updated url for spacy language module

* quick fix, need to work on punctuation extraction

* removed hanging punctuation when extracting actions

* style: apply automatic fixes of linters

* test for dependency extraction class subordinating_conjunction

* style: apply automatic fixes of linters

* more documentation for functions

* style: apply automatic fixes of linters

* updated ontology to represent current code state

* removed pattern without implemented function for now

* added dependency README

Co-authored-by: Aleksandr Sergeev <[email protected]>
Co-authored-by: aserge16 <[email protected]>
Co-authored-by: Lars Reimann <[email protected]>
  • Loading branch information
4 people authored Jan 20, 2022
1 parent 3278c4d commit 60fc290
Show file tree
Hide file tree
Showing 9 changed files with 257 additions and 76 deletions.
46 changes: 27 additions & 19 deletions ontology/Dependency_Constraints.owl
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,23 @@
#################################################################

### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#hasAction
:hasAction rdf:type owl:ObjectProperty ;
rdfs:domain :Dependency .
:hasAction rdf:type owl:ObjectProperty ,
owl:IrreflexiveProperty ;
rdfs:domain :Dependency ;
rdfs:range [ rdf:type owl:Restriction ;
owl:onProperty :hasCondition ;
owl:someValuesFrom :Action
] .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#hasCondition
:hasCondition rdf:type owl:ObjectProperty ;
rdfs:domain :Action ,
:Condition ,
:Dependency .
:hasCondition rdf:type owl:ObjectProperty ,
owl:IrreflexiveProperty ;
rdfs:domain :Dependency ;
rdfs:range [ rdf:type owl:Restriction ;
owl:onProperty :hasCondition ;
owl:someValuesFrom :Condition
] .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#hasDependentParameter
Expand Down Expand Up @@ -55,11 +63,6 @@
rdfs:range xsd:string .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#hasType
:hasType rdf:type owl:DatatypeProperty ;
rdfs:comment "Each parameter has a type." .


### http://www.w3.org/2002/07/owl#topDataProperty
owl:topDataProperty rdfs:subPropertyOf owl:topDataProperty .

Expand All @@ -83,11 +86,6 @@ owl:topDataProperty rdfs:subPropertyOf owl:topDataProperty .
rdfs:comment "This class describes Dependencies."@en .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#IsIgnored
:IsIgnored rdf:type owl:Class ;
rdfs:subClassOf :StaticAction .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#Parameter
:Parameter rdf:type owl:Class ;
rdfs:comment "This describes Parameters of Scikit-lean APIs." .
Expand All @@ -98,9 +96,19 @@ owl:topDataProperty rdfs:subPropertyOf owl:topDataProperty .
rdfs:subClassOf :StaticCondition .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#ParameterOnlyUsedWhen
:ParameterOnlyUsedWhen rdf:type owl:Class ;
rdfs:subClassOf :StaticCondition .
### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#ParameterIsIgnored
:ParameterIsIgnored rdf:type owl:Class ;
rdfs:subClassOf :StaticAction .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#ParameterIsIllegal
:ParameterIsIllegal rdf:type owl:Class ;
rdfs:subClassOf :StaticAction .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#ParameterIsNone
:ParameterIsNone rdf:type owl:Class ;
rdfs:subClassOf :StaticCondition .


### http://www.semanticweb.org/praja/ontologies/2021/11/Dependency_Constraints#RuntimeAction
Expand Down
13 changes: 9 additions & 4 deletions package_parser/package_parser/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,20 @@ def cli() -> None:

if args.command == __API_COMMAND:
public_api = get_api(args.package)
get_dependencies(public_api)
public_api_dependencies = get_dependencies(public_api)

out_dir: Path = args.out
out_file = out_dir.joinpath(
out_file_api = out_dir.joinpath(
f"{public_api.distribution}__{public_api.package}__{public_api.version}__api.json"
)
ensure_file_exists(out_file)
with out_file.open("w") as f:
out_file_api_dependencies = out_dir.joinpath(
f"{public_api.distribution}__{public_api.package}__{public_api.version}__api_dependencies.json"
)
ensure_file_exists(out_file_api)
with out_file_api.open("w") as f:
json.dump(public_api.to_json(), f, indent=2, cls=CustomEncoder)
with out_file_api_dependencies.open("w") as f:
json.dump(public_api_dependencies.to_json(), f, indent=2, cls=CustomEncoder)


def __get_args() -> argparse.Namespace:
Expand Down
44 changes: 25 additions & 19 deletions package_parser/package_parser/commands/get_api/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,13 +447,12 @@ def to_json(self) -> Any:

@dataclass
class Action:
    """Action part of a dependency, stored as plain text.

    ``@dataclass`` generates ``__init__(self, action)`` from the field below, so
    no hand-written initializer is needed; subclasses can still call
    ``super().__init__(action)``.
    """

    # Text of the action phrase.
    action: str

    @classmethod
    def from_json(cls, json: Any):
        """Build an instance from a mapping that has an ``"action"`` entry."""
        return cls(json["action"])

    def to_json(self) -> Dict:
        """Return the JSON-serializable form ``{"action": <text>}``."""
        return {"action": self.action}

Expand All @@ -480,13 +479,12 @@ def __init__(self, action: str) -> None:

@dataclass
class Condition:
    """Condition part of a dependency, stored as plain text.

    ``@dataclass`` generates ``__init__(self, condition)`` from the field below,
    so no hand-written initializer is needed; subclasses can still call
    ``super().__init__(condition)``.
    """

    # Text of the condition phrase.
    condition: str

    @classmethod
    def from_json(cls, json: Any):
        """Build an instance from a mapping that has a ``"condition"`` entry."""
        return cls(json["condition"])

    def to_json(self) -> Dict:
        """Return the JSON-serializable form ``{"condition": <text>}``."""
        return {"condition": self.condition}

Expand All @@ -506,12 +504,18 @@ def __init__(self, condition: str) -> None:
super().__init__(condition)


class ParameterIsNone(StaticCondition):
    """Static condition for text containing an "is none"-style phrase.

    Replaces the former ``ParameterIsOptional`` name.
    """

    def __init__(self, condition: str) -> None:
        super().__init__(condition)


@dataclass
class Dependency:
hasDependentParameter: Parameter
isDependingOn: Parameter
hasCondition: Condition
hasAction: Action

@classmethod
def from_json(cls, json: Any):
return cls(
Expand All @@ -521,22 +525,24 @@ def from_json(cls, json: Any):
Action.from_json(["hasAction"]),
)

def __init__(
self,
hasDependentParameter: Parameter,
isDependingOn: Parameter,
hasCondition: Condition,
hasAction: Action,
) -> None:
self.hasDependentParameter = hasDependentParameter
self.isDependingOn = isDependingOn
self.hasCondition = hasCondition
self.hasAction = hasAction

def to_json(self) -> Dict:
return {
"hasDependentParameter": self.hasDependentParameter.to_json(),
"isDependingOn": self.isDependingOn.to_json(),
"hasCondition": self.hasCondition.to_json(),
"hasAction": self.hasAction.to_json(),
}


@dataclass
class APIDependencies:
    """Container for the dependencies extracted from an API.

    ``dependencies`` is a nested mapping: function name -> parameter name ->
    list of dependency objects, each of which provides ``to_json()``.
    """

    # Nested mapping described in the class docstring.
    dependencies: Dict

    def to_json(self) -> Dict:
        """Return the same nested mapping with every dependency serialized."""
        return {
            function_name: {
                parameter_name: [
                    dependency.to_json() for dependency in parameter_dependencies
                ]
                for parameter_name, parameter_dependencies in parameters.items()
            }
            for function_name, parameters in self.dependencies.items()
        }
19 changes: 19 additions & 0 deletions package_parser/package_parser/commands/get_dependencies/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dependency Extraction

## How do we imagine a Dependency

A basic parameter dependency, which is the kind we handle in this lab, is contained within a single sentence. In this sentence, we expect the identifying name of another parameter of the same function to appear, specifically in the dependency subtree of the condition.


## How do we extract a Dependency

Relying on spaCy's DependencyMatcher, we write functions to detect the head token of both the action and condition dependency subtrees. We assume that the action is always the root of the sentence, and a subtree inside the action contains the condition text.

Phrases are used to identify the type of the action/condition and create the appropriate model object.

By traversing a dependency subtree in order (in-order traversal), we can rebuild the sentence text from the spaCy token objects.


### Dependency Tree Example

![Alt text](dependency_tree_example.png "Dependency Tree Example")
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,5 @@
"RIGHT_ID": "dependee_param",
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
},
],
"pattern_parameter_": [
{
"RIGHT_ID": "action",
"RIGHT_ATTRS": {"POS": "VERB"}, # verb is set as an anchor token
},
{
"LEFT_ID": "action",
"REL_OP": ">",
"RIGHT_ID": "ActionParameterName", # verb is a direct head of subject which is a NOUN i.e. Parameter Name
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubjpass", "nsubj"]}},
},
{
"LEFT_ID": "action",
"REL_OP": ">",
"RIGHT_ID": "ConditionalVerbModifier", # Verb is restricted by Verb Modifier
"RIGHT_ATTRS": {"DEP": "advmod"},
},
{
"LEFT_ID": "action",
"REL_OP": ">>",
"RIGHT_ID": "ConditionalParameterName", # verb is a head in chain of object i.e. Parameter name or value
"RIGHT_ATTRS": {"DEP": {"IN": ["dobj", "pobj"]}},
},
],
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
from ..get_api._model import (
API,
Action,
APIDependencies,
Condition,
Dependency,
Parameter,
ParameterHasValue,
ParameterIsIgnored,
ParameterIsIllegal,
ParameterIsOptional,
ParameterIsNone,
)
from ._dependency_patterns import dependency_matcher_patterns
from ._preprocess_docstring import preprocess_docstring
Expand All @@ -23,6 +24,9 @@


def extract_lefts_and_rights(curr_token: Token, extracted: Union[List, None] = None):
"""
Given a spaCy token, extract recursively all tokens in its dependency subtree in inorder traversal.
"""
if extracted is None:
extracted = []

Expand All @@ -40,6 +44,10 @@ def extract_lefts_and_rights(curr_token: Token, extracted: Union[List, None] = N


def extract_action(action_token: Token, condition_token: Token) -> Action:
"""
Create action object given head token of action phrase in docstring.
Condition token used to avoid traversing into the condition phrase dependency subtree of the docstring.
"""
action_tokens = []
action_lefts = list(action_token.lefts)
action_rights = list(action_token.rights)
Expand All @@ -52,6 +60,9 @@ def extract_action(action_token: Token, condition_token: Token) -> Action:
if token != condition_token:
action_tokens.extend(extract_lefts_and_rights(token))

# Remove trailing punctuation
if any(p == action_tokens[-1] for p in [",", "."]):
del action_tokens[-1]
action_text = " ".join(action_tokens)

ignored_phrases = [
Expand All @@ -71,27 +82,41 @@ def extract_action(action_token: Token, condition_token: Token) -> Action:


def extract_condition(condition_token: Token) -> Condition:
    """
    Create condition object given head token of condition phrase in docstring.

    The texts of all tokens in the head token's subtree are joined with single
    spaces. The lower-cased result is searched for known phrases to choose the
    condition type: ParameterIsNone, ParameterHasValue, or plain Condition when
    no phrase matches.
    """
    condition_text = " ".join(token.text for token in condition_token.subtree)
    condition_text_lower = condition_text.lower()

    # NOTE(review): "is not none" / "if not none" are also mapped to
    # ParameterIsNone here - confirm that this is the intended classification.
    is_none_phrases = [
        "is none",
        "is not set",
        # A missing comma used to fuse this entry with a duplicate "is not set"
        # into the single phrase "is also noneis not set", which never matched.
        "is also none",
        "is not specified",
        "is not none",
        "if none",
        "if not none",
    ]
    has_value_phrases = [
        "equals",
        "is true",
        "is false",
        "is set to",
        "is greater than",
        "is less than",
    ]

    # The is-none phrases are checked first and win if both groups match.
    if any(phrase in condition_text_lower for phrase in is_none_phrases):
        return ParameterIsNone(condition=condition_text)
    if any(phrase in condition_text_lower for phrase in has_value_phrases):
        return ParameterHasValue(condition=condition_text)
    return Condition(condition=condition_text)


class DependencyExtractor:
"""
Functions to extract each type of pattern in _dependency_patterns
"""

@staticmethod
def extract_pattern_parameter_subordinating_conjunction(
dependent_param: Parameter,
Expand Down Expand Up @@ -133,7 +158,8 @@ def extract_dependencies_from_docstring(
spacy_id_to_pattern_id_mapping: Dict,
) -> List[Dependency]:
"""
Extract readable dependencies in a Docstring from pattern matches
Extract readable dependencies in a Docstring from pattern matches.
Function fetched from class DependencyExtractor, when 'extract_' + pattern name match function name in the class.
"""
dependencies = list()
for match in matches:
Expand All @@ -149,7 +175,7 @@ def extract_dependencies_from_docstring(
return dependencies


def get_dependencies(api: API) -> Dict:
def get_dependencies(api: API) -> APIDependencies:
"""
Loop through all functions in the API
Parse and preprocess each doc string from every function
Expand Down Expand Up @@ -187,4 +213,4 @@ def get_dependencies(api: API) -> Dict:
if param_dependencies:
all_dependencies[function_name][parameter.name] = param_dependencies

return all_dependencies
return APIDependencies(dependencies=all_dependencies)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions package_parser/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ importlib-metadata = "^4.10.0"
numpydoc = "^1.1.0"
spacy = "^3.2.1"

[tool.poetry.dependencies.en_core_web_sm]
url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.2.0/en_core_web_sm-3.2.0-py3-none-any.whl"

[tool.poetry.dev-dependencies]
pytest = "^6.2.5"
pytest-cov = "^3.0.0"
Expand Down
Loading

0 comments on commit 60fc290

Please sign in to comment.