Skip to content
This repository has been archived by the owner on Jan 19, 2025. It is now read-only.

Commit

Permalink
feat: Action/Condition Classification (#48)
Browse files Browse the repository at this point in the history
* #43 basic condition and action extraction functions

* #43 corrected extractions, now matching occurs per sentence, removes many false positives

* #43 fixed recursive action extraction function

* Added new pattern

* #43 classifying of ignored parameter actions

* #43 conditions and actions are now dataclasses, renamed one condition

* #43 phrases to categorize actions/conditions

* #43 mypy fixes

* style: apply automatic fixes of linters

* #43 renamed IDs in general SCONJ pattern

* #43 more descriptive pattern name

Co-authored-by: Aleksandr Sergeev <[email protected]>
Co-authored-by: prajakta <[email protected]>
Co-authored-by: aserge16 <[email protected]>
  • Loading branch information
4 people authored Jan 17, 2022
1 parent 3efa424 commit 13c789c
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 112 deletions.
2 changes: 2 additions & 0 deletions package_parser/package_parser/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import Any

from .commands.get_api import get_api
from .commands.get_dependencies import get_dependencies
from .utils import ensure_file_exists

__API_COMMAND = "api"
Expand All @@ -22,6 +23,7 @@ def cli() -> None:

if args.command == __API_COMMAND:
public_api = get_api(args.package)
get_dependencies(public_api)

out_dir: Path = args.out
out_file = out_dir.joinpath(
Expand Down
6 changes: 4 additions & 2 deletions package_parser/package_parser/commands/get_api/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import inspect
import re
from dataclasses import asdict
from dataclasses import asdict, dataclass
from enum import Enum, auto
from typing import Any, Dict, Optional, Union

Expand Down Expand Up @@ -445,6 +445,7 @@ def to_json(self) -> Any:
return {"type": self.type, "description": self.description}


@dataclass
class Action:
@classmethod
def from_json(cls, json: Any):
Expand Down Expand Up @@ -477,6 +478,7 @@ def __init__(self, action: str) -> None:
super().__init__(action)


@dataclass
class Condition:
@classmethod
def from_json(cls, json: Any):
Expand Down Expand Up @@ -504,7 +506,7 @@ def __init__(self, condition: str) -> None:
super().__init__(condition)


class ParameterIsSet(StaticCondition):
class ParameterIsOptional(StaticCondition):
def __init__(self, condition: str) -> None:
super().__init__(condition)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,53 +1,41 @@
dependency_matcher_patterns = {
"pattern_parameter_used_condition": [
{"RIGHT_ID": "used", "RIGHT_ATTRS": {"ORTH": {"IN": ["used", "Used"]}}},
"pattern_parameter_subordinating_conjunction": [
{"RIGHT_ID": "action_head", "RIGHT_ATTRS": {"POS": "VERB"}},
{
"LEFT_ID": "used",
"LEFT_ID": "action_head",
"REL_OP": ">",
"RIGHT_ID": "condition",
"RIGHT_ID": "condition_head",
"RIGHT_ATTRS": {"DEP": "advcl"},
},
{
"LEFT_ID": "condition",
"LEFT_ID": "condition_head",
"REL_OP": ">",
"RIGHT_ID": "dependee_param",
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
},
],
"pattern_parameter_ignored_condition": [
"pattern_parameter_": [
{
"RIGHT_ID": "ignored",
"RIGHT_ATTRS": {"ORTH": {"IN": ["ignored", "Ignored"]}},
"RIGHT_ID": "action",
"RIGHT_ATTRS": {"POS": "VERB"}, # verb is set as an anchor token
},
{
"LEFT_ID": "ignored",
"LEFT_ID": "action",
"REL_OP": ">",
"RIGHT_ID": "condition",
"RIGHT_ATTRS": {"DEP": "advcl"},
"RIGHT_ID": "ActionParameterName", # verb is a direct head of subject which is a NOUN i.e. Parameter Name
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubjpass", "nsubj"]}},
},
{
"LEFT_ID": "condition",
"LEFT_ID": "action",
"REL_OP": ">",
"RIGHT_ID": "dependee_param",
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
"RIGHT_ID": "ConditionalVerbModifier", # Verb is restricted by Verb Modifier
"RIGHT_ATTRS": {"DEP": "advmod"},
},
],
"pattern_parameter_applies_condition": [
{
"RIGHT_ID": "applies",
"RIGHT_ATTRS": {"ORTH": {"IN": ["applies", "Applies"]}},
},
{
"LEFT_ID": "applies",
"REL_OP": ">",
"RIGHT_ID": "condition",
"RIGHT_ATTRS": {"DEP": "advcl"},
},
{
"LEFT_ID": "condition",
"REL_OP": ">",
"RIGHT_ID": "dependee_param",
"RIGHT_ATTRS": {"DEP": {"IN": ["nsubj", "nsubjpass"]}},
"LEFT_ID": "action",
"REL_OP": ">>",
"RIGHT_ID": "ConditionalParameterName", # verb is a head in chain of object i.e. Parameter name or value
"RIGHT_ATTRS": {"DEP": {"IN": ["dobj", "pobj"]}},
},
],
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,98 @@

import spacy
from spacy.matcher import DependencyMatcher
from spacy.tokens import Token
from spacy.tokens.doc import Doc

from ..get_api._model import API, Action, Condition, Dependency, Parameter
from ..get_api._model import (
API,
Action,
Condition,
Dependency,
Parameter,
ParameterHasValue,
ParameterIsIgnored,
ParameterIsIllegal,
ParameterIsOptional,
)
from ._dependency_patterns import dependency_matcher_patterns
from ._preprocess_docstring import preprocess_docstring

PIPELINE = "en_core_web_sm"


class DependencyExtractor:
@staticmethod
def extract_pattern_parameter_used_condition(
    dependent_param: Parameter,
    func_parameters: List[Parameter],
    match: Tuple,
    param_docstring: Doc,
) -> Union[Dependency, None]:
    """Build a Dependency from a 'pattern_parameter_used_condition' match.

    ``match`` is a spaCy DependencyMatcher result; ``match[1]`` holds the
    indices (into ``param_docstring``) of the matched pattern slots.

    Returns None when the matched token does not name any parameter of the
    function, i.e. the match is likely a false positive.
    """
    # match[1][2] is the index of the token matched as the dependee
    # parameter name (the "dependee_param" slot of the pattern).
    is_depending_on_param_index = match[1][2]
    is_depending_on_param_name = param_docstring[is_depending_on_param_index].text
    # Resolve the token text to an actual Parameter of this function.
    is_depending_on_param = next(
        filter(
            lambda param: param.name == is_depending_on_param_name, func_parameters
        ),
        None,
    )
    if is_depending_on_param is None:
        # Likely not a correct dependency match
        return None

    # The condition text is the whole dependency subtree of the token
    # matched as the condition head (index match[1][1]).
    condition_verb = param_docstring[match[1][1]]
    condition_verb_subtree = list(condition_verb.subtree)
    condition_text = " ".join([token.text for token in condition_verb_subtree])
    condition = Condition(condition=condition_text)

    # This pattern is anchored on the literal word "used", so the action
    # is a fixed string rather than extracted text.
    action = Action(action="used")

    return Dependency(
        hasDependentParameter=dependent_param,
        isDependingOn=is_depending_on_param,
        hasCondition=condition,
        hasAction=action,
    )
def extract_lefts_and_rights(curr_token: Token, extracted: Union[List, None] = None):
    """Flatten the dependency subtree of ``curr_token`` into token texts.

    Performs an in-order traversal (left children, the token itself, right
    children) so the returned texts appear in sentence order. ``extracted``
    is the shared accumulator used by the recursive calls; callers normally
    omit it.
    """
    if extracted is None:
        extracted = []

    for left_child in curr_token.lefts:
        extract_lefts_and_rights(left_child, extracted)

    extracted.append(curr_token.text)

    for right_child in curr_token.rights:
        extract_lefts_and_rights(right_child, extracted)

    return extracted


def extract_action(action_token: Token, condition_token: Token) -> Action:
    """Extract and classify the action phrase governed by ``action_token``.

    Gathers the texts of ``action_token`` and all of its syntactic children
    in sentence order, skipping the branch that belongs to the condition so
    the condition text does not leak into the action text. The phrase is
    then classified by keyword matching:

    * ``ParameterIsIgnored`` for "ignored"/"not used"-style phrases,
    * ``ParameterIsIllegal`` for "raise"/"must be"-style phrases,
    * a plain ``Action`` otherwise.
    """
    action_tokens = []
    action_lefts = list(action_token.lefts)
    action_rights = list(action_token.rights)

    for token in action_lefts:
        if token != condition_token:
            action_tokens.extend(extract_lefts_and_rights(token))
    action_tokens.append(action_token.text)
    for token in action_rights:
        if token != condition_token:
            action_tokens.extend(extract_lefts_and_rights(token))

    action_text = " ".join(action_tokens)
    # Lowercase once up front; previously .lower() was recomputed for every
    # phrase tested inside both any() generators.
    action_text_lower = action_text.lower()

    ignored_phrases = [
        "ignored",
        "not used",
        "no impact",
        "only supported",
        "only applies",
    ]
    illegal_phrases = ["raise", "exception", "must be", "must not be"]
    if any(phrase in action_text_lower for phrase in ignored_phrases):
        return ParameterIsIgnored(action=action_text)
    if any(phrase in action_text_lower for phrase in illegal_phrases):
        return ParameterIsIllegal(action=action_text)
    return Action(action=action_text)


def extract_condition(condition_token: Token) -> Condition:
    """Extract and classify the condition phrase rooted at ``condition_token``.

    The condition text is the full dependency subtree of ``condition_token``
    joined in sentence order, classified by keyword matching:

    * ``ParameterIsOptional`` for "is none"/"is not set"-style phrases,
    * ``ParameterHasValue`` for "equals"/"is true"-style phrases,
    * a plain ``Condition`` otherwise.
    """
    condition_text = " ".join(token.text for token in condition_token.subtree)
    # Lowercase once up front; previously .lower() was recomputed for every
    # phrase tested inside both any() generators.
    condition_text_lower = condition_text.lower()

    is_optional_phrases = [
        "is none",
        "is not set",
        "is not specified",
        "is not none",
        "if none",
        "if not none",
    ]
    has_value_phrases = ["equals", "is true", "is false", "is set to"]
    if any(phrase in condition_text_lower for phrase in is_optional_phrases):
        return ParameterIsOptional(condition=condition_text)
    if any(phrase in condition_text_lower for phrase in has_value_phrases):
        return ParameterHasValue(condition=condition_text)
    return Condition(condition=condition_text)

@staticmethod
def extract_pattern_parameter_ignored_condition(
    dependent_param: Parameter,
    func_parameters: List[Parameter],
    match: Tuple,
    param_docstring: Doc,
) -> Union[Dependency, None]:
    """Build a Dependency from a 'pattern_parameter_ignored_condition' match.

    ``match`` is a spaCy DependencyMatcher result; ``match[1]`` holds the
    indices (into ``param_docstring``) of the matched pattern slots.

    Returns None when the matched token does not name any parameter of the
    function, i.e. the match is likely a false positive.
    """
    # match[1][2] is the index of the token matched as the dependee
    # parameter name (the "dependee_param" slot of the pattern).
    is_depending_on_param_index = match[1][2]
    is_depending_on_param_name = param_docstring[is_depending_on_param_index].text
    # Resolve the token text to an actual Parameter of this function.
    is_depending_on_param = next(
        filter(
            lambda param: param.name == is_depending_on_param_name, func_parameters
        ),
        None,
    )
    if is_depending_on_param is None:
        # Likely not a correct dependency match
        return None

    # The condition text is the whole dependency subtree of the token
    # matched as the condition head (index match[1][1]).
    condition_verb = param_docstring[match[1][1]]
    condition_verb_subtree = list(condition_verb.subtree)
    condition_text = " ".join([token.text for token in condition_verb_subtree])
    condition = Condition(condition=condition_text)

    # This pattern is anchored on the literal word "ignored", so the action
    # is a fixed string rather than extracted text.
    action = Action(action="ignored")

    return Dependency(
        hasDependentParameter=dependent_param,
        isDependingOn=is_depending_on_param,
        hasCondition=condition,
        hasAction=action,
    )

class DependencyExtractor:
@staticmethod
def extract_pattern_parameter_applies_condition(
def extract_pattern_parameter_subordinating_conjunction(
dependent_param: Parameter,
func_parameters: List[Parameter],
match: Tuple,
Expand All @@ -97,12 +111,11 @@ def extract_pattern_parameter_applies_condition(
# Likely not a correct dependency match
return None

condition_verb = param_docstring[match[1][1]]
condition_verb_subtree = list(condition_verb.subtree)
condition_text = " ".join([token.text for token in condition_verb_subtree])
condition = Condition(condition=condition_text)
condition_token = param_docstring[match[1][1]]
condition = extract_condition(condition_token)

action = Action(action="applies")
action_token = param_docstring[match[1][0]]
action = extract_action(action_token, condition_token)

return Dependency(
hasDependentParameter=dependent_param,
Expand Down Expand Up @@ -159,14 +172,18 @@ def get_dependencies(api: API) -> Dict:
docstring = parameter.docstring.description
docstring_preprocessed = preprocess_docstring(docstring)
doc = nlp(docstring_preprocessed)
dependency_matches = matcher(doc)
param_dependencies = extract_dependencies_from_docstring(
parameter,
parameters,
doc,
dependency_matches,
spacy_id_to_pattern_id_mapping,
)
param_dependencies = []
for sentence in doc.sents:
sentence_dependency_matches = matcher(sentence)
sentence_dependencies = extract_dependencies_from_docstring(
parameter,
parameters,
sentence,
sentence_dependency_matches,
spacy_id_to_pattern_id_mapping,
)
if sentence_dependencies:
param_dependencies.extend(sentence_dependencies)
if param_dependencies:
all_dependencies[function_name][parameter.name] = param_dependencies

Expand Down

0 comments on commit 13c789c

Please sign in to comment.