Skip to content

Commit

Permalink
Ensure rule_ids are valid. (#6738)
Browse files Browse the repository at this point in the history
  • Loading branch information
tjprescott authored Aug 9, 2023
1 parent 7a8c5c9 commit 5f40621
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 17 deletions.
16 changes: 3 additions & 13 deletions packages/python-packages/apiview-gpt/output.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"violations": [
{
"rule_ids": [
"python-client-naming",
"python-client-separate-sync-async"
"python_design.html#python-client-naming",
"python_design.html#python-client-separate-sync-async"
],
"line_no": 2,
"bad_code": "class azure.contoso.ContosoAsyncManager:",
Expand All @@ -13,7 +13,7 @@
},
{
"rule_ids": [
"python-client-options-naming"
"python_design.html#python-client-options-naming"
],
"line_no": 8,
"bad_code": "options: dict",
Expand Down Expand Up @@ -64,16 +64,6 @@
"bad_code": "red = \"red\"",
"suggestion": "RED = \"red\"",
"comment": "Enum names should be in UPPERCASE."
},
{
"rule_ids": [
"python_design.html#python-client-naming",
"python_design.html#python-models-dict-result"
],
"line_no": 30,
"bad_code": "class azure.contoso.WidgetResult:",
"suggestion": "Use a simple Mapping (e.g. dict) instead of creating a WidgetResult class if the WidgetResult class is not used as an input parameter for other APIs.",
"comment": "Service client types should have a Client suffix. Consider using a simple Mapping instead of creating a separate class for operation results."
}
]
}
2 changes: 1 addition & 1 deletion packages/python-packages/apiview-gpt/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def console_entry_point():
print("Running apiview-gpt version {}".format(__version__))
reviewer = GptReviewer()
# FIXME: Make this a proper CLI
input_filename = "test.txt"
input_filename = "test2.txt"
file_path = os.path.join(_PACKAGE_ROOT, input_filename)
with open(file_path, "r") as f:
apiview_text = f.read()
Expand Down
44 changes: 41 additions & 3 deletions packages/python-packages/apiview-gpt/src/_gpt_reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ def __init__(self):
self.chain = LLMChain(llm=self.llm, prompt=self.prompt_template)

def get_response(self, apiview, language):
general_guidelines, language_guidelines = self.retrieve_guidelines(language)
all_guidelines = general_guidelines + language_guidelines
apiview = self.unescape(apiview)
all_guidelines = self.retrieve_guidelines(language)

# TODO: Make this not hard-coded!
guidelines = self.select_guidelines(all_guidelines, [
Expand Down Expand Up @@ -76,8 +76,46 @@ def get_response(self, apiview, language):
# FIXME see: https://github.com/Azure/azure-sdk-tools/issues/6571
if len(output.violations) > 0:
final_results.status = "Error"
self.process_rule_ids(final_results, all_guidelines)
return final_results

""" Ensure that each rule ID matches with an actual guideline ID.
This ensures that the links that appear in APIView should never be broken (404).
"""
def process_rule_ids(self, results, guidelines):
    """Ensure that every rule ID on every violation is an actual guideline ID.

    This keeps the links that appear in APIView from being broken (404).
    Each rule ID is handled as follows:

    * an ID that exactly matches a guideline ``"id"`` is kept unchanged;
    * a non-empty ID that is a suffix of some guideline ``"id"`` is replaced,
      at the same position, by the first such full guideline ID;
    * anything else is dropped and a warning is printed (possible
      hallucination by the model).

    Duplicate IDs produced by the normalization are collapsed, keeping the
    first occurrence.

    :param results: Object whose ``violations`` attribute is an iterable of
        objects exposing a mutable ``rule_ids`` list. Each list is updated
        in place.
    :param guidelines: Re-iterable collection of mappings that each have an
        ``"id"`` key.
    :return: None
    """
    # Exact-match lookup of valid guideline IDs.
    valid_ids = {guideline["id"] for guideline in guidelines}
    # Cache of partial ID -> full guideline ID (None when nothing matches).
    # Kept separate from valid_ids so that a partial ID that was resolved once
    # is never mistaken for a valid ID on a later violation.
    resolved = {}

    for violation in results.violations:
        corrected = []
        for rule_id in violation.rule_ids:
            if rule_id in valid_ids:
                full_id = rule_id
            else:
                if rule_id not in resolved:
                    # An empty ID would be a suffix of every guideline ID, so
                    # it is treated as "not found" instead of matching.
                    resolved[rule_id] = next(
                        (
                            guideline["id"]
                            for guideline in guidelines
                            if rule_id and guideline["id"].endswith(rule_id)
                        ),
                        None,
                    )
                full_id = resolved[rule_id]

            if full_id is None:
                # no match or partial match found, so drop the rule_id
                print(f"WARNING: Rule ID {rule_id} not found. Possible hallucination.")
                continue
            if full_id not in corrected:
                corrected.append(full_id)

        # Build the new list first, then swap the contents in place so that
        # the list is never modified while it is being iterated and existing
        # references to it stay valid.
        violation.rule_ids[:] = corrected

def unescape(self, text: str) -> str:
    """Return *text* with backslash escape sequences expanded.

    Sequences such as a literal backslash followed by ``n`` are turned into
    the character they denote (here, a newline), following the rules of the
    ``unicode_escape`` codec.

    The ``unicode_escape`` codec decodes its input bytes as Latin-1, so the
    text is encoded as Latin-1 first; characters outside Latin-1 are written
    as backslash escapes by the ``backslashreplace`` handler and restored by
    the decode step. This keeps non-ASCII characters intact instead of
    turning their UTF-8 bytes into mojibake.

    :param text: Text that may contain backslash escape sequences.
    :return: The text with escape sequences expanded.
    :raises UnicodeDecodeError: If *text* contains a malformed escape, for
        example a trailing lone backslash.
    """
    return text.encode("latin-1", "backslashreplace").decode("unicode_escape")

def process_violations(self, violations: List[Violation], section: Section) -> List[Violation]:
if not violations:
return violations
Expand Down Expand Up @@ -146,4 +184,4 @@ def retrieve_guidelines(self, language):
with open(os.path.join(language_guidelines_path, filename), "r") as f:
items = json.loads(f.read())
language_guidelines.extend(items)
return general_guidelines, language_guidelines
return general_guidelines + language_guidelines
1 change: 1 addition & 0 deletions packages/python-packages/apiview-gpt/test2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Package is parsed using apiview-stub-generator(version:0.3.7), Python version: 3.10.12\n\nclass azure.contoso.ContosoAsyncManager:\n\n async def __init__(\n self,\n endpoint,\n credential,\n options: dict\n )\n\n async def get_widget_async(self, options: dict) -> WidgetResult\n\nclass azure.contoso.ContosoManager:\n\n def __init__(\n self,\n endpoint,\n credential,\n options: dict\n )\n\n def get_widget(self, options: dict) -> WidgetResult\n\nclass azure.contoso.WidgetColor(Enum):\n\n blue = "blue"\n green = "green"\n red = "red"\n\nclass azure.contoso.WidgetResult:\n\n def __init__(\n self,\n name: str,\n color: WidgetColor\n )\n\n

0 comments on commit 5f40621

Please sign in to comment.