
Commit

Merge pull request #1573 from chaoss/add-print-traceback-method-to-all-workers

Add print traceback method to all workers
sgoggins authored Jan 25, 2022
2 parents 64b3a84 + 561e176 commit 1cfde66
Showing 6 changed files with 75 additions and 113 deletions.
11 changes: 3 additions & 8 deletions workers/deps_libyear_worker/deps_libyear_worker.py
@@ -62,9 +62,7 @@ def deps_libyear_model(self, entry_info, repo_id):
         try:
             self.generate_deps_libyear_data(repo_id, absolute_repo_path)
         except Exception as e:
-            self.logger.debug(f"This is the exception from generate_deps_libyear_data exception registered {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+            self.print_traceback("Deps_libyear_worker: generate_deps_libyear_data", e, True)
 
         self.register_task_completion(entry_info, repo_id, "deps_libyear")

@@ -101,8 +99,5 @@ def generate_deps_libyear_data(self, repo_id, path):
 
             result = self.db.execute(self.repo_deps_libyear_table.insert().values(repo_deps))
             self.logger.info(f"Added dep: {result.inserted_primary_key}")
-        except Exception as e:
-            self.logger.debug(f"error generating libyear data, exception registered: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+        except Exception as e:
+            self.print_traceback("Deps_libyear_worker: generating and inserting data", e, True)
22 changes: 6 additions & 16 deletions workers/deps_worker/deps_worker.py
@@ -59,9 +59,7 @@ def deps_model(self, entry_info, repo_id):
         try:
             self.generate_deps_data(repo_id, absolute_repo_path)
         except Exception as e:
-            self.logger.debug(f"This is the error generated: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+            self.print_traceback("Deps model: generate_deps_data", e, True)
 
         self.register_task_completion(entry_info, repo_id, "deps")

@@ -86,9 +84,7 @@ def ossf_scorecard_model(self, entry_info, repo_id):
         try:
             self.generate_scorecard(repo_id, scorecard_repo_path)
         except Exception as e:
-            self.logger.debug(f"This is the error for scorecard generation: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+            self.print_traceback("Depts model: scorecard generation", e, True)
 
         self.register_task_completion(entry_info, repo_id, "deps")

@@ -142,11 +138,8 @@ def generate_scorecard(self, repo_id, path):
             }
             result = self.db.execute(self.repo_deps_scorecard_table.insert().values(repo_deps_scorecard))
             self.logger.info(f"Added OSSF scorecard data : {result.inserted_primary_key}")
-        except Exception as e:
-            self.logger.debug(f"Encountered trouble and exception registered inserting scorecard info: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-
+        except Exception as e:
+            self.print_traceback("inserting scorecard info for deps_worker", e, True)
 
     def generate_deps_data(self, repo_id, path):
         """Runs scc on repo and stores data in database
@@ -173,8 +166,5 @@ def generate_deps_data(self, repo_id, path):
 
             result = self.db.execute(self.repo_dependencies_table.insert().values(repo_deps))
             self.logger.info(f"Added dep: {result.inserted_primary_key}")
-        except Exception as e:
-            self.logger.debug(f"generate deps data failed on {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+        except Exception as e:
+            self.print_traceback("Deps worker: generate_deps_data", e, True)
72 changes: 26 additions & 46 deletions workers/github_worker/github_worker.py
@@ -196,6 +196,12 @@ def issues_model(self, entry_info, repo_id):
         Query the GitHub API for issues
         """
 
+        try:
+            x = 1 / 0
+            self.logger.info(x)
+        except Exception as e:
+            self.print_traceback("testing exception in beginning of pr model", e, False)
+
         github_url = entry_info['given']['github_url']
 
         # Contributors are part of this model, and finding all for the repo saves us
@@ -209,26 +215,18 @@ def issues_model(self, entry_info, repo_id):
             issue_events_all = self.issue_events_model(pk_source_issues)
             self.issue_nested_data_model(pk_source_issues, issue_events_all)
         except Exception as e:
-            self.logger.info(f"issue comments model failed on {e}. exception registered")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+            self.print_traceback("one of the issue models failed", e, False)
         finally:
             try:
                 issue_events_all = self.issue_events_model(pk_source_issues)
-                self.issue_nested_data_model(pk_source_issues, issue_events_all)
             except Exception as e:
-                self.logger.info(f"issue events model failed on {e}. exception registered")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
-                pass
+                self.print_traceback("issue events model failed", e, False)
             finally:
                 try:
                     self.issue_nested_data_model(pk_source_issues, issue_events_all)
                 except Exception as e:
-                    self.logger.info(f"issue nested model failed on {e}. exception registered")
-                    stacker = traceback.format_exc()
-                    self.logger.debug(f"{stacker}")
-                    pass
+                    self.print_traceback("issue nested model failed", e, False)
 
 
         # Register this task as completed
         self.register_task_completion(entry_info, self.repo_id, 'issues')
@@ -305,9 +303,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
             self.bulk_insert(self.message_table, insert=issue_comments_insert,
                 unique_columns=comment_action_map['insert']['augur'])
         except Exception as e:
-            self.logger.info(f"bulk insert of comments failed on {e}. exception registerred")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+            self.print_traceback("bulk insert of issue comments", e, False)
 
         """ ISSUE MESSAGE REF TABLE """
         try:
@@ -316,7 +312,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
                 comment_action_map['insert']['source'], comment_action_map['insert']['augur']
             )
         except Exception as e:
-            self.logger.info(f"exception registered in enrich_data_primary_keys for message_ref issues table: {e}.. exception registered")
+            self.print_traceback("enrich data primary keys for getting msg_id for issue comments", e, False)
 
         self.logger.info(f"log of the length of c_pk_source_comments {len(c_pk_source_comments)}.")
 
@@ -329,10 +325,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
                 c_pk_source_comments, self.issues_table, ['issue_url'], ['issue_url']
             )
         except Exception as e:
-            self.logger.info(f"exception registered in enrich_data_primary_keys for message_ref issues table: {e}.. exception registered")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+            self.print_traceback("enrich data primary keys for getting issue_id for issue comments", e, False)
 
         issue_message_ref_insert = [
             {
@@ -353,10 +346,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
                 unique_columns=comment_ref_action_map['insert']['augur']
             )
         except Exception as e:
-            self.logger.info(f"exception registered in bulk insert for issue_msg_ref_table: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+            self.print_traceback("bulk insert on issue_msg_ref_table", e, False)
 
         # list to hold contributors needing insertion or update
         try:
@@ -385,10 +375,7 @@ def issue_comments_insert(inc_issue_comments, comment_action_map):
             return
 
         except Exception as e:
-            self.logger.info(f"exception registered in paginate endpoint for issue comments: {e}")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+            self.print_traceback("paginate endpoint for issue comments", e, False)
 
     def issue_events_model(self, pk_source_issues):
 
@@ -546,11 +533,8 @@ def is_nan(value):
             # assignees_all += source_assignees
 
             # self.logger.info(f"Total of assignee's is: {assignees_all}. Labels are next.")
-        except Exception as e:
-            self.logger.debug(f'assignee exception: {e}.')
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
+        except Exception as e:
+            self.print_traceback("when creating source assignees list", e, False)
 
         finally:
 
@@ -598,12 +582,8 @@ def is_nan(value):
 
             # Closed issues, update with closer id
             ''' TODO: Right here I am not sure if the update columns are right, and will catch the state changes. '''
-        except Exception as e:
-            self.logger.debug(f'assignee exception: {e}.')
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-            pass
-
+        except Exception as e:
+            self.print_traceback("issue assignees", e, True)
 
         try:
 
@@ -612,7 +592,7 @@ def is_nan(value):
                 update_columns=['cntrb_id', 'issue_state', 'closed_at']
             )
         except Exception as e:
-            self.logger.info(f"Bulk insert failed on {e}. exception registerred.")
+            self.print_traceback("bulk insert on issues table", e, False)
 
         ''' Action maps are used to determine uniqueness based on the natural key at the source. '''
 
12 changes: 4 additions & 8 deletions workers/pull_request_worker/review_model_outfactor.py
@@ -83,10 +83,8 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
                 unique_columns=review_action_map['insert']['augur'],
                 update_columns=review_action_map['update']['augur']
             )
-        except Exception as e:
-            self.logger.debug(f"PR reviews data model failed on {e}. exception registered.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+        except Exception as e:
+            self.print_traceback("PR reviews data model", e, True)
 
         # Merge source data to inserted data to have access to inserted primary keys
 
@@ -261,7 +259,5 @@ def pull_request_reviews_model(self, pk_source_prs=[]):
                 self.pull_request_review_message_ref_table,
                 insert=pr_review_msg_ref_insert, unique_columns = review_msg_ref_action_map['insert']['augur']
             )
-        except Exception as e:
-            self.logger.debug(f"bulk insert for review message ref failed on : {e}")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
+        except Exception as e:
+            self.print_traceback("bulk insert for review message ref", e, True)
26 changes: 10 additions & 16 deletions workers/worker_git_integration.py
@@ -315,18 +315,16 @@ def enrich_cntrb_id(
                     user_unique_ids.append(row['gh_user_id'])
 
             except KeyError:
-                self.logger.info("Source data doesn't have user.id. Using node_id instead.")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
-                pass
+                self.print_traceback("Enrich_cntrb_id, data doesn't have user.id. Using node_id instead", e, True)
+
             finally:
                 for row in table_values_cntrb:
-                    try:
-                        user_unique_ids.append(row['gh_node_id'])
-                    except Exception as e:
-                        self.logger.info(f"Error adding gh_node_id: {e}. Row: {row}")
-                        stacker = traceback.format_exc()
-                        self.logger.debug(f"{stacker}")
+                    try:
+                        user_unique_ids.append(row['gh_node_id'])
+                    except Exception as e:
+                        self.logger.info(f"Error adding gh_node_id: {e}. Row: {row}")
+                        self.print_traceback("", e, True)
 
 
 
         #self.logger.info(f"gh_user_ids: {gh_user_ids}")
@@ -1379,12 +1377,8 @@ def load_url(url, extra_data={}):
 
                 ## Added additional exception logging and a pass in this block.
                 except Exception as e:
-                    self.logger.debug(
-                        f"{url} generated an exception: count is {count}, attemts are {attempts}."
-                    )
-                    stacker = traceback.format_exc()
-                    self.logger.debug(f"\n\n{stacker}\n\n")
-                    pass
+                    self.logger.info(f"Error adding gh_node_id: {e}. Row: {row}")
+                    self.print_traceback(f"{url} generated an exception: count is {count}, attemts are {attempts}.", e, True)
 
                 attempts += 1
 
45 changes: 26 additions & 19 deletions workers/worker_persistance.py
@@ -260,8 +260,7 @@ def sync_df_types(self, subject, source, subject_columns, source_columns):
                 #self.logger.info(f"Type dict at {subject_columns[index]} is : {type(source[source_index].values[0])}")
             except Exception as e:
                 self.logger.info(f"Source data registered exception: {source[source_index]}")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
+                self.print_traceback("", e, True)
 
         subject = subject.astype(type_dict)
 
@@ -835,9 +834,7 @@ def psql_insert_copy(table, conn, keys, data_iter):
                 self.logger.info(f"{e}")
                 dbapi_conn.rollback()
             except Exception as e:
-                self.logger.debug(f"Bulk insert error: {e}. exception registered")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
+                self.print_traceback("Bulk insert error", e, True)
                 dbapi_conn.rollback()
 
             try:
@@ -950,21 +947,14 @@ def _add_nested_columns(self, df, column_names):
             except ValueError as e:
                 # columns already added (happens if trying to expand the same column twice)
                 # TODO: Catch this before by only looping unique prefixs?
-                self.logger.debug(f"value error: {e}.")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
-                pass
+                self.print_traceback("value error in _add_nested_columns", e, True)
 
             except Exception as e:
-                self.logger.debug(f"Looking for nan user error: {e}.")
-                stacker = traceback.format_exc()
-                self.logger.debug(f"{stacker}")
-                pass
+                self.print_traceback("_add_nested_columns", e, True)
 
             finally:
                 self.logger.debug(f"finished _add_nested_columns.")
-
-
 
         return df
 

@@ -1282,9 +1272,7 @@ def get_relevant_columns(self, table, action_map={}):
             return relevant_columns_return
         except Exception as e:
             self.logger.info(f"Column may not exist in the database -- registered exception: {e}.")
-            stacker = traceback.format_exc()
-            self.logger.debug(f"{stacker}")
-
+            self.print_traceback("", e, True)
 
     def retrieve_tuple(self, key_values, tables):
         table_str = tables[0]
@@ -1307,3 +1295,22 @@ def retrieve_tuple(self, key_values, tables):
             pd.read_sql(retrieveTupleSQL, self.db, params={}).to_json(orient="records")
         )
         return values
+
+    """
+    Prints the traceback when an exception occurs
+    Params
+        exception_message: String - Explain the location that the exception occurred
+        exception: String - Exception object that python returns during an Exception
+        debug_log: Boolean - Determines whether the message is printed to the debug log or info log
+    Notes
+        To print the location of the exception to the info log and the traceback to the debug log,
+        add a self.logger.info call then call self.print_traceback("", e) to print the traceback to only the debug log
+    """
+    def print_traceback(self, exception_message, exception, debug_log=True):
+
+        if debug_log:
+            self.logger.debug(f"{exception_message}. ERROR: {exception}", exc_info=sys.exc_info())
+        else:
+            self.logger.info(f"{exception_message}. ERROR: {exception}", exc_info=sys.exc_info())
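
For reference, here is a minimal, self-contained sketch of how a worker would call the new helper. The DemoWorker class, its logger setup, and the run_task method are illustrative assumptions for this example; only the print_traceback signature and body come from this commit.

import logging
import sys


class DemoWorker:
    """Hypothetical stand-in for an Augur worker class."""

    def __init__(self):
        # Workers normally get a configured logger from the framework; a basic one suffices here.
        logging.basicConfig(level=logging.DEBUG)
        self.logger = logging.getLogger("demo_worker")

    def print_traceback(self, exception_message, exception, debug_log=True):
        # Same body as the helper added to worker_persistance.py in this commit.
        if debug_log:
            self.logger.debug(f"{exception_message}. ERROR: {exception}", exc_info=sys.exc_info())
        else:
            self.logger.info(f"{exception_message}. ERROR: {exception}", exc_info=sys.exc_info())

    def run_task(self):
        try:
            1 / 0  # placeholder for a real task step that raises
        except Exception as e:
            # Location string plus the exception; True routes the message and traceback to the debug log.
            self.print_traceback("DemoWorker: run_task", e, True)


if __name__ == "__main__":
    DemoWorker().run_task()

Passing exc_info to the logger lets the logging handlers render the traceback, so each worker no longer has to format traceback.format_exc() by hand.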
