From 98cad17ae60341a4ecc2940ed956dc340798f72c Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 31 May 2023 16:23:41 -0700 Subject: [PATCH 1/2] Replace deprecated Pandas append method --- .../People-Analytics-using-Neptune-ML.ipynb | 60 ++++++++++++++----- .../04-Machine-Learning/neptune_ml_utils.py | 11 +++- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/src/graph_notebook/notebooks/04-Machine-Learning/Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb b/src/graph_notebook/notebooks/04-Machine-Learning/Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb index 89780c9d..cd0294d2 100644 --- a/src/graph_notebook/notebooks/04-Machine-Learning/Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb +++ b/src/graph_notebook/notebooks/04-Machine-Learning/Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb @@ -332,6 +332,12 @@ "role_dept_list = []\n", "role_field_list = []\n", "\n", + "edge_emp_dept_rows_list = [edge_emp_dept]\n", + "edge_emp_role_rows_list = [edge_emp_role]\n", + "edge_emp_field_rows_list = [edge_emp_field]\n", + "edge_role_dept_rows_list = [edge_role_dept]\n", + "edge_role_field_rows_list = [edge_role_field]\n", + "\n", "for index, row in df.iterrows():\n", " emp = row['EmployeeNumber']\n", " emp_id = emp_map[emp]\n", @@ -341,30 +347,52 @@ " field_id = field_map[field]\n", " dept = row['Department']\n", " dept_id = dept_map[dept]\n", - " \n", - " edge_emp_dept = edge_emp_dept.append({'~id': uuid.uuid4(), '~from': emp_id, \n", - " '~to': dept_id, \n", - " '~label': 'works_in'}, ignore_index=True)\n", - " edge_emp_role = edge_emp_role.append({'~id': uuid.uuid4(), '~from': emp_id, \n", - " '~to': role_id, \n", - " '~label': 'works_as'}, ignore_index=True)\n", - " edge_emp_field = edge_emp_field.append({'~id': uuid.uuid4(), '~from': emp_id, \n", - " '~to': field_id, \n", - " '~label': 'has_education_level'}, 
ignore_index=True)\n", + "\n", + " edge_emp_dept_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n", + " '~from': emp_id,\n", + " '~to': dept_id,\n", + " '~label': 'works_in'},\n", + " orient='index').T\n", + " edge_emp_dept_rows_list.append(edge_emp_dept_row_df)\n", + " edge_emp_role_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n", + " '~from': emp_id,\n", + " '~to': role_id,\n", + " '~label': 'works_as'},\n", + " orient='index').T\n", + " edge_emp_role_rows_list.append(edge_emp_role_row_df)\n", + " edge_emp_field_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n", + " '~from': emp_id,\n", + " '~to': field_id,\n", + " '~label': 'has_education_level'},\n", + " orient='index').T\n", + " edge_emp_field_rows_list.append(edge_emp_field_row_df)\n", " \n", " role_dept = f\"{role_id}-{dept_id}\"\n", " role_field = f\"{role_id}-{field_id}\"\n", " if role_dept not in role_dept_list:\n", - " edge_role_dept = edge_role_dept.append({'~id': uuid.uuid4(), '~from': role_id, \n", - " '~to': dept_id, \n", - " '~label': 'part_of'}, ignore_index=True)\n", + " edge_role_dept_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n", + " '~from': role_id,\n", + " '~to': dept_id,\n", + " '~label': 'part_of'},\n", + " orient='index').T\n", + " edge_role_dept_rows_list.append(edge_role_dept_row_df)\n", + " #edge_role_dept = pd.concat([edge_role_dept, edge_role_dept_row_df], ignore_index=True)\n", " role_dept_list.append(role_dept)\n", " if role_field not in role_field_list:\n", - " edge_role_field = edge_role_field.append({'~id': uuid.uuid4(), '~from': role_id, \n", - " '~to': field_id, \n", - " '~label': 'requires'}, ignore_index=True)\n", + " edge_role_field_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(), '~from': role_id,\n", + " '~to': field_id,\n", + " '~label': 'requires'},\n", + " orient='index').T\n", + " edge_role_field_rows_list.append(edge_role_field_row_df)\n", " role_field_list.append(role_field)\n", " edge_cnt = edge_cnt + 1\n", + "\n", + 
"edge_emp_dept = pd.concat(edge_emp_dept_rows_list, ignore_index=True)\n", + "edge_emp_role = pd.concat(edge_emp_role_rows_list, ignore_index=True)\n", + "edge_emp_field = pd.concat(edge_emp_field_rows_list, ignore_index=True)\n", + "edge_role_dept = pd.concat(edge_role_dept_rows_list, ignore_index=True)\n", + "edge_role_field = pd.concat(edge_role_field_rows_list, ignore_index=True)\n", + "\n", "edge_df = pd.concat([edge_emp_dept, edge_emp_role, edge_emp_field, edge_role_dept, edge_role_field])\n", "edge_df.to_csv(os.path.join(output_folder, 'edge.csv'), index=False)\n", "\n", diff --git a/src/graph_notebook/notebooks/04-Machine-Learning/neptune_ml_utils.py b/src/graph_notebook/notebooks/04-Machine-Learning/neptune_ml_utils.py index be5dbf98..4818e389 100644 --- a/src/graph_notebook/notebooks/04-Machine-Learning/neptune_ml_utils.py +++ b/src/graph_notebook/notebooks/04-Machine-Learning/neptune_ml_utils.py @@ -360,18 +360,23 @@ def __process_movies_genres(self): genre_df['name'] = genre_df['~id'] genre_df.to_csv(os.path.join(self.formatted_directory, 'genre_vertex.csv'), index=False) + genres_edge_df_rows_list = [genres_edges_df] # Loop through all the movies and pull out the genres for index, row in movie_genre_df.iterrows(): genre_lst = [] for g in genres: if row[g] == 1: - genres_edges_df = genres_edges_df.append( - {'~id': f"{row['~id']}-included_in-{g}", '~label': 'included_in', - '~from': row['~id'], '~to': g}, ignore_index=True) + row_as_df = pd.DataFrame.from_dict({'~id': f"{row['~id']}-included_in-{g}", + '~label': 'included_in', + '~from': row['~id'], + '~to': g}, + orient='index').T + genres_edge_df_rows_list.append(row_as_df) genre_lst.append(g) movies_df.loc[index, 'genre:String[]'] = ';'.join(genre_lst) + genres_edges_df = pd.concat(genres_edge_df_rows_list, ignore_index=True) # rename the release data column to specify the data type movies_df['release_date:Date'] = movies_df['release_date'] # Drop the genre columns as well as the uneeded release date 
columns From 06e575f63bab85a1a2460f863a0c33390666bcff Mon Sep 17 00:00:00 2001 From: Michael Chin Date: Wed, 31 May 2023 16:36:05 -0700 Subject: [PATCH 2/2] update changelog --- ChangeLog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog.md b/ChangeLog.md index ea47b468..93acd6d5 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -11,6 +11,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd - Added support for setting `%graph_notebook_vis_options` from a variable ([Link to PR](https://github.com/aws/graph-notebook/pull/487)) - Pinned JupyterLab<4.x to fix Python 3.8/3.10 builds ([Link to PR](https://github.com/aws/graph-notebook/pull/490)) - Changed datatype of "amount" from String to numeric for "Transaction" vertices in Fraud Graph sample notebook ([Link to PR](https://github.com/aws/graph-notebook/pull/489)) +- Replaced usages of deprecated DataFrame.append method in ML samples ([Link to PR](https://github.com/aws/graph-notebook/pull/495)) ## Release 3.8.1 (April 17, 2023) - Reinstate Python 3.7 support for compatibility with legacy AL1 Neptune Notebooks ([Link to PR](https://github.com/aws/graph-notebook/pull/479))