Skip to content

Commit

Permalink
wip import new "verlinkungen" #100; ToDo: run against server
Browse files Browse the repository at this point in the history
  • Loading branch information
csae8092 committed Sep 19, 2024
1 parent 4ab26be commit 72ed508
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ node_modules

# other things
.env
*.ipynb
Untitled*.ipynb
env.secret
staticfiles
media/event_error.csv
Expand Down
147 changes: 147 additions & 0 deletions issue__100_verlinkungen.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "86caea5a-01df-46b0-84d7-9e4a989ddc56",
"metadata": {},
"outputs": [],
"source": [
"from django.core.exceptions import ObjectDoesNotExist\n",
"from tqdm import tqdm\n",
"\n",
"from archiv.models import Event, Person, Place\n",
"from archiv.import_utils import gsheet_to_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a6172706-ae6d-4076-a676-95df531b9fca",
"metadata": {},
"outputs": [],
"source": [
"sheet_id = \"1agkc8QZuNnZTzRNW88y6UhizidgeXCvcyqQ3OGSCfm8\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec125df7-d53b-469d-acab-d70e4888a4a6",
"metadata": {},
"outputs": [],
"source": [
"df = gsheet_to_df(sheet_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1d33ec3-6189-4fba-a5fc-c1c3850a3dd7",
"metadata": {},
"outputs": [],
"source": [
"for g, ndf in tqdm(df.groupby(\"entity\")):\n",
" ent = Person.objects.get(id=g)\n",
" for i, row in ndf.iterrows():\n",
" try:\n",
" event = Event.objects.get(id=row[\"article\"])\n",
" except ObjectDoesNotExist:\n",
" print(row[\"article\"])\n",
" event.person.add(ent)\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a3143d02-77d6-41d3-84fa-cf30f10ffccc",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b0f0ab8a-455d-4495-b61e-19872925314b",
"metadata": {},
"outputs": [],
"source": [
"sheet_id = \"1VV47bmUTi5GQwber7xJhHvj95XmsYI-Jrswaw1hprV4\"\n",
"df = gsheet_to_df(sheet_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "43ca3a09-2b5a-4c93-aac3-4e02a16c7554",
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04a27e79-17c2-4899-9f0e-9e1df89b0fc9",
"metadata": {},
"outputs": [],
"source": [
"event_does_not_exist = []\n",
"for g, ndf in tqdm(df.groupby(\"entity\")):\n",
" ent = Place.objects.get(id=g)\n",
" for i, row in ndf.iterrows():\n",
" try:\n",
" event = Event.objects.get(id=row[\"article\"])\n",
" except ObjectDoesNotExist:\n",
" event_does_not_exist.append(row[\"article\"])\n",
" continue\n",
" event.place.add(ent)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "88102882-a127-4f60-9879-c02d5e2fd807",
"metadata": {},
"outputs": [],
"source": [
"len(event_does_not_exist)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6fd1e8ee-e674-4fdf-9da1-9d80524d6043",
"metadata": {},
"outputs": [],
"source": [
"_"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Django Shell-Plus",
"language": "python",
"name": "django_extensions"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 72ed508

Please sign in to comment.