
Commit 0039256

[CICO-6] added pre-commit, added ruff GitHub Action, added ruff pre-commit, started working on fixing add_item method, updated requirements.txt, started working on PyPI structure
jophals committed Nov 21, 2024
1 parent a1b73bd commit 0039256
Showing 12 changed files with 161 additions and 87 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/github-actions-demo.yml
@@ -0,0 +1,8 @@
name: Ruff
on: [ push, pull_request ]
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: astral-sh/ruff-action@v1
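Note: invoked with no arguments, astral-sh/ruff-action runs Ruff's lint checks (ruff check) over the repository, so this workflow lints every push and pull request; formatting is left to the ruff-format pre-commit hook below.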
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,14 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.1.0
hooks:
- id: check-yaml
- id: check-ast
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.4
hooks:
- id: ruff
args: [ --fix ]
- id: ruff-format
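These hooks only run once a contributor activates them in their clone with pre-commit install; after that, every git commit triggers check-yaml, check-ast, the whitespace fixers, and Ruff (lint with --fix, then ruff-format).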
7 changes: 0 additions & 7 deletions authentication.py

This file was deleted.

10 changes: 10 additions & 0 deletions requirements.txt
@@ -0,0 +1,10 @@
cfgv==3.4.0
distlib==0.3.9
filelock==3.16.1
identify==2.6.2
nodeenv==1.9.1
platformdirs==4.3.6
pre_commit==4.0.1
PyYAML==6.0.2
ruff==0.7.4
virtualenv==20.27.1
64 changes: 42 additions & 22 deletions CitesphereConnector.py → src/CitesphereConnector.py
@@ -11,30 +11,45 @@ def __init__(self, api, auth_token_object):
self.handle_api_params()

def validate(self):
if not hasattr(self.auth_token_object, 'authType'):
raise AttributeError('Missing authType attribute')

if not hasattr(self.auth_token_object, 'headers'):
raise AttributeError('Missing headers attribute')

if not hasattr(self.auth_token_object, 'access_token'):
if not hasattr(self.auth_token_object, 'username') and not hasattr(self.auth_token_object, 'password'):
raise AttributeError('Either username and password or access_token should be present')

if not self.auth_token_object.authType == 'oauth' and not self.auth_token_object.authType == 'basic':
if not hasattr(self.auth_token_object, "authType"):
raise AttributeError("Missing authType attribute")

if not hasattr(self.auth_token_object, "headers"):
raise AttributeError("Missing headers attribute")

if not hasattr(self.auth_token_object, "access_token"):
if not hasattr(self.auth_token_object, "username") and not hasattr(
self.auth_token_object, "password"
):
raise AttributeError(
"Either username and password or access_token should be present"
)

if (
not self.auth_token_object.authType == "oauth"
and not self.auth_token_object.authType == "basic"
):
raise Exception("authType should be either oauth or basic")
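
For reference, a minimal sketch of an auth object that satisfies validate(), built with the AuthObject class this commit adds in src/authentication.py (the token value is a placeholder):

    from authentication import AuthObject

    # oauth variant: authType, headers, and access_token all exist as attributes;
    # headers may start as None, since handle_api_params() fills it in afterwards.
    auth = AuthObject(authType="oauth", access_token="<access-token>")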

def handle_api_params(self):
if self.auth_token_object.authType == "oauth":
self.auth_token_object.headers = {'Authorization': 'Bearer {}'.format(self.auth_token_object.access_token)}
self.auth_token_object.headers = {
"Authorization": "Bearer {}".format(self.auth_token_object.access_token)
}
elif self.auth_token_object.authType == "basic":
auth_str = '{}:{}'.format(self.auth_token_object.username, self.auth_token_object.password)
auth_b64 = base64.b64encode(auth_str.encode('ascii'))
self.auth_token_object.headers = {'Authorization': 'Basic {}'.format(auth_b64)}
auth_str = "{}:{}".format(
self.auth_token_object.username, self.auth_token_object.password
)
auth_b64 = base64.b64encode(auth_str.encode("ascii"))
self.auth_token_object.headers = {
"Authorization": "Basic {}".format(auth_b64)
}
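
Note: base64.b64encode() returns bytes, so in both the old and new versions of this code the basic-auth header formats to "Basic b'...'" on Python 3. A likely fix, not part of this commit, decodes before formatting:

    # decode the base64 bytes so the header value is a plain string
    auth_b64 = base64.b64encode(auth_str.encode("ascii")).decode("ascii")
    headers = {"Authorization": "Basic {}".format(auth_b64)}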

def execute_command(self, url):
try:
response = urllib2.urlopen(urllib2.Request(url, headers=self.auth_token_object.headers))
response = urllib2.urlopen(
urllib2.Request(url, headers=self.auth_token_object.headers)
)
data = json.load(response)

return data
@@ -48,7 +63,7 @@ def get_user(self):
def check_test(self):
url = f"{self.api}/v1/test"
return self.execute_command(url)

def check_access(self, document_id):
url = f"{self.api}/files/giles/{document_id}/access/check"
return self.execute_command(url)
@@ -75,20 +90,25 @@ def get_collections(self, zotero_group_id):
return self.execute_command(url)

def get_collection_items(self, zotero_group_id, collection_id, page_number=0):
url = f"{self.api}/v1/groups/{zotero_group_id}/collections/{collection_id}/items"
url = (
f"{self.api}/v1/groups/{zotero_group_id}/collections/{collection_id}/items"
)
if page_number:
url = f"{url}?&page={page_number}"
url = f"{url}?&page={page_number}"
return self.execute_command(url)
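
Note: the query string produced here comes out as ?&page=N; most servers ignore the empty leading parameter, but ?page=N is presumably what was intended.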

def get_item_info(self, zotero_group_id, item_id):
url = f"{self.api}/v1/groups/{zotero_group_id}/items/{item_id}"
return self.execute_command(url)

def get_collections_by_collection_id(self, zotero_group_id, collection_id):
url = f"{self.api}/groups/{zotero_group_id}/collections/{collection_id}/collections"
return self.execute_command(url)

def add_item(self, group_id):
def add_item(self, group_id, file_path):
# with open(file_path, "rb") as file:
# files = {"file": file}
# response = requests.post(url, files=files)

url = f"{self.api}/v1/groups/{group_id}/items/create"
return self.execute_command(url)
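
The commented-out lines sketch the intended fix. A possible completion, assuming the items/create endpoint accepts multipart uploads and that the requests library is available (neither is confirmed by this commit):

    import requests

    def add_item(self, group_id, file_path):
        url = f"{self.api}/v1/groups/{group_id}/items/create"
        # send the file as multipart/form-data, reusing the connector's auth headers
        with open(file_path, "rb") as f:
            response = requests.post(
                url, headers=self.auth_token_object.headers, files={"file": f}
            )
        response.raise_for_status()
        return response.json()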

Binary file not shown.
Binary file added src/__pycache__/authentication.cpython-310.pyc
Binary file not shown.
Binary file added src/__pycache__/constants.cpython-310.pyc
Binary file not shown.
14 changes: 14 additions & 0 deletions src/authentication.py
@@ -0,0 +1,14 @@
class AuthObject:
def __init__(
self,
authType=None,
headers=None,
username=None,
password=None,
access_token=None,
):
self.authType = authType
self.headers = headers
self.username = username
self.password = password
self.access_token = access_token
2 changes: 1 addition & 1 deletion constants.py → src/constants.py
@@ -1,3 +1,3 @@
CITESPHERE_API_URL = "https://diging-dev.asu.edu/citesphere-review/api"
MAX_SIZE = 50
GILES_URL = f"https://diging.asu.edu/geco-giles-staging/api/v2/resources/files/"
GILES_URL = "https://diging.asu.edu/geco-giles-staging/api/v2/resources/files/"
109 changes: 62 additions & 47 deletions csvGenerator.ipynb → src/csvGenerator.ipynb
@@ -10,9 +10,8 @@
"import os\n",
"import csv\n",
"import math\n",
"import random\n",
"import requests\n",
"import constants as const \n",
"import constants as const\n",
"from authentication import AuthObject\n",
"from CitesphereConnector import CitesphereConnector"
]
@@ -25,15 +24,16 @@
"outputs": [],
"source": [
"auth_object = AuthObject()\n",
"auth_object.authType = 'oauth'\n",
"auth_object.authType = \"oauth\"\n",
"auth_object.access_token = \"f5f7e899-30d3-4531-8b2e-8009e9969ed4\"\n",
"citesphere_api_url = const.CITESPHERE_API_URL\n",
"connector = CitesphereConnector(citesphere_api_url, auth_object)\n",
"#default max number of items displayed on a collection items page in citesphere\n",
"max_size=const.MAX_SIZE\n",
"# default max number of items displayed on a collection items page in citesphere\n",
"max_size = const.MAX_SIZE\n",
"\n",
"def get_file(file_id:str)-> str:\n",
" return const.GILES_URL+\"{}/content\".format(file_id)"
"\n",
"def get_file(file_id: str) -> str:\n",
" return const.GILES_URL + \"{}/content\".format(file_id)"
]
},
{
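
For reference, get_file() simply joins the Giles base URL from constants.py with the file id, e.g. get_file("<file_id>") returns https://diging.asu.edu/geco-giles-staging/api/v2/resources/files/<file_id>/content.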
@@ -67,7 +67,7 @@
"metadata": {},
"outputs": [],
"source": [
"groups=connector.get_groups()"
"groups = connector.get_groups()"
]
},
{
@@ -107,35 +107,33 @@
"metadata": {},
"outputs": [],
"source": [
"#download files from the collection items\n",
"def download_files(folder_path:str,ids:set, citesphere_token:str) -> list:\n",
" \n",
" #stores paths to downloaded files\n",
"# download files from the collection items\n",
"def download_files(folder_path: str, ids: set, citesphere_token: str) -> list:\n",
" # stores paths to downloaded files\n",
" path_list = []\n",
" \n",
" #iterating through the ids list\n",
" for (file_id, file_name) in ids:\n",
" \n",
"\n",
" # iterating through the ids list\n",
" for file_id, file_name in ids:\n",
" # getting the file ur using giles file id\n",
" giles_url = get_file(file_id)\n",
" os.makedirs(folder_path, exist_ok=True)\n",
" filename = os.path.join(folder_path, f\"{file_name}\")\n",
" \n",
" #append the path of the saved file to the folder\n",
"\n",
" # append the path of the saved file to the folder\n",
" path_list.append(filename)\n",
" \n",
" #header for get request\n",
"\n",
" # header for get request\n",
" headers = {\n",
" \"Authorization\": f\"Bearer {citesphere_token}\",\n",
" \"Content-Type\": \"application/pdf;charset=UTF-8\"\n",
" }\n",
" \"Content-Type\": \"application/pdf;charset=UTF-8\",\n",
" }\n",
" response = requests.get(giles_url, headers=headers)\n",
" \n",
" #saving the file if retrieved successfully\n",
"\n",
" # saving the file if retrieved successfully\n",
" if response.status_code == 200:\n",
" with open(filename, \"wb\") as file:\n",
" file.write(response.content)\n",
" return path_list "
" return path_list"
]
},
{
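
One behavior worth noting in download_files(): each filename is appended to path_list before the GET request is issued, so the returned list also includes paths for files whose download did not return 200 and were therefore never written to disk.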
@@ -226,8 +224,7 @@
"source": [
"# Create the CSV file with all the metadata and file path of the downloaded files\n",
"def write_to_csv(csv_name: str, item: list, flag: int) -> None:\n",
"\n",
" with open(csv_name, 'a', newline='') as file:\n",
" with open(csv_name, \"a\", newline=\"\") as file:\n",
" writer = csv.writer(file)\n",
"\n",
" # Check if it's the first time writing to the file\n",
Expand All @@ -236,7 +233,7 @@
" writer.writerow(fields)\n",
"\n",
" # Write the values to the CSV file\n",
" writer.writerow(list(item.values()))\n"
" writer.writerow(list(item.values()))"
]
},
{
@@ -275,8 +272,9 @@
"metadata": {},
"outputs": [],
"source": [
"def add_to_csv(csv_name: str, folder_name: str, items: list, csv_dict: dict, flag: int) -> int:\n",
"\n",
"def add_to_csv(\n",
" csv_name: str, folder_name: str, items: list, csv_dict: dict, flag: int\n",
") -> int:\n",
" for item in items[\"items\"]:\n",
" if item[\"key\"] in csv_dict:\n",
" continue\n",
@@ -291,16 +289,28 @@
" for values in items_list:\n",
" # Getting the file IDs in uploadedFile, extractedText, pages, image, text, ocr, additionalFiles\n",
" if values[\"uploadedFile\"] and values[\"uploadedFile\"] != \"None\":\n",
" giles_ids.add((values[\"uploadedFile\"][\"id\"], values[\"uploadedFile\"][\"filename\"]))\n",
" giles_ids.add(\n",
" (\n",
" values[\"uploadedFile\"][\"id\"],\n",
" values[\"uploadedFile\"][\"filename\"],\n",
" )\n",
" )\n",
"\n",
" # Check if extractedText is present and not equal to \"None\"\n",
" if values[\"extractedText\"] and values[\"extractedText\"] != \"None\":\n",
" giles_ids.add((values[\"extractedText\"][\"id\"], values[\"extractedText\"][\"filename\"]))\n",
" giles_ids.add(\n",
" (\n",
" values[\"extractedText\"][\"id\"],\n",
" values[\"extractedText\"][\"filename\"],\n",
" )\n",
" )\n",
"\n",
" # Check if pages is present and not equal to \"None\"\n",
" if values[\"pages\"] and values[\"pages\"] != \"None\":\n",
" for value in values[\"pages\"]:\n",
" giles_ids.add((value[\"image\"][\"id\"], value[\"image\"][\"filename\"]))\n",
" giles_ids.add(\n",
" (value[\"image\"][\"id\"], value[\"image\"][\"filename\"])\n",
" )\n",
" giles_ids.add((value[\"text\"][\"id\"], value[\"text\"][\"filename\"]))\n",
" giles_ids.add((value[\"ocr\"][\"id\"], value[\"ocr\"][\"filename\"]))\n",
"\n",
Expand All @@ -310,14 +320,16 @@
"\n",
" if giles_ids:\n",
" # store paths of the downloaded files to the path attribute\n",
" item[\"paths\"] = download_files(folder_name, giles_ids, auth_object.access_token)\n",
" item[\"paths\"] = download_files(\n",
" folder_name, giles_ids, auth_object.access_token\n",
" )\n",
"\n",
" # Add the item to csv_dict and write it to the CSV file\n",
" csv_dict[item[\"key\"]] = item\n",
" write_to_csv(csv_name, item, flag)\n",
" flag = 1\n",
"\n",
" return flag\n"
" return flag"
]
},
{
@@ -358,26 +370,29 @@
"outputs": [],
"source": [
"# Downloads and generates a CSV file containing all the group items information\n",
"def process_groups(csv_name: str, folder_path: str, groups: list, connector, max_size: int) -> dict:\n",
"\n",
"def process_groups(\n",
" csv_name: str, folder_path: str, groups: list, connector, max_size: int\n",
") -> dict:\n",
" csv_dict = {}\n",
" flag = 0\n",
" \n",
" #Iterate over the groups\n",
"\n",
" # Iterate over the groups\n",
" for group in groups:\n",
" group_id = group[\"id\"]\n",
" collections = connector.get_collections(group_id)\n",
" \n",
" #Iterate over the collections in the respective group\n",
"\n",
" # Iterate over the collections in the respective group\n",
" for collection in collections[\"collections\"]:\n",
" num_pages = math.ceil(collection[\"numberOfItems\"] / max_size)\n",
" \n",
" #Iterating over the pages\n",
"\n",
" # Iterating over the pages\n",
" for page in range(1, num_pages + 1):\n",
" items = connector.get_collection_items(group_id, collection[\"key\"], page)\n",
" items = connector.get_collection_items(\n",
" group_id, collection[\"key\"], page\n",
" )\n",
" flag = add_to_csv(csv_name, folder_path, items, csv_dict, flag)\n",
"\n",
" return csv_dict\n"
" return csv_dict"
]
},
{
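
The page count per collection follows from the fixed page size: num_pages = ceil(numberOfItems / max_size), so with MAX_SIZE = 50 a collection of 120 items is fetched in ceil(120 / 50) = 3 pages.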
@@ -391,9 +406,9 @@
"source": [
"csv_filename = \"citesphere_csv.csv\"\n",
"\n",
"folder_path = \"Files\"\n",
"folder_path = \"Files\"\n",
"\n",
"process_groups(csv_filename,folder_path,groups,connector,max_size)\n"
"process_groups(csv_filename, folder_path, groups, connector, max_size)"
]
},
{
(diff truncated; remaining file not loaded)
