From 422e1090a82116c8d77ea8377988792fc7dee243 Mon Sep 17 00:00:00 2001 From: thomas loubrieu Date: Thu, 16 Jan 2025 13:28:27 -0500 Subject: [PATCH 1/2] make dev_mode shorten the legacy registry synchronization --- docker/sweepers_driver.py | 3 ++- setup.cfg | 3 +++ .../legacy_registry_sync/legacy_registry_sync.py | 5 +++++ src/pds/registrysweepers/reindexer/main.py | 2 +- src/pds/registrysweepers/utils/db/client.py | 6 +++--- src/pds/registrysweepers/utils/misc.py | 4 ++++ 6 files changed, 18 insertions(+), 5 deletions(-) diff --git a/docker/sweepers_driver.py b/docker/sweepers_driver.py index c8d288f..a4df683 100755 --- a/docker/sweepers_driver.py +++ b/docker/sweepers_driver.py @@ -68,11 +68,12 @@ from pds.registrysweepers.utils import configure_logging, parse_log_level from pds.registrysweepers.utils.db.client import get_opensearch_client_from_environment from pds.registrysweepers.utils.misc import get_human_readable_elapsed_since +from pds.registrysweepers.utils.misc import is_dev_mode configure_logging(filepath=None, log_level=logging.INFO) log = logging.getLogger(__name__) -dev_mode = str(os.environ.get("DEV_MODE")).lower() not in {'none', '', '0', 'false'} +dev_mode = is_dev_mode() if dev_mode: log.warning('Operating in development mode - host verification disabled') import urllib3 diff --git a/setup.cfg b/setup.cfg index e48ba80..6dfea4e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,6 +69,9 @@ dev = pre-commit~=3.3.3 sphinx~=3.2.1 sphinx-rtd-theme~=0.5.0 + types-python-dateutil==2.9.0.20241206 + + # unclear why the following pins are necessary, but without them, versions dependent on sphinx>=5.0 are installed sphinxcontrib-applehelp==1.0.4 diff --git a/src/pds/registrysweepers/legacy_registry_sync/legacy_registry_sync.py b/src/pds/registrysweepers/legacy_registry_sync/legacy_registry_sync.py index 37c12b3..953ff8e 100644 --- a/src/pds/registrysweepers/legacy_registry_sync/legacy_registry_sync.py +++ b/src/pds/registrysweepers/legacy_registry_sync/legacy_registry_sync.py @@ -9,6 +9,7 @@ from pds.registrysweepers.legacy_registry_sync.opensearch_loaded_product import get_already_loaded_lidvids from pds.registrysweepers.legacy_registry_sync.solr_doc_export_to_opensearch import SolrOsWrapperIter from pds.registrysweepers.utils import configure_logging +from pds.registrysweepers.utils.misc import is_dev_mode from solr_to_es.solrSource import SlowSolrDocs # type: ignore log = logging.getLogger(__name__) @@ -57,8 +58,12 @@ def run( tries = 0 es_actions = SolrOsWrapperIter(solr_itr, OS_INDEX, found_ids=prod_ids) + dev_mode = is_dev_mode() for ok, item in opensearchpy.helpers.streaming_bulk( client, es_actions, chunk_size=50, max_chunk_bytes=50000000, max_retries=5, initial_backoff=10 ): if not ok: log.error(item) + + if dev_mode: + break diff --git a/src/pds/registrysweepers/reindexer/main.py b/src/pds/registrysweepers/reindexer/main.py index 0231f4c..c07f19c 100644 --- a/src/pds/registrysweepers/reindexer/main.py +++ b/src/pds/registrysweepers/reindexer/main.py @@ -74,7 +74,7 @@ def fetch_dd_field_types(client: OpenSearch) -> Dict[str, str]: def get_mapping_field_types_by_field_name(client: OpenSearch, index_name: str) -> Dict[str, str]: return { - k: v["type"] for k, v in client.indices.get_mapping(index_name)[index_name]["mappings"]["properties"].items() + k: v["type"] for k, v in client.indices.get_mapping(index_name)[index_name]["mappings"]["properties"].items() # type: ignore } diff --git a/src/pds/registrysweepers/utils/db/client.py b/src/pds/registrysweepers/utils/db/client.py index b8436d6..df21964 100644 --- a/src/pds/registrysweepers/utils/db/client.py +++ b/src/pds/registrysweepers/utils/db/client.py @@ -3,11 +3,11 @@ import os from typing import Union -import boto3 +import boto3 # type: ignore import requests -from botocore.credentials import Credentials +from botocore.credentials import Credentials # type: ignore from opensearchpy import OpenSearch -from opensearchpy import RequestsAWSV4SignerAuth +from opensearchpy import RequestsAWSV4SignerAuth # type: ignore from opensearchpy import RequestsHttpConnection from requests_aws4auth import AWS4Auth # type: ignore diff --git a/src/pds/registrysweepers/utils/misc.py b/src/pds/registrysweepers/utils/misc.py index 9fed63a..3153bd2 100644 --- a/src/pds/registrysweepers/utils/misc.py +++ b/src/pds/registrysweepers/utils/misc.py @@ -141,3 +141,7 @@ def bin_elements(elements: Iterable[V], key_f: Callable[[V], K]) -> Dict[K, List result[k].append(e) return result + + +def is_dev_mode(): + return str(os.environ.get("DEV_MODE")).lower() not in {"none", "", "0", "false"} From 9b9f4841eb361b227795f9e1316f3ac88731151f Mon Sep 17 00:00:00 2001 From: thomas loubrieu Date: Thu, 16 Jan 2025 13:57:37 -0500 Subject: [PATCH 2/2] ignore type for mypy --- .github/workflows/codeql-analysis.yml | 20 ++++++++++---------- src/pds/registrysweepers/utils/db/client.py | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 46dc7be..60d8620 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -53,25 +53,25 @@ jobs: - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 - + - run: | pip install nasa-scrub - + results_dir=`realpath ${{ github.workspace }}/../results` sarif_files=`find $results_dir -name '*.sarif'` - + for sarif_file in $sarif_files do output_file="$results_dir/$(basename $sarif_file .sarif).scrub" - + python3 -m scrub.tools.parsers.translate_results $sarif_file $output_file ${{ github.workspace }} scrub done - + python3 -m scrub.tools.parsers.csv_parser $results_dir - + echo "RESULTS_DIR=$results_dir" >> $GITHUB_ENV - - + + - name: Upload CodeQL Artifacts uses: actions/upload-artifact@v4 with: @@ -96,8 +96,8 @@ jobs: uses: djdefi/cloc-action@6 with: options: --report-file=cloc.md - - + + - name: Upload SLOC uses: actions/upload-artifact@v4 diff --git a/src/pds/registrysweepers/utils/db/client.py b/src/pds/registrysweepers/utils/db/client.py index df21964..a1cf361 100644 --- a/src/pds/registrysweepers/utils/db/client.py +++ b/src/pds/registrysweepers/utils/db/client.py @@ -5,9 +5,9 @@ import boto3 # type: ignore import requests -from botocore.credentials import Credentials # type: ignore +from botocore.credentials import Credentials from opensearchpy import OpenSearch -from opensearchpy import RequestsAWSV4SignerAuth # type: ignore +from opensearchpy import RequestsAWSV4SignerAuth from opensearchpy import RequestsHttpConnection from requests_aws4auth import AWS4Auth # type: ignore