Skip to content

Commit

Permalink
fff
Browse files Browse the repository at this point in the history
  • Loading branch information
hynky1999 committed Oct 24, 2023
1 parent a6bab67 commit 8b4ca74
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 7 deletions.
6 changes: 3 additions & 3 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MYSQL_HOST=localhost:3306
MYSQL_HOST=127.0.0.1
MYSQL_PORT=3306
MYSQL_USER=
MYSQL_PASSWORD=
MYSQL_USER=root
MYSQL_PASSWORD=root
MYSQL_DB_NAME=test_cc
MYSQL_TABLE_NAME=ccindex
2 changes: 1 addition & 1 deletion cmoncrawl/aggregator/athena_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __init__(
max_retry: int = 5,
extra_sql_where_clause: str | None = None,
batch_size: int = 1,
aws_profile: str = "default",
aws_profile: Optional[str] = None,
bucket_name: Optional[str] = None,
catalog_name: str = "AwsDataCatalog",
database_name: str = "commoncrawl",
Expand Down
6 changes: 4 additions & 2 deletions tests/athena_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from unittest.mock import patch

import boto3
from tests.utils import MySQLRecordsDB
from tests.utils import MySQLRecordsDB, set_up_aws_credentials_testing
import aioboto3

from cmoncrawl.aggregator.athena_query import (
Expand Down Expand Up @@ -120,6 +120,7 @@ async def asyncSetUp(self) -> None:
"https://index.commoncrawl.org/CC-MAIN-2021-09-index",
"https://index.commoncrawl.org/CC-MAIN-2020-50-index",
]
set_up_aws_credentials_testing()

def test_prepare_athena_sql_query_multiple_urls(self):
query = prepare_athena_sql_query(
Expand Down Expand Up @@ -242,6 +243,7 @@ def setUp(self) -> None:
self.mock_s3.start()
self.mock_athena = mock_athena()
self.mock_athena.start()
set_up_aws_credentials_testing()

def tearDown(self) -> None:
self.mock_s3.stop()
Expand Down Expand Up @@ -546,7 +548,7 @@ async def test_batch_size_zero(self):

async def test_extra_sql_where(self):
self.domains = ["seznam.cz"]
where_clause = "cc.fetch_status != 200"
where_clause = 'cc.warc_filename = "filename1"'
self.iterator = AthenaAggregator.AthenaAggregatorIterator(
aws_client=self.aws_client,
domains=self.domains,
Expand Down
12 changes: 11 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def seed_db(self):
100,
200,
"CC-MAIN-2021-05",
100,
200,
"warc",
],
[
Expand Down Expand Up @@ -130,3 +130,13 @@ def setUp(self):
def tearDown(self):
# Per-test cleanup: call remove_db() first, then close the self.db handle.
# NOTE(review): remove_db() is defined outside this view; presumably it drops
# the test database and needs the connection still open — confirm before reordering.
self.remove_db()
self.db.close()


def set_up_aws_credentials_testing():
    """Install placeholder AWS credentials into the process environment.

    Overwrites the access key id, secret access key, security token and
    session token with the dummy value ``"testing"`` and sets the default
    region to ``us-east-1``. Existing values for these variables are
    replaced, and nothing is restored afterwards.
    """
    import os

    placeholder = "testing"
    fake_environment = {
        "AWS_ACCESS_KEY_ID": placeholder,
        "AWS_SECRET_ACCESS_KEY": placeholder,
        "AWS_SECURITY_TOKEN": placeholder,
        "AWS_SESSION_TOKEN": placeholder,
        "AWS_DEFAULT_REGION": "us-east-1",
    }
    # Assign one variable at a time, in the same order as the table above.
    for variable, value in fake_environment.items():
        os.environ[variable] = value

0 comments on commit 8b4ca74

Please sign in to comment.