Skip to content

Commit

Permalink
Merge pull request #298 from OP-TED/feature/TED-822
Browse files (browse the repository at this point in the history)
notice collection mat view
  • Loading branch information
CaptainOfHacks authored Oct 12, 2022
2 parents 8cf23bc + 5777dbf commit 411b59a
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from pymongo import MongoClient, ASCENDING, DESCENDING

from ted_sws import config

NOTICE_COLLECTION_NAME = "notice_collection"
NOTICES_MATERIALISED_VIEW_NAME = "notices_collection_materialised_view"


def create_notice_collection_materialised_view(mongo_client: MongoClient):
    """
    Project the notice collection into a flat materialised view and index it.

    The database is taken from ``config.MONGO_DB_AGGREGATES_DATABASE_NAME``,
    falling back to ``"aggregates_db"`` when that setting is falsy. An
    aggregation over ``NOTICE_COLLECTION_NAME`` keeps a handful of top-level
    fields, lifts selected ``normalised_metadata`` sub-fields to the top
    level, and ``$merge``s the result into ``NOTICES_MATERIALISED_VIEW_NAME``.
    Afterwards the indexes used for querying the view are created on it.

    :param mongo_client: client used to reach the aggregates database
    :return: None
    """
    aggregates_db_name = config.MONGO_DB_AGGREGATES_DATABASE_NAME or "aggregates_db"
    aggregates_db = mongo_client[aggregates_db_name]

    # Fields copied as-is from the notice document.
    kept_fields = (
        "_id",
        "created_at",
        "status",
        "validation_summary",
        "version_number",
    )
    # Fields lifted out of the nested ``normalised_metadata`` document; the
    # view field carries the same name as the nested one.
    metadata_fields = (
        "form_number",
        "form_type",
        "eu_institution",
        "extracted_legal_basis_directive",
        "ojs_type",
        "legal_basis_directive",
        "country_of_buyer",
        "eforms_subtype",
        "notice_type",
        "xsd_version",
        "publication_date",
    )
    projection = {field_name: True for field_name in kept_fields}
    for field_name in metadata_fields:
        projection[field_name] = f"$normalised_metadata.{field_name}"

    pipeline = [
        {"$project": projection},
        {"$merge": {"into": NOTICES_MATERIALISED_VIEW_NAME}},
    ]
    aggregates_db[NOTICE_COLLECTION_NAME].aggregate(pipeline)

    # Index specifications, created in this exact order.
    index_specs = (
        [("created_at", DESCENDING)],
        [("publication_date", DESCENDING)],
        [("eu_institution", ASCENDING)],
        [("status", ASCENDING)],
        [("form_number", ASCENDING)],
        [("form_number", ASCENDING), ("status", ASCENDING)],
        [("form_number", ASCENDING), ("legal_basis_directive", ASCENDING)],
    )
    view_collection = aggregates_db[NOTICES_MATERIALISED_VIEW_NAME]
    for index_keys in index_specs:
        view_collection.create_index(index_keys)
32 changes: 13 additions & 19 deletions tests/e2e/data_manager/test_mongodb_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import random

from pymongo import MongoClient
from pymongo.command_cursor import CommandCursor

from ted_sws import config
from ted_sws.data_manager.services.create_notice_collection_materialised_view import \
create_notice_collection_materialised_view, NOTICES_MATERIALISED_VIEW_NAME


def test_mongodb_client(notice_2016):
Expand Down Expand Up @@ -50,16 +51,12 @@ def test_mongodb_queries():
mongodb_client.drop_database('test')
test_db = mongodb_client['test']
objects_collection = test_db['objects']
print(type(objects_collection))
for i in range(0, 20):
objects_collection.insert_one(random_object())

unique_xpaths = objects_collection.distinct("xpath")
print(type(unique_xpaths))

unique_notice_ids = objects_collection.distinct("notices")
print("unique_notice_ids: ", unique_notice_ids)
print("unique_xpaths: ", unique_xpaths)
minimal_set_of_xpaths = []
covered_notice_ids = []
while len(unique_notice_ids):
Expand Down Expand Up @@ -87,8 +84,6 @@ def test_mongodb_queries():
if len(tmp_result):
xpaths.append(tmp_result[0])

# for xpath in xpaths:
# print(xpath)
top_xpath = sorted(xpaths, key=lambda d: d['count_notices'], reverse=True)[0]
minimal_set_of_xpaths.append(top_xpath["xpath"])
notice_ids = top_xpath["notice_ids"]
Expand All @@ -97,35 +92,25 @@ def test_mongodb_queries():
unique_notice_ids.remove(notice_id)
covered_notice_ids.append(notice_id)

print("minimal_set_of_xpaths: ", minimal_set_of_xpaths)
print("covered_notice_ids: ", covered_notice_ids)


def test_mongo_db_query_2():
uri = config.MONGO_DB_AUTH_URL
mongodb_client = MongoClient(uri)
mongodb_client.drop_database('test')
test_db = mongodb_client['test']
objects_collection = test_db['objects']
print(type(objects_collection))
for i in range(0, 3):
objects_collection.insert_one(random_object())

unique_xpaths = objects_collection.distinct("xpath")
print(type(unique_xpaths))

unique_notice_ids = objects_collection.distinct("notices")
print("unique_notice_ids: ", unique_notice_ids)
print("unique_xpaths: ", unique_xpaths)
result = objects_collection.aggregate([
{
"$group": {"_id": None,
"xpaths": {"$push": "$xpath"}
}
},
# {"$project": {"_id": 0,
# "xpaths": 1,
# }},
{
"$project": {
"_id": 0,
Expand All @@ -141,5 +126,14 @@ def test_mongo_db_query_2():
}
}
])
for r in result:
print(r)


def test_create_matview_for_notices():
    """
    Check that the notices materialised view is created with flattened fields.

    Runs the view-creation service against the MongoDB instance at
    ``config.MONGO_DB_AUTH_URL`` and verifies that the view collection exists
    and that a document in it exposes ``form_type`` and ``form_number`` at the
    top level.
    """
    uri = config.MONGO_DB_AUTH_URL
    mongodb_client = MongoClient(uri)
    create_notice_collection_materialised_view(mongo_client=mongodb_client)
    # Resolve the database name exactly as the service does, so the test looks
    # in the same database even when the config value is not set.
    db = mongodb_client[config.MONGO_DB_AGGREGATES_DATABASE_NAME or "aggregates_db"]
    assert NOTICES_MATERIALISED_VIEW_NAME in db.list_collection_names()
    # find_one() returns None on an empty collection; fail with a clear message
    # instead of an AttributeError on ``None.keys()``.
    sample_document = db[NOTICES_MATERIALISED_VIEW_NAME].find_one()
    assert sample_document is not None, "materialised view contains no documents"
    fields_in_the_materialised_view = sample_document.keys()
    assert 'form_type' in fields_in_the_materialised_view
    assert 'form_number' in fields_in_the_materialised_view

0 comments on commit 411b59a

Please sign in to comment.