diff --git a/datamule/datamule/__pycache__/monitor.cpython-311.pyc b/datamule/datamule/__pycache__/monitor.cpython-311.pyc index 6509273a..26616cd4 100644 Binary files a/datamule/datamule/__pycache__/monitor.cpython-311.pyc and b/datamule/datamule/__pycache__/monitor.cpython-311.pyc differ diff --git a/datamule/datamule/data/sgml_compression.dict b/datamule/datamule/data/sgml_compression.dict new file mode 100644 index 00000000..e8fc6495 Binary files /dev/null and b/datamule/datamule/data/sgml_compression.dict differ diff --git a/datamule/datamule/mapping_dicts/txt_mapping_dicts.py b/datamule/datamule/mapping_dicts/txt_mapping_dicts.py index f0516371..5ef1201c 100644 --- a/datamule/datamule/mapping_dicts/txt_mapping_dicts.py +++ b/datamule/datamule/mapping_dicts/txt_mapping_dicts.py @@ -30,9 +30,11 @@ } item_pattern_mapping = r"^\n\n\s*(ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?" +item_pattern_mapping_8k = r"^\n\n\s*(ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?" part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?" item_pattern_standardization = r"^\s*(?:ITEM|Item)\s+(\d+[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN|[0-9]+[a-zA-Z]?)\.?" +item_pattern_standardization_8k = r"^\s*(?:ITEM|Item)\s+(\d+(?:\.\d+)?[a-zA-Z]?|ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN)\.?"  # the *_8k patterns additionally accept decimal item numbers such as "2.02"
part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)" @@ -194,7 +196,7 @@ { "type": "hierarchy", "name": "item", - "pattern": item_pattern_mapping, + "pattern": item_pattern_mapping_8k, "hierarchy": 0 }, ]) @@ -204,7 +206,7 @@ "type": "standardize", "match": { "type": "item", - "text_pattern": item_pattern_standardization + "text_pattern": item_pattern_standardization_8k }, "output": { "format": "item{}", diff --git a/datamule/docs/source/changelog.rst b/datamule/docs/source/changelog.rst index 50092316..f7066d77 100644 --- a/datamule/docs/source/changelog.rst +++ b/datamule/docs/source/changelog.rst @@ -1,7 +1,8 @@ Changelog ========= -v01.0.3 2025-02-10 +v01.0.3 - modified item mapping dict regex to be more robust. +- modified Monitor().monitor_submissions() to have a start_date parameter. v01.0.2 2025-02-06 - modified .parse() to return dictionary in previous package format. diff --git a/examples/earnings_report.py b/examples/earnings_report.py new file mode 100644 index 00000000..6bdd79cf --- /dev/null +++ b/examples/earnings_report.py @@ -0,0 +1,32 @@ +# Download earnings announcements for a given date range and save them to a new folder. + +from datamule import Portfolio +from pathlib import Path +import shutil + +portfolio = Portfolio('8K') + +portfolio.download_submissions(submission_type='8-K',filing_date=('2020-01-01','2020-01-31')) + +def process_submission(submission): + try: + for document in submission.document_type(['8-K']): + document.parse() + + if 'item2.02' in document.data['document'].keys(): + for exhibit in submission.document_type(['EX-99.1']): + return exhibit.path + except Exception: # best-effort: skip a submission that raises, but let KeyboardInterrupt/SystemExit propagate + return None + + +paths = portfolio.process_submissions(process_submission) +paths = [p for p in paths if p is not None] + +new_folder = Path('earnings_announcements') +new_folder.mkdir(exist_ok=True) + +for path in paths: + new_path = new_folder / path.name + shutil.copy(path, new_path) # Copy the file +