Skip to content

Commit

Permalink
modified mapping dict for 8k
Browse files Browse the repository at this point in the history
  • Loading branch information
john-friedman committed Feb 12, 2025
1 parent c63c925 commit 0c7f22d
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 3 deletions.
Binary file modified datamule/datamule/__pycache__/monitor.cpython-311.pyc
Binary file not shown.
Binary file added datamule/datamule/data/sgml_compression.dict
Binary file not shown.
6 changes: 4 additions & 2 deletions datamule/datamule/mapping_dicts/txt_mapping_dicts.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,11 @@
}

# Shared fragments for the item-heading patterns below.
# Spelled-out item identifiers, "ONE" through "SIXTEEN" (upper case only).
_ITEM_WORDS = r"ONE|TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT|NINE|TEN|ELEVEN|TWELVE|THIRTEEN|FOURTEEN|FIFTEEN|SIXTEEN"
# Digits with an optional single-letter suffix, e.g. "1" or "1A".
_ITEM_NUMBER = r"\d+[a-zA-Z]?"
# Same, but also accepting a dotted sub-number, e.g. "2.02".
_ITEM_NUMBER_8K = r"\d+(?:\.\d+)?[a-zA-Z]?"
# Trailing ASCII-digit alternative present in three of the four item patterns.
_ITEM_NUMBER_ASCII = r"[0-9]+[a-zA-Z]?"

# "Mapping" patterns: the heading must be preceded by two newline characters.
# The ITEM/PART keyword is captured as group 1.
item_pattern_mapping = rf"^\n\n\s*(ITEM|Item)\s+({_ITEM_NUMBER}|{_ITEM_WORDS}|{_ITEM_NUMBER_ASCII})\.?"
item_pattern_mapping_8k = rf"^\n\n\s*(ITEM|Item)\s+({_ITEM_NUMBER_8K}|{_ITEM_WORDS}|{_ITEM_NUMBER_ASCII})\.?"
part_pattern_mapping = r"^\n\n\s*(PART|Part)\s+(?:I{1,3}|IV)\.?"

# "Standardization" patterns: the keyword is non-capturing, so group 1 is the
# item/part identifier itself.
item_pattern_standardization = rf"^\s*(?:ITEM|Item)\s+({_ITEM_NUMBER}|{_ITEM_WORDS}|{_ITEM_NUMBER_ASCII})\.?"
item_pattern_standardization_8k = rf"^\s*(?:ITEM|Item)\s+({_ITEM_NUMBER_8K}|{_ITEM_WORDS})\.?"
part_pattern_standardization = r"^\s*(?:PART|Part)\s+([IVX]+)"


Expand Down Expand Up @@ -194,7 +196,7 @@
{
"type": "hierarchy",
"name": "item",
"pattern": item_pattern_mapping,
"pattern": item_pattern_mapping_8k,
"hierarchy": 0
},
])
Expand All @@ -204,7 +206,7 @@
"type": "standardize",
"match": {
"type": "item",
"text_pattern": item_pattern_standardization
"text_pattern": item_pattern_standardization_8k
},
"output": {
"format": "item{}",
Expand Down
3 changes: 2 additions & 1 deletion datamule/docs/source/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
Changelog
=========
v01.0.3 2025-02-10
v01.0.3
- modified item mapping dict regex to be more robust.
- modified Monitor().monitor_submissions() to have a start_date parameter.

v01.0.2 2025-02-06
- modified .parse() to return dictionary in previous package format.
Expand Down
32 changes: 32 additions & 0 deletions examples/earnings_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Download earnings announcements for a given date range and save them to a new folder.

from datamule import Portfolio
from pathlib import Path
import shutil

# Portfolio handle named '8K'.
# NOTE(review): presumably this is the local directory the submissions are
# stored in — confirm against the datamule Portfolio documentation.
portfolio = Portfolio('8K')

# Download the 8-K submissions whose filing date falls in the given range.
portfolio.download_submissions(
    submission_type='8-K',
    filing_date=('2020-01-01', '2020-01-31'),
)

def process_submission(submission):
    """Return the path of the first EX-99.1 exhibit of an earnings 8-K.

    Each '8-K' document of *submission* is parsed in turn. As soon as one of
    them contains an ``item2.02`` section (the item this example uses to
    identify earnings announcements), the path of the submission's first
    'EX-99.1' document is returned.

    Args:
        submission: Object exposing ``document_type(list_of_types)``, which
            yields documents having ``parse()``, ``data`` and ``path``.

    Returns:
        The ``path`` attribute of the first 'EX-99.1' document, or ``None``
        when no 8-K document has an ``item2.02`` section, when there is no
        'EX-99.1' document, or when parsing/lookup raises an ordinary
        exception (best-effort: one bad submission must not abort the batch).
    """
    try:
        for filing in submission.document_type(['8-K']):
            filing.parse()

            if 'item2.02' not in filing.data['document']:
                continue

            # A distinct loop variable keeps the 8-K document from being
            # rebound while we look for the exhibit.
            for exhibit in submission.document_type(['EX-99.1']):
                return exhibit.path
    except Exception:
        # Deliberately best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate so a run can be stopped.
        return None
    return None


# Apply process_submission to every submission and keep only the results
# that are not None.
paths = [
    exhibit_path
    for exhibit_path in portfolio.process_submissions(process_submission)
    if exhibit_path is not None
]

# Destination folder; an already-existing folder is reused as-is.
new_folder = Path('earnings_announcements')
new_folder.mkdir(exist_ok=True)

# Copy each file into the folder under its original file name.
# NOTE(review): `path.name` assumes each path is a pathlib.Path — confirm.
for path in paths:
    shutil.copy(path, new_folder / path.name)

0 comments on commit 0c7f22d

Please sign in to comment.