Skip to content

Commit

Permalink
fix: resolve issue (#178)
Browse files Browse the repository at this point in the history
Modified the data merge process to handle cases where recent data lacks sufficient valid entries. The search now continues through older data to ensure proper merging.
  • Loading branch information
josw123 committed Sep 2, 2024
1 parent 82e9867 commit 056d533
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
7 changes: 5 additions & 2 deletions dart_fss/corp/corp.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ def extract_fs(self,
cumulative: bool = False,
progressbar: bool = True,
skip_error: bool = True,
last_report_only: bool = True) -> FinancialStatement:
last_report_only: bool = True,
min_required: int = 4) -> FinancialStatement:
"""
재무제표 검색
Expand Down Expand Up @@ -243,11 +244,13 @@ def extract_fs(self,
Error 발생시 skip 여부 (default: True)
last_report_only: bool, optional
최종 보고서만을 이용하여 데이터를 추출할지 여부 (default: True)
min_required: int, optional
Merge를 위한 최소한의 유효 데이터 개수 (default: 4)
Returns
-------
FinancialStatement
재무제표 검색 결과
"""
return extract(self.corp_code, bgn_de, end_de, fs_tp, separate, report_tp, lang,
separator, dataset, cumulative, progressbar, skip_error, last_report_only)
separator, dataset, cumulative, progressbar, skip_error, last_report_only, min_required)
20 changes: 17 additions & 3 deletions dart_fss/fs/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,8 @@ def init_label(fs_df: Dict[str, DataFrame],
def merge_fs(fs_df: Dict[str, DataFrame],
nfs_df: Dict[str, DataFrame],
label_df: Dict[str, DataFrame],
fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf')):
fs_tp: Tuple[str] = ('bs', 'is', 'cis', 'cf'),
min_required: int = 4) -> Tuple[Dict[str, DataFrame], Dict[str, DataFrame]]:
"""
재무제표 DataFrame과 Report의 데이터를 합쳐주는 Method
Expand All @@ -1015,6 +1016,8 @@ def merge_fs(fs_df: Dict[str, DataFrame],
재무제표 검색결과시 추출된 값의 Label
fs_tp: tuple of str, optional
'bs' 재무상태표, 'is' 손익계산서, 'cis' 포괄손익계산서, 'cf' 현금흐름표
min_required: int, optional
Merge를 위한 최소한의 데이터 개수
Returns
-------
tuple of dict of {str: DataFrame}
Expand Down Expand Up @@ -1071,6 +1074,12 @@ def merge_fs(fs_df: Dict[str, DataFrame],
for compare_func in additional_comparison_function:
ndata, nlabels = compare_func(column, df, ndf, label_df[tp], ndata, nlabels)

ndata = np.array(ndata, dtype=np.float64)
count = np.count_nonzero(~np.isnan(ndata))

if count < min_required: # ndata의 유효한 값이 min_required 미만인 경우 추가 Merge X
continue

label_df[tp][column] = nlabels
fs_df[tp][column] = ndata

Expand Down Expand Up @@ -1389,7 +1398,8 @@ def extract(corp_code: str,
cumulative: bool = False,
progressbar: bool = True,
skip_error: bool = True,
last_report_only: bool = True) -> FinancialStatement:
last_report_only: bool = True,
min_required: int = 4) -> FinancialStatement:
"""
재무제표 검색
Expand Down Expand Up @@ -1421,6 +1431,8 @@ def extract(corp_code: str,
Error 발생시 skip 여부 (default: True)
last_report_only: bool, optional
최종 보고서만을 이용하여 데이터를 추출할지 여부 (default: True)
min_required: int, optional
Merge를 위한 최소한의 유효 데이터 개수 (default: 4)
Returns
-------
FinancialStatement
Expand Down Expand Up @@ -1510,7 +1522,9 @@ def check_report_tp(req_tp, tp):
warnings_text = 'Unable to extract financial statements: {}.'.format(report.to_dict())
warnings.warn(warnings_text, RuntimeWarning)
else:
statements, label_df = merge_fs(statements, nstatements, fs_tp=fs_tp, label_df=label_df)
statements, label_df = merge_fs(statements, nstatements,
fs_tp=fs_tp, label_df=label_df,
min_required=min_required)
except Exception as ex:
traceback.print_exc()
warnings_text = 'Unable to extract financial statements: {}.'.format(report.to_dict())
Expand Down

0 comments on commit 056d533

Please sign in to comment.