Skip to content

Commit

Permalink
Revert "Allow manually setting 'freq' for specific datasets with simple data …"
Browse files Browse the repository at this point in the history

This reverts commit 901a987.
  • Loading branch information
liu-jc authored Aug 22, 2024
1 parent 901a987 commit 08009e9
Showing 1 changed file with 7 additions and 67 deletions.
74 changes: 7 additions & 67 deletions src/uni2ts/data/builder/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,9 @@ def _from_long_dataframe(
df: pd.DataFrame,
offset: Optional[int] = None,
date_offset: Optional[pd.Timestamp] = None,
freq: str = "H",
) -> tuple[GenFunc, Features]:
items = df.item_id.unique()

# Infer the freq and generate the prompt
inferred_freq = pd.infer_freq(df.index)

if inferred_freq is not None:
print(
f"Inferred frequency: {inferred_freq}. Using this value for the 'freq' parameter."
)
else:
print(
f"Inferred frequency is None. Using predefined {freq} for the 'freq' parameter."
)

def example_gen_func() -> Generator[dict[str, Any], None, None]:
for item_id in items:
item_df = df.query(f'item_id == "{item_id}"').drop("item_id", axis=1)
Expand All @@ -63,11 +50,7 @@ def example_gen_func() -> Generator[dict[str, Any], None, None]:
yield {
"target": item_df.to_numpy(),
"start": item_df.index[0],
"freq": (
pd.infer_freq(df.index)
if pd.infer_freq(df.index) is not None
else freq
),
"freq": pd.infer_freq(item_df.index),
"item_id": item_id,
}

Expand All @@ -87,7 +70,6 @@ def _from_wide_dataframe(
df: pd.DataFrame,
offset: Optional[int] = None,
date_offset: Optional[pd.Timestamp] = None,
freq: str = "H",
) -> tuple[GenFunc, Features]:
if offset is not None:
df = df.iloc[:offset]
Expand All @@ -96,28 +78,12 @@ def _from_wide_dataframe(

print(df)

# Infer the freq and generate the prompt
inferred_freq = pd.infer_freq(df.index)

if inferred_freq is not None:
print(
f"Inferred frequency: {inferred_freq}. Using this value for the 'freq' parameter."
)
else:
print(
f"Inferred frequency is None. Using predefined {freq} for the 'freq' parameter."
)

def example_gen_func() -> Generator[dict[str, Any], None, None]:
for i in range(len(df.columns)):
yield {
"target": df.iloc[:, i].to_numpy(),
"start": df.index[0],
"freq": (
pd.infer_freq(df.index)
if pd.infer_freq(df.index) is not None
else freq
),
"freq": pd.infer_freq(df.index),
"item_id": f"item_{i}",
}

Expand All @@ -137,32 +103,17 @@ def _from_wide_dataframe_multivariate(
df: pd.DataFrame,
offset: Optional[int] = None,
date_offset: Optional[pd.Timestamp] = None,
freq: str = "H",
) -> tuple[GenFunc, Features]:
if offset is not None:
df = df.iloc[:offset]
elif date_offset is not None:
df = df[df.index <= date_offset]

# Infer the freq and generate the prompt
inferred_freq = pd.infer_freq(df.index)

if inferred_freq is not None:
print(
f"Inferred frequency: {inferred_freq}. Using this value for the 'freq' parameter."
)
else:
print(
f"Inferred frequency is None. Using predefined {freq} for the 'freq' parameter."
)

def example_gen_func() -> Generator[dict[str, Any], None, None]:
yield {
"target": df.to_numpy().T,
"start": df.index[0],
"freq": (
pd.infer_freq(df.index) if pd.infer_freq(df.index) is not None else freq
),
"freq": pd.infer_freq(df.index),
"item_id": "item_0",
}

Expand Down Expand Up @@ -194,7 +145,6 @@ def build_dataset(
dataset_type: str,
offset: Optional[int] = None,
date_offset: Optional[pd.Timestamp] = None,
freq: str = "H",
):
assert offset is None or date_offset is None, (
"One or neither offset and date_offset must be specified, but not both. "
Expand All @@ -216,7 +166,7 @@ def build_dataset(
)

example_gen_func, features = _from_dataframe(
df, freq=freq, offset=offset, date_offset=date_offset
df, offset=offset, date_offset=date_offset
)
hf_dataset = datasets.Dataset.from_generator(
example_gen_func, features=features
Expand Down Expand Up @@ -253,7 +203,7 @@ class SimpleEvalDatasetBuilder(DatasetBuilder):
def __post_init__(self):
self.storage_path = Path(self.storage_path)

def build_dataset(self, file: Path, dataset_type: str, freq: str = "H"):
def build_dataset(self, file: Path, dataset_type: str):
df = pd.read_csv(file, index_col=0, parse_dates=True)

if dataset_type == "long":
Expand All @@ -268,7 +218,7 @@ def build_dataset(self, file: Path, dataset_type: str, freq: str = "H"):
" Valid options are 'long', 'wide', and 'wide_multivariate'."
)

example_gen_func, features = _from_dataframe(df, freq=freq)
example_gen_func, features = _from_dataframe(df)
hf_dataset = datasets.Dataset.from_generator(
example_gen_func, features=features
)
Expand Down Expand Up @@ -339,21 +289,13 @@ def generate_eval_builders(
type=str,
default=None,
)
# Define the `freq` argument with a default value. Use this value as 'freq' if 'freq' is None.
parser.add_argument(
"--freq",
default="H", # Set the default value
help="The user specified frequency",
)

args = parser.parse_args()

SimpleDatasetBuilder(dataset=args.dataset_name).build_dataset(
file=Path(args.file_path),
dataset_type=args.dataset_type,
offset=args.offset,
date_offset=pd.Timestamp(args.date_offset) if args.date_offset else None,
freq=args.freq,
)

if args.offset is not None or args.date_offset is not None:
Expand All @@ -365,6 +307,4 @@ def generate_eval_builders(
prediction_length=None,
context_length=None,
patch_size=None,
).build_dataset(
file=Path(args.file_path), dataset_type=args.dataset_type, freq=args.freq
)
).build_dataset(file=Path(args.file_path), dataset_type=args.dataset_type)

0 comments on commit 08009e9

Please sign in to comment.