Skip to content

Commit

Permalink
Merge pull request #672 from nick-fournier-rsg/annotate_bugfix
Browse files Browse the repository at this point in the history
added stricter joining of annotated fields
  • Loading branch information
jpn-- authored Feb 9, 2024
2 parents 5141945 + a3f60a4 commit bdc9cac
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 3 deletions.
29 changes: 28 additions & 1 deletion activitysim/core/test/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas.testing as pdt
import pytest

from ..util import other_than, quick_loc_df, quick_loc_series, reindex
from ..util import other_than, quick_loc_df, quick_loc_series, reindex, df_from_dict


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -62,3 +62,30 @@ def test_quick_loc_series():

assert list(quick_loc_series(loc_list, series)) == attrib_list
assert list(quick_loc_series(loc_list, series)) == list(series.loc[loc_list])


def test_df_from_dict():
    """Check that df_from_dict realigns Series values to the requested index.

    One input Series is deliberately given a scrambled index order (via
    ``sort_values``) while the other keeps the original order; the resulting
    frame must present both columns in the original index order.
    """
    index = [1, 2, 3, 4, 5]
    df = pd.DataFrame({"attrib": [1, 2, 2, 3, 1]}, index=index)

    # scramble index order for one expression and not the other
    # (locals are named so as not to shadow the ``sorted`` builtin)
    sorted_series = df.eval("attrib.sort_values()")
    unsorted_series = df.eval("attrib * 1")

    # check above expressions
    pdt.assert_series_equal(
        sorted_series,
        pd.Series([1, 1, 2, 2, 3], index=[1, 5, 2, 3, 4]),
        check_names=False,
    )
    pdt.assert_series_equal(unsorted_series, df.attrib, check_names=False)

    # create a new dataframe from the above expressions
    values = {"sorted": sorted_series, "not_sorted": unsorted_series}
    new_df = df_from_dict(values, index)

    # index should become unscrambled and back to the same order as before
    expected_df = pd.DataFrame(
        {"sorted": [1, 2, 2, 3, 1], "not_sorted": [1, 2, 2, 3, 1]}, index=index
    )

    pdt.assert_frame_equal(new_df, expected_df)
15 changes: 13 additions & 2 deletions activitysim/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@


def si_units(x, kind="B", digits=3, shift=1000):

# nano micro milli kilo mega giga tera peta exa zeta yotta
tiers = ["n", "µ", "m", "", "K", "M", "G", "T", "P", "E", "Z", "Y"]

Expand Down Expand Up @@ -342,7 +341,6 @@ def assign_in_place(df, df2):
# this is a hack fix for a bug in pandas.update
# github.com/pydata/pandas/issues/4094
for c, old_dtype in zip(common_columns, old_dtypes):

# if both df and df2 column were same type, but result is not
if (old_dtype == df2[c].dtype) and (df[c].dtype != old_dtype):
try:
Expand Down Expand Up @@ -373,7 +371,20 @@ def assign_in_place(df, df2):
df[new_columns] = df2[new_columns]


def reindex_if_series(values, index):
    """Return *values* aligned to *index* when it is a pandas Series.

    Parameters
    ----------
    values : object
        Any value destined to become a DataFrame column. Only
        ``pd.Series`` instances are inspected; everything else is passed
        through untouched.
    index : array-like or None
        The target index labels, in the desired order. When ``None`` there
        is nothing to align against and *values* is returned as-is.

    Returns
    -------
    object
        *values* itself when no realignment is needed (``index`` is None,
        *values* is not a Series, or its index already equals *index* in
        both content and order); otherwise a new Series reindexed to
        *index*.

    Raises
    ------
    ValueError
        If *values* is a Series whose index does not contain every label
        in *index*.
    """
    # Nothing to align against, or nothing that carries an index of its own.
    if index is None or not isinstance(values, pd.Series):
        return values

    target = pd.Index(index)

    # Be strict: every requested label must exist in the series, otherwise
    # reindexing would silently introduce missing values.
    missing = target.difference(values.index)
    if len(missing) > 0:
        raise ValueError(
            f"series index is missing {len(missing)} label(s) required by "
            f"the target index, e.g. {list(missing[:5])}"
        )

    # Same labels in the same order: no work to do.
    if values.index.equals(target):
        return values

    # Same labels but a different order (fully or partially scrambled).
    return values.reindex(index=target)


def df_from_dict(values, index=None):
# If value object is a series and has out of order index, reindex it
values = {k: reindex_if_series(v, index) for k, v in values.items()}

df = pd.DataFrame.from_dict(values)
if index is not None:
Expand Down

0 comments on commit bdc9cac

Please sign in to comment.