Skip to content

Commit

Permalink
CLN: Add additional typing information
Browse files Browse the repository at this point in the history
  • Loading branch information
bashtage committed Jan 21, 2020
1 parent 9eff8b1 commit 4fcbe33
Showing 1 changed file with 25 additions and 24 deletions.
49 changes: 25 additions & 24 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,10 @@
stata_epoch = datetime.datetime(1960, 1, 1)


def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
# TODO: Add typing. As of January 2020 it is not possible to type this function since
# mypy doesn't understand that a Series and an int can be combined using mathematical
# operations (+, -).
def _stata_elapsed_date_to_datetime_vec(dates, fmt) -> Series:
"""
Convert from SIF to datetime. http://www.stata.com/help.cgi?datetime
Expand Down Expand Up @@ -219,7 +222,7 @@ def _stata_elapsed_date_to_datetime_vec(dates: Series, fmt: str) -> Series:
MIN_MS_DELTA = MIN_DAY_DELTA * 24 * 3600 * 1000
MAX_MS_DELTA = MAX_DAY_DELTA * 24 * 3600 * 1000

def convert_year_month_safe(year: Series, month: Series) -> Series:
def convert_year_month_safe(year, month) -> Series:
"""
Convert year and month to datetimes, using pandas vectorized versions
when the date range falls within the range supported by pandas.
Expand All @@ -234,7 +237,7 @@ def convert_year_month_safe(year: Series, month: Series) -> Series:
[datetime.datetime(y, m, 1) for y, m in zip(year, month)], index=index
)

def convert_year_days_safe(year: Series, days: Series) -> Series:
def convert_year_days_safe(year, days) -> Series:
"""
Converts year (e.g. 1999) and days since the start of the year to a
datetime or datetime64 Series
Expand All @@ -249,9 +252,7 @@ def convert_year_days_safe(year: Series, days: Series) -> Series:
]
return Series(value, index=index)

def convert_delta_safe(
base: datetime.datetime, deltas: Series, unit: str
) -> Series:
def convert_delta_safe(base, deltas, unit) -> Series:
"""
Convert base dates and deltas to datetimes, using pandas vectorized
versions if the deltas satisfy restrictions required to be expressed
Expand Down Expand Up @@ -298,21 +299,21 @@ def convert_delta_safe(
# Delta days relative to base
elif fmt.startswith(("%td", "td", "%d", "d")):
base = stata_epoch
days: Series = dates
days = dates
conv_dates = convert_delta_safe(base, days, "d")
# Weekly format does not count leap days: every week is 7 days long,
# except the 52nd week of a year, which may have more than 7 days.
elif fmt.startswith(("%tw", "tw")):
year: Series = stata_epoch.year + dates // 52
year = stata_epoch.year + dates // 52
days = (dates % 52) * 7
conv_dates = convert_year_days_safe(year, days)
elif fmt.startswith(("%tm", "tm")): # Delta months relative to base
year = stata_epoch.year + dates // 12
month: Series = (dates % 12) + 1
month = (dates % 12) + 1
conv_dates = convert_year_month_safe(year, month)
elif fmt.startswith(("%tq", "tq")): # Delta quarters relative to base
year = stata_epoch.year + dates // 4
quarter_month: Series = (dates % 4) * 3 + 1
quarter_month = (dates % 4) * 3 + 1
conv_dates = convert_year_month_safe(year, quarter_month)
elif fmt.startswith(("%th", "th")): # Delta half-years relative to base
year = stata_epoch.year + dates // 2
Expand Down Expand Up @@ -347,19 +348,19 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series:
NS_PER_DAY = 24 * 3600 * 1000 * 1000 * 1000
US_PER_DAY = NS_PER_DAY / 1000

def parse_dates_safe(
dates: Series, delta: bool = False, year: bool = False, days: bool = False
):
d: Dict[str, Any] = {}
def parse_dates_safe(dates, delta=False, year=False, days=False):
d = {}
if is_datetime64_dtype(dates.values):
if delta:
time_delta: Series = dates - stata_epoch
time_delta = dates - stata_epoch
d["delta"] = time_delta.values.astype(np.int64) // 1000 # microseconds
if days or year:
# "type: ignore" is used below since mypy reports that DatetimeIndex has no year/month
date_index = DatetimeIndex(dates)
d["year"], d["month"] = date_index.year, date_index.month
d["year"] = date_index.year # type: ignore
d["month"] = date_index.month # type: ignore
if days:
days_in_ns: Series = dates.astype(np.int64) - to_datetime(
days_in_ns = dates.astype(np.int64) - to_datetime(
d["year"], format="%Y"
).astype(np.int64)
d["days"] = days_in_ns // NS_PER_DAY
Expand Down Expand Up @@ -588,10 +589,10 @@ def __init__(self, catarray: Series, encoding: str = "latin-1"):
categories = catarray.cat.categories
self.value_labels = list(zip(np.arange(len(categories)), categories))
self.value_labels.sort(key=lambda x: x[0])
self.text_len = np.int32(0)
self.off = []
self.val = []
self.txt = []
self.text_len = 0
self.off: List[int] = []
self.val: List[int] = []
self.txt: List[bytes] = []
self.n = 0

# Compute lengths and setup lists of offsets and labels
Expand Down Expand Up @@ -2131,7 +2132,7 @@ def _prepare_categoricals(self, data: DataFrame) -> DataFrame:

is_cat = [is_categorical_dtype(data[col]) for col in data]
self._is_col_cat = is_cat
self._value_labels = []
self._value_labels: List[StataValueLabel] = []
if not any(is_cat):
return data

Expand Down Expand Up @@ -2290,8 +2291,8 @@ def _check_column_names(self, data: DataFrame) -> DataFrame:
return data

def _set_formats_and_types(self, dtypes: Series) -> None:
self.typlist = []
self.fmtlist = []
self.fmtlist: List[str] = []
self.typlist: List[int] = []
for col, dtype in dtypes.items():
self.fmtlist.append(_dtype_to_default_stata_fmt(dtype, self.data[col]))
self.typlist.append(_dtype_to_stata_type(dtype, self.data[col]))
Expand Down

0 comments on commit 4fcbe33

Please sign in to comment.