From 630d094e3ac215b2fb99205c2353c9a610592616 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 13 Apr 2021 10:08:47 -0500 Subject: [PATCH 01/49] initial changes --- .../source/_static/RAPIDS-logo-purple.png | Bin 0 -> 22593 bytes docs/cudf/source/_static/params.css | 19 + docs/cudf/source/api.rst | 273 +- .../api/cudf.core.dataframe.DataFrame.rst | 261 + .../api/cudf.core.dataframe.extract_col.rst | 6 + .../api/cudf.core.dataframe.from_pandas.rst | 6 + .../source/api/cudf.core.dataframe.merge.rst | 6 + docs/cudf/source/api_docs/api.rst | 22 + .../source/api_docs/api/cudf.DataFrame.T.rst | 6 + .../api_docs/api/cudf.DataFrame.__iter__.rst | 6 + .../api_docs/api/cudf.DataFrame.add.rst | 6 + .../api_docs/api/cudf.DataFrame.agg.rst | 6 + .../api_docs/api/cudf.DataFrame.all.rst | 6 + .../api_docs/api/cudf.DataFrame.any.rst | 6 + .../api_docs/api/cudf.DataFrame.append.rst | 6 + .../api_docs/api/cudf.DataFrame.assign.rst | 6 + .../api_docs/api/cudf.DataFrame.astype.rst | 6 + .../source/api_docs/api/cudf.DataFrame.at.rst | 6 + .../api_docs/api/cudf.DataFrame.clip.rst | 6 + .../api_docs/api/cudf.DataFrame.columns.rst | 6 + .../api_docs/api/cudf.DataFrame.copy.rst | 6 + .../api_docs/api/cudf.DataFrame.corr.rst | 6 + .../api_docs/api/cudf.DataFrame.count.rst | 6 + .../api_docs/api/cudf.DataFrame.cov.rst | 6 + .../api_docs/api/cudf.DataFrame.cummax.rst | 6 + .../api_docs/api/cudf.DataFrame.cummin.rst | 6 + .../api_docs/api/cudf.DataFrame.cumprod.rst | 6 + .../api_docs/api/cudf.DataFrame.cumsum.rst | 6 + .../api_docs/api/cudf.DataFrame.describe.rst | 6 + .../api_docs/api/cudf.DataFrame.div.rst | 6 + .../api_docs/api/cudf.DataFrame.drop.rst | 6 + .../api/cudf.DataFrame.drop_duplicates.rst | 6 + .../api_docs/api/cudf.DataFrame.dropna.rst | 6 + .../api_docs/api/cudf.DataFrame.dtypes.rst | 6 + .../api_docs/api/cudf.DataFrame.empty.rst | 6 + .../api_docs/api/cudf.DataFrame.equals.rst | 6 + .../api_docs/api/cudf.DataFrame.explode.rst | 6 + 
.../api_docs/api/cudf.DataFrame.fillna.rst | 6 + .../api_docs/api/cudf.DataFrame.floordiv.rst | 6 + .../api/cudf.DataFrame.from_records.rst | 6 + .../api_docs/api/cudf.DataFrame.groupby.rst | 6 + .../api_docs/api/cudf.DataFrame.head.rst | 6 + .../api_docs/api/cudf.DataFrame.iat.rst | 6 + .../api_docs/api/cudf.DataFrame.iloc.rst | 6 + .../api_docs/api/cudf.DataFrame.index.rst | 6 + .../api_docs/api/cudf.DataFrame.info.rst | 6 + .../api_docs/api/cudf.DataFrame.insert.rst | 6 + .../api_docs/api/cudf.DataFrame.isin.rst | 6 + .../api_docs/api/cudf.DataFrame.isna.rst | 6 + .../api_docs/api/cudf.DataFrame.isnull.rst | 6 + .../api_docs/api/cudf.DataFrame.iteritems.rst | 6 + .../api_docs/api/cudf.DataFrame.iterrows.rst | 6 + .../api/cudf.DataFrame.itertuples.rst | 6 + .../api_docs/api/cudf.DataFrame.join.rst | 6 + .../api_docs/api/cudf.DataFrame.keys.rst | 6 + .../api_docs/api/cudf.DataFrame.kurt.rst | 6 + .../api_docs/api/cudf.DataFrame.kurtosis.rst | 6 + .../api_docs/api/cudf.DataFrame.loc.rst | 6 + .../api_docs/api/cudf.DataFrame.mask.rst | 6 + .../api_docs/api/cudf.DataFrame.max.rst | 6 + .../api_docs/api/cudf.DataFrame.mean.rst | 6 + .../api_docs/api/cudf.DataFrame.melt.rst | 6 + .../api/cudf.DataFrame.memory_usage.rst | 6 + .../api_docs/api/cudf.DataFrame.merge.rst | 6 + .../api_docs/api/cudf.DataFrame.min.rst | 6 + .../api_docs/api/cudf.DataFrame.mod.rst | 6 + .../api_docs/api/cudf.DataFrame.mode.rst | 6 + .../api_docs/api/cudf.DataFrame.mul.rst | 6 + .../api_docs/api/cudf.DataFrame.ndim.rst | 6 + .../api_docs/api/cudf.DataFrame.nlargest.rst | 6 + .../api_docs/api/cudf.DataFrame.notna.rst | 6 + .../api_docs/api/cudf.DataFrame.notnull.rst | 6 + .../api_docs/api/cudf.DataFrame.nsmallest.rst | 6 + .../api_docs/api/cudf.DataFrame.pipe.rst | 6 + .../api_docs/api/cudf.DataFrame.pivot.rst | 6 + .../api_docs/api/cudf.DataFrame.pop.rst | 6 + .../api_docs/api/cudf.DataFrame.pow.rst | 6 + .../api_docs/api/cudf.DataFrame.prod.rst | 6 + .../api_docs/api/cudf.DataFrame.product.rst 
| 6 + .../api_docs/api/cudf.DataFrame.quantile.rst | 6 + .../api_docs/api/cudf.DataFrame.query.rst | 6 + .../api_docs/api/cudf.DataFrame.radd.rst | 6 + .../api_docs/api/cudf.DataFrame.rank.rst | 6 + .../api_docs/api/cudf.DataFrame.rdiv.rst | 6 + .../api_docs/api/cudf.DataFrame.reindex.rst | 6 + .../api_docs/api/cudf.DataFrame.rename.rst | 6 + .../api_docs/api/cudf.DataFrame.replace.rst | 6 + .../api/cudf.DataFrame.reset_index.rst | 6 + .../api_docs/api/cudf.DataFrame.rfloordiv.rst | 6 + .../api_docs/api/cudf.DataFrame.rmod.rst | 6 + .../api_docs/api/cudf.DataFrame.rmul.rst | 6 + .../api_docs/api/cudf.DataFrame.rolling.rst | 6 + .../api_docs/api/cudf.DataFrame.round.rst | 6 + .../api_docs/api/cudf.DataFrame.rpow.rst | 6 + .../source/api_docs/api/cudf.DataFrame.rst | 181 + .../api_docs/api/cudf.DataFrame.rsub.rst | 6 + .../api_docs/api/cudf.DataFrame.rtruediv.rst | 6 + .../api_docs/api/cudf.DataFrame.sample.rst | 6 + .../api/cudf.DataFrame.select_dtypes.rst | 6 + .../api_docs/api/cudf.DataFrame.set_index.rst | 6 + .../api_docs/api/cudf.DataFrame.shape.rst | 6 + .../api_docs/api/cudf.DataFrame.shift.rst | 6 + .../api_docs/api/cudf.DataFrame.size.rst | 6 + .../api_docs/api/cudf.DataFrame.skew.rst | 6 + .../api/cudf.DataFrame.sort_index.rst | 6 + .../api/cudf.DataFrame.sort_values.rst | 6 + .../api_docs/api/cudf.DataFrame.stack.rst | 6 + .../api_docs/api/cudf.DataFrame.std.rst | 6 + .../api_docs/api/cudf.DataFrame.sub.rst | 6 + .../api_docs/api/cudf.DataFrame.sum.rst | 6 + .../api_docs/api/cudf.DataFrame.tail.rst | 6 + .../api_docs/api/cudf.DataFrame.take.rst | 6 + .../api_docs/api/cudf.DataFrame.to_csv.rst | 6 + .../api_docs/api/cudf.DataFrame.to_dict.rst | 6 + .../api/cudf.DataFrame.to_feather.rst | 6 + .../api_docs/api/cudf.DataFrame.to_hdf.rst | 6 + .../api_docs/api/cudf.DataFrame.to_json.rst | 6 + .../api/cudf.DataFrame.to_parquet.rst | 6 + .../api/cudf.DataFrame.to_records.rst | 6 + .../api_docs/api/cudf.DataFrame.to_string.rst | 6 + 
.../api_docs/api/cudf.DataFrame.transpose.rst | 6 + .../api_docs/api/cudf.DataFrame.truediv.rst | 6 + .../api_docs/api/cudf.DataFrame.unstack.rst | 6 + .../api_docs/api/cudf.DataFrame.update.rst | 6 + .../api_docs/api/cudf.DataFrame.values.rst | 6 + .../api_docs/api/cudf.DataFrame.var.rst | 6 + .../api_docs/api/cudf.DataFrame.where.rst | 6 + .../api_docs/api/cudf.Series.__array__.rst | 6 + .../api_docs/api/cudf.Series.__iter__.rst | 6 + .../source/api_docs/api/cudf.Series.abs.rst | 6 + .../source/api_docs/api/cudf.Series.add.rst | 6 + .../source/api_docs/api/cudf.Series.all.rst | 6 + .../source/api_docs/api/cudf.Series.any.rst | 6 + .../api_docs/api/cudf.Series.append.rst | 6 + .../api_docs/api/cudf.Series.argsort.rst | 6 + .../api_docs/api/cudf.Series.astype.rst | 6 + .../source/api_docs/api/cudf.Series.cat.rst | 6 + .../source/api_docs/api/cudf.Series.clip.rst | 6 + .../source/api_docs/api/cudf.Series.copy.rst | 6 + .../source/api_docs/api/cudf.Series.corr.rst | 6 + .../source/api_docs/api/cudf.Series.count.rst | 6 + .../source/api_docs/api/cudf.Series.cov.rst | 6 + .../api_docs/api/cudf.Series.cummax.rst | 6 + .../api_docs/api/cudf.Series.cummin.rst | 6 + .../api_docs/api/cudf.Series.cumprod.rst | 6 + .../api_docs/api/cudf.Series.cumsum.rst | 6 + .../api_docs/api/cudf.Series.describe.rst | 6 + .../source/api_docs/api/cudf.Series.diff.rst | 6 + .../source/api_docs/api/cudf.Series.drop.rst | 6 + .../api/cudf.Series.drop_duplicates.rst | 6 + .../api_docs/api/cudf.Series.dropna.rst | 6 + .../source/api_docs/api/cudf.Series.dt.rst | 6 + .../source/api_docs/api/cudf.Series.dtype.rst | 6 + .../source/api_docs/api/cudf.Series.empty.rst | 6 + .../source/api_docs/api/cudf.Series.eq.rst | 6 + .../api_docs/api/cudf.Series.equals.rst | 6 + .../api_docs/api/cudf.Series.explode.rst | 6 + .../api_docs/api/cudf.Series.factorize.rst | 6 + .../api_docs/api/cudf.Series.fillna.rst | 6 + .../api_docs/api/cudf.Series.floordiv.rst | 6 + .../source/api_docs/api/cudf.Series.ge.rst | 6 + 
.../api_docs/api/cudf.Series.groupby.rst | 6 + .../source/api_docs/api/cudf.Series.gt.rst | 6 + .../source/api_docs/api/cudf.Series.head.rst | 6 + .../source/api_docs/api/cudf.Series.iloc.rst | 6 + .../source/api_docs/api/cudf.Series.index.rst | 6 + .../api_docs/api/cudf.Series.is_monotonic.rst | 6 + .../cudf.Series.is_monotonic_decreasing.rst | 6 + .../cudf.Series.is_monotonic_increasing.rst | 6 + .../api_docs/api/cudf.Series.is_unique.rst | 6 + .../source/api_docs/api/cudf.Series.isin.rst | 6 + .../source/api_docs/api/cudf.Series.isna.rst | 6 + .../api_docs/api/cudf.Series.isnull.rst | 6 + .../source/api_docs/api/cudf.Series.items.rst | 6 + .../api_docs/api/cudf.Series.iteritems.rst | 6 + .../source/api_docs/api/cudf.Series.keys.rst | 6 + .../source/api_docs/api/cudf.Series.kurt.rst | 6 + .../api_docs/api/cudf.Series.kurtosis.rst | 6 + .../source/api_docs/api/cudf.Series.le.rst | 6 + .../source/api_docs/api/cudf.Series.loc.rst | 6 + .../source/api_docs/api/cudf.Series.lt.rst | 6 + .../source/api_docs/api/cudf.Series.map.rst | 6 + .../source/api_docs/api/cudf.Series.mask.rst | 6 + .../source/api_docs/api/cudf.Series.max.rst | 6 + .../source/api_docs/api/cudf.Series.mean.rst | 6 + .../api_docs/api/cudf.Series.median.rst | 6 + .../api_docs/api/cudf.Series.memory_usage.rst | 6 + .../source/api_docs/api/cudf.Series.min.rst | 6 + .../source/api_docs/api/cudf.Series.mod.rst | 6 + .../source/api_docs/api/cudf.Series.mode.rst | 6 + .../source/api_docs/api/cudf.Series.mul.rst | 6 + .../source/api_docs/api/cudf.Series.name.rst | 6 + .../source/api_docs/api/cudf.Series.ndim.rst | 6 + .../source/api_docs/api/cudf.Series.ne.rst | 6 + .../api_docs/api/cudf.Series.nlargest.rst | 6 + .../source/api_docs/api/cudf.Series.notna.rst | 6 + .../api_docs/api/cudf.Series.notnull.rst | 6 + .../api_docs/api/cudf.Series.nsmallest.rst | 6 + .../api_docs/api/cudf.Series.nunique.rst | 6 + .../source/api_docs/api/cudf.Series.pipe.rst | 6 + .../source/api_docs/api/cudf.Series.pow.rst | 6 + 
.../source/api_docs/api/cudf.Series.prod.rst | 6 + .../api_docs/api/cudf.Series.product.rst | 6 + .../api_docs/api/cudf.Series.quantile.rst | 6 + .../source/api_docs/api/cudf.Series.radd.rst | 6 + .../source/api_docs/api/cudf.Series.rank.rst | 6 + .../api_docs/api/cudf.Series.reindex.rst | 6 + .../api_docs/api/cudf.Series.rename.rst | 6 + .../api_docs/api/cudf.Series.repeat.rst | 6 + .../api_docs/api/cudf.Series.replace.rst | 6 + .../api_docs/api/cudf.Series.reset_index.rst | 6 + .../api_docs/api/cudf.Series.rfloordiv.rst | 6 + .../source/api_docs/api/cudf.Series.rmod.rst | 6 + .../source/api_docs/api/cudf.Series.rmul.rst | 6 + .../api_docs/api/cudf.Series.rolling.rst | 6 + .../source/api_docs/api/cudf.Series.round.rst | 6 + .../source/api_docs/api/cudf.Series.rpow.rst | 6 + docs/cudf/source/api_docs/api/cudf.Series.rst | 205 + .../source/api_docs/api/cudf.Series.rsub.rst | 6 + .../api_docs/api/cudf.Series.rtruediv.rst | 6 + .../api_docs/api/cudf.Series.sample.rst | 6 + .../api_docs/api/cudf.Series.searchsorted.rst | 6 + .../source/api_docs/api/cudf.Series.shape.rst | 6 + .../source/api_docs/api/cudf.Series.shift.rst | 6 + .../source/api_docs/api/cudf.Series.size.rst | 6 + .../source/api_docs/api/cudf.Series.skew.rst | 6 + .../api_docs/api/cudf.Series.sort_index.rst | 6 + .../api_docs/api/cudf.Series.sort_values.rst | 6 + .../source/api_docs/api/cudf.Series.std.rst | 6 + .../source/api_docs/api/cudf.Series.str.rst | 6 + .../source/api_docs/api/cudf.Series.sub.rst | 6 + .../source/api_docs/api/cudf.Series.sum.rst | 6 + .../source/api_docs/api/cudf.Series.tail.rst | 6 + .../source/api_docs/api/cudf.Series.take.rst | 6 + .../api_docs/api/cudf.Series.to_dict.rst | 6 + .../api_docs/api/cudf.Series.to_frame.rst | 6 + .../api_docs/api/cudf.Series.to_hdf.rst | 6 + .../api_docs/api/cudf.Series.to_json.rst | 6 + .../api_docs/api/cudf.Series.to_list.rst | 6 + .../api_docs/api/cudf.Series.to_string.rst | 6 + .../api_docs/api/cudf.Series.truediv.rst | 6 + 
.../api_docs/api/cudf.Series.unique.rst | 6 + .../api_docs/api/cudf.Series.update.rst | 6 + .../api_docs/api/cudf.Series.value_counts.rst | 6 + .../api_docs/api/cudf.Series.values.rst | 6 + .../source/api_docs/api/cudf.Series.var.rst | 6 + .../source/api_docs/api/cudf.Series.where.rst | 6 + .../api_docs/api/pandas.DataFrame.T.rst | 6 + .../api/pandas.DataFrame.__iter__.rst | 6 + .../api_docs/api/pandas.DataFrame.abs.rst | 6 + .../api_docs/api/pandas.DataFrame.add.rst | 6 + .../api/pandas.DataFrame.add_prefix.rst | 6 + .../api/pandas.DataFrame.add_suffix.rst | 6 + .../api_docs/api/pandas.DataFrame.agg.rst | 6 + .../api/pandas.DataFrame.aggregate.rst | 6 + .../api_docs/api/pandas.DataFrame.align.rst | 6 + .../api_docs/api/pandas.DataFrame.all.rst | 6 + .../api_docs/api/pandas.DataFrame.any.rst | 6 + .../api_docs/api/pandas.DataFrame.append.rst | 6 + .../api_docs/api/pandas.DataFrame.apply.rst | 6 + .../api/pandas.DataFrame.applymap.rst | 6 + .../api_docs/api/pandas.DataFrame.asfreq.rst | 6 + .../api_docs/api/pandas.DataFrame.asof.rst | 6 + .../api_docs/api/pandas.DataFrame.assign.rst | 6 + .../api_docs/api/pandas.DataFrame.astype.rst | 6 + .../api_docs/api/pandas.DataFrame.at.rst | 6 + .../api_docs/api/pandas.DataFrame.at_time.rst | 6 + .../api_docs/api/pandas.DataFrame.attrs.rst | 6 + .../api_docs/api/pandas.DataFrame.axes.rst | 6 + .../api/pandas.DataFrame.backfill.rst | 6 + .../api/pandas.DataFrame.between_time.rst | 6 + .../api_docs/api/pandas.DataFrame.bfill.rst | 6 + .../api_docs/api/pandas.DataFrame.bool.rst | 6 + .../api_docs/api/pandas.DataFrame.boxplot.rst | 6 + .../api_docs/api/pandas.DataFrame.clip.rst | 6 + .../api_docs/api/pandas.DataFrame.columns.rst | 6 + .../api_docs/api/pandas.DataFrame.combine.rst | 6 + .../api/pandas.DataFrame.combine_first.rst | 6 + .../api_docs/api/pandas.DataFrame.compare.rst | 6 + .../api/pandas.DataFrame.convert_dtypes.rst | 6 + .../api_docs/api/pandas.DataFrame.copy.rst | 6 + .../api_docs/api/pandas.DataFrame.corr.rst | 6 + 
.../api/pandas.DataFrame.corrwith.rst | 6 + .../api_docs/api/pandas.DataFrame.count.rst | 6 + .../api_docs/api/pandas.DataFrame.cov.rst | 6 + .../api_docs/api/pandas.DataFrame.cummax.rst | 6 + .../api_docs/api/pandas.DataFrame.cummin.rst | 6 + .../api_docs/api/pandas.DataFrame.cumprod.rst | 6 + .../api_docs/api/pandas.DataFrame.cumsum.rst | 6 + .../api/pandas.DataFrame.describe.rst | 6 + .../api_docs/api/pandas.DataFrame.diff.rst | 6 + .../api_docs/api/pandas.DataFrame.div.rst | 6 + .../api_docs/api/pandas.DataFrame.dot.rst | 6 + .../api_docs/api/pandas.DataFrame.drop.rst | 6 + .../api/pandas.DataFrame.drop_duplicates.rst | 6 + .../api/pandas.DataFrame.droplevel.rst | 6 + .../api_docs/api/pandas.DataFrame.dropna.rst | 6 + .../api_docs/api/pandas.DataFrame.dtypes.rst | 6 + .../api/pandas.DataFrame.duplicated.rst | 6 + .../api_docs/api/pandas.DataFrame.empty.rst | 6 + .../api_docs/api/pandas.DataFrame.eq.rst | 6 + .../api_docs/api/pandas.DataFrame.equals.rst | 6 + .../api_docs/api/pandas.DataFrame.eval.rst | 6 + .../api_docs/api/pandas.DataFrame.ewm.rst | 6 + .../api/pandas.DataFrame.expanding.rst | 6 + .../api_docs/api/pandas.DataFrame.explode.rst | 6 + .../api_docs/api/pandas.DataFrame.ffill.rst | 6 + .../api_docs/api/pandas.DataFrame.fillna.rst | 6 + .../api_docs/api/pandas.DataFrame.filter.rst | 6 + .../api_docs/api/pandas.DataFrame.first.rst | 6 + .../pandas.DataFrame.first_valid_index.rst | 6 + .../api/pandas.DataFrame.floordiv.rst | 6 + .../api/pandas.DataFrame.from_dict.rst | 6 + .../api/pandas.DataFrame.from_records.rst | 6 + .../api_docs/api/pandas.DataFrame.ge.rst | 6 + .../api_docs/api/pandas.DataFrame.get.rst | 6 + .../api_docs/api/pandas.DataFrame.groupby.rst | 6 + .../api_docs/api/pandas.DataFrame.gt.rst | 6 + .../api_docs/api/pandas.DataFrame.head.rst | 6 + .../api_docs/api/pandas.DataFrame.hist.rst | 6 + .../api_docs/api/pandas.DataFrame.iat.rst | 6 + .../api_docs/api/pandas.DataFrame.idxmax.rst | 6 + .../api_docs/api/pandas.DataFrame.idxmin.rst | 6 
+ .../api_docs/api/pandas.DataFrame.iloc.rst | 6 + .../api_docs/api/pandas.DataFrame.index.rst | 6 + .../api/pandas.DataFrame.infer_objects.rst | 6 + .../api_docs/api/pandas.DataFrame.info.rst | 6 + .../api_docs/api/pandas.DataFrame.insert.rst | 6 + .../api/pandas.DataFrame.interpolate.rst | 6 + .../api_docs/api/pandas.DataFrame.isin.rst | 6 + .../api_docs/api/pandas.DataFrame.isna.rst | 6 + .../api_docs/api/pandas.DataFrame.isnull.rst | 6 + .../api_docs/api/pandas.DataFrame.items.rst | 6 + .../api/pandas.DataFrame.iteritems.rst | 6 + .../api/pandas.DataFrame.iterrows.rst | 6 + .../api/pandas.DataFrame.itertuples.rst | 6 + .../api_docs/api/pandas.DataFrame.join.rst | 6 + .../api_docs/api/pandas.DataFrame.keys.rst | 6 + .../api_docs/api/pandas.DataFrame.kurt.rst | 6 + .../api/pandas.DataFrame.kurtosis.rst | 6 + .../api_docs/api/pandas.DataFrame.last.rst | 6 + .../api/pandas.DataFrame.last_valid_index.rst | 6 + .../api_docs/api/pandas.DataFrame.le.rst | 6 + .../api_docs/api/pandas.DataFrame.loc.rst | 6 + .../api_docs/api/pandas.DataFrame.lookup.rst | 6 + .../api_docs/api/pandas.DataFrame.lt.rst | 6 + .../api_docs/api/pandas.DataFrame.mad.rst | 6 + .../api_docs/api/pandas.DataFrame.mask.rst | 6 + .../api_docs/api/pandas.DataFrame.max.rst | 6 + .../api_docs/api/pandas.DataFrame.mean.rst | 6 + .../api_docs/api/pandas.DataFrame.median.rst | 6 + .../api_docs/api/pandas.DataFrame.melt.rst | 6 + .../api/pandas.DataFrame.memory_usage.rst | 6 + .../api_docs/api/pandas.DataFrame.merge.rst | 6 + .../api_docs/api/pandas.DataFrame.min.rst | 6 + .../api_docs/api/pandas.DataFrame.mod.rst | 6 + .../api_docs/api/pandas.DataFrame.mode.rst | 6 + .../api_docs/api/pandas.DataFrame.mul.rst | 6 + .../api_docs/api/pandas.DataFrame.ndim.rst | 6 + .../api_docs/api/pandas.DataFrame.ne.rst | 6 + .../api/pandas.DataFrame.nlargest.rst | 6 + .../api_docs/api/pandas.DataFrame.notna.rst | 6 + .../api_docs/api/pandas.DataFrame.notnull.rst | 6 + .../api/pandas.DataFrame.nsmallest.rst | 6 + 
.../api_docs/api/pandas.DataFrame.nunique.rst | 6 + .../api_docs/api/pandas.DataFrame.pad.rst | 6 + .../api/pandas.DataFrame.pct_change.rst | 6 + .../api_docs/api/pandas.DataFrame.pipe.rst | 6 + .../api_docs/api/pandas.DataFrame.pivot.rst | 6 + .../api/pandas.DataFrame.pivot_table.rst | 6 + .../api/pandas.DataFrame.plot.area.rst | 6 + .../api/pandas.DataFrame.plot.bar.rst | 6 + .../api/pandas.DataFrame.plot.barh.rst | 6 + .../api/pandas.DataFrame.plot.box.rst | 6 + .../api/pandas.DataFrame.plot.density.rst | 6 + .../api/pandas.DataFrame.plot.hexbin.rst | 6 + .../api/pandas.DataFrame.plot.hist.rst | 6 + .../api/pandas.DataFrame.plot.kde.rst | 6 + .../api/pandas.DataFrame.plot.line.rst | 6 + .../api/pandas.DataFrame.plot.pie.rst | 6 + .../api_docs/api/pandas.DataFrame.plot.rst | 6 + .../api/pandas.DataFrame.plot.scatter.rst | 6 + .../api_docs/api/pandas.DataFrame.pop.rst | 6 + .../api_docs/api/pandas.DataFrame.pow.rst | 6 + .../api_docs/api/pandas.DataFrame.prod.rst | 6 + .../api_docs/api/pandas.DataFrame.product.rst | 6 + .../api/pandas.DataFrame.quantile.rst | 6 + .../api_docs/api/pandas.DataFrame.query.rst | 6 + .../api_docs/api/pandas.DataFrame.radd.rst | 6 + .../api_docs/api/pandas.DataFrame.rank.rst | 6 + .../api_docs/api/pandas.DataFrame.rdiv.rst | 6 + .../api_docs/api/pandas.DataFrame.reindex.rst | 6 + .../api/pandas.DataFrame.reindex_like.rst | 6 + .../api_docs/api/pandas.DataFrame.rename.rst | 6 + .../api/pandas.DataFrame.rename_axis.rst | 6 + .../api/pandas.DataFrame.reorder_levels.rst | 6 + .../api_docs/api/pandas.DataFrame.replace.rst | 6 + .../api/pandas.DataFrame.resample.rst | 6 + .../api/pandas.DataFrame.reset_index.rst | 6 + .../api/pandas.DataFrame.rfloordiv.rst | 6 + .../api_docs/api/pandas.DataFrame.rmod.rst | 6 + .../api_docs/api/pandas.DataFrame.rmul.rst | 6 + .../api_docs/api/pandas.DataFrame.rolling.rst | 6 + .../api_docs/api/pandas.DataFrame.round.rst | 6 + .../api_docs/api/pandas.DataFrame.rpow.rst | 6 + 
.../source/api_docs/api/pandas.DataFrame.rst | 236 + .../api_docs/api/pandas.DataFrame.rsub.rst | 6 + .../api/pandas.DataFrame.rtruediv.rst | 6 + .../api_docs/api/pandas.DataFrame.sample.rst | 6 + .../api/pandas.DataFrame.select_dtypes.rst | 6 + .../api_docs/api/pandas.DataFrame.sem.rst | 6 + .../api/pandas.DataFrame.set_axis.rst | 6 + .../api/pandas.DataFrame.set_flags.rst | 6 + .../api/pandas.DataFrame.set_index.rst | 6 + .../api_docs/api/pandas.DataFrame.shape.rst | 6 + .../api_docs/api/pandas.DataFrame.shift.rst | 6 + .../api_docs/api/pandas.DataFrame.size.rst | 6 + .../api_docs/api/pandas.DataFrame.skew.rst | 6 + .../api/pandas.DataFrame.slice_shift.rst | 6 + .../api/pandas.DataFrame.sort_index.rst | 6 + .../api/pandas.DataFrame.sort_values.rst | 6 + .../api/pandas.DataFrame.sparse.density.rst | 6 + .../pandas.DataFrame.sparse.from_spmatrix.rst | 6 + .../api/pandas.DataFrame.sparse.to_coo.rst | 6 + .../api/pandas.DataFrame.sparse.to_dense.rst | 6 + .../api_docs/api/pandas.DataFrame.squeeze.rst | 6 + .../api_docs/api/pandas.DataFrame.stack.rst | 6 + .../api_docs/api/pandas.DataFrame.std.rst | 6 + .../api_docs/api/pandas.DataFrame.style.rst | 6 + .../api_docs/api/pandas.DataFrame.sub.rst | 6 + .../api_docs/api/pandas.DataFrame.sum.rst | 6 + .../api/pandas.DataFrame.swapaxes.rst | 6 + .../api/pandas.DataFrame.swaplevel.rst | 6 + .../api_docs/api/pandas.DataFrame.tail.rst | 6 + .../api_docs/api/pandas.DataFrame.take.rst | 6 + .../api/pandas.DataFrame.to_clipboard.rst | 6 + .../api_docs/api/pandas.DataFrame.to_csv.rst | 6 + .../api_docs/api/pandas.DataFrame.to_dict.rst | 6 + .../api/pandas.DataFrame.to_excel.rst | 6 + .../api/pandas.DataFrame.to_feather.rst | 6 + .../api_docs/api/pandas.DataFrame.to_gbq.rst | 6 + .../api_docs/api/pandas.DataFrame.to_hdf.rst | 6 + .../api_docs/api/pandas.DataFrame.to_html.rst | 6 + .../api_docs/api/pandas.DataFrame.to_json.rst | 6 + .../api/pandas.DataFrame.to_latex.rst | 6 + .../api/pandas.DataFrame.to_markdown.rst | 6 + 
.../api/pandas.DataFrame.to_parquet.rst | 6 + .../api/pandas.DataFrame.to_period.rst | 6 + .../api/pandas.DataFrame.to_pickle.rst | 6 + .../api/pandas.DataFrame.to_records.rst | 6 + .../api_docs/api/pandas.DataFrame.to_sql.rst | 6 + .../api/pandas.DataFrame.to_stata.rst | 6 + .../api/pandas.DataFrame.to_string.rst | 6 + .../api/pandas.DataFrame.to_timestamp.rst | 6 + .../api/pandas.DataFrame.to_xarray.rst | 6 + .../api/pandas.DataFrame.transform.rst | 6 + .../api/pandas.DataFrame.transpose.rst | 6 + .../api_docs/api/pandas.DataFrame.truediv.rst | 6 + .../api/pandas.DataFrame.truncate.rst | 6 + .../api_docs/api/pandas.DataFrame.tshift.rst | 6 + .../api/pandas.DataFrame.tz_convert.rst | 6 + .../api/pandas.DataFrame.tz_localize.rst | 6 + .../api_docs/api/pandas.DataFrame.unstack.rst | 6 + .../api_docs/api/pandas.DataFrame.update.rst | 6 + .../api/pandas.DataFrame.value_counts.rst | 6 + .../api_docs/api/pandas.DataFrame.values.rst | 6 + .../api_docs/api/pandas.DataFrame.var.rst | 6 + .../api_docs/api/pandas.DataFrame.where.rst | 6 + .../api_docs/api/pandas.DataFrame.xs.rst | 6 + .../cudf/source/api_docs/api/pandas.Flags.rst | 28 + .../source/api_docs/cudf.DataFrame.drop.rst | 6 + .../source/api_docs/cudf.DataFrame.where.rst | 6 + docs/cudf/source/api_docs/dataframe.rst | 389 + .../generated/pandas.Series.array.rst | 6 + docs/cudf/source/api_docs/index.rst | 15 + .../source/api_docs/pandas.DataFrame.drop.rst | 6 + .../api_docs/pandas.DataFrame.groupby.rst | 6 + docs/cudf/source/api_docs/series.rst | 601 ++ docs/cudf/source/basics/basics.rst | 54 + docs/cudf/source/basics/dask-cudf.md | 78 + docs/cudf/source/basics/groupby.md | 200 + docs/cudf/source/basics/index.rst | 14 + docs/cudf/source/basics/internals.md | 194 + .../cudf/source/basics/io-supported-types.rst | 62 + docs/cudf/source/basics/io.rst | 11 + docs/cudf/source/conf.py | 36 +- docs/cudf/source/cudf.DataFrame.drop.rst | 6 + docs/cudf/source/cudf.DataFrame.where.rst | 6 + 
.../source/generated/cudf.DataFrame.drop.rst | 6 + .../source/generated/cudf.DataFrame.where.rst | 6 + .../generated/pandas.DataFrame.drop.rst | 6 + .../generated/pandas.DataFrame.groupby.rst | 6 + .../source/generated/pandas.Series.array.rst | 6 + docs/cudf/source/index.rst | 16 +- docs/cudf/source/pandas.DataFrame.drop.rst | 6 + docs/cudf/source/pandas.DataFrame.groupby.rst | 6 + .../source/user_guide/10min-cudf-cupy.ipynb | 1334 ++++ docs/cudf/source/user_guide/10min.ipynb | 6487 +++++++++++++++++ .../Working-with-missing-data.ipynb | 3466 +++++++++ .../source/user_guide/guide-to-udfs.ipynb | 1716 +++++ docs/cudf/source/user_guide/index.rst | 12 + 501 files changed, 18478 insertions(+), 282 deletions(-) create mode 100644 docs/cudf/source/_static/RAPIDS-logo-purple.png create mode 100644 docs/cudf/source/api/cudf.core.dataframe.DataFrame.rst create mode 100644 docs/cudf/source/api/cudf.core.dataframe.extract_col.rst create mode 100644 docs/cudf/source/api/cudf.core.dataframe.from_pandas.rst create mode 100644 docs/cudf/source/api/cudf.core.dataframe.merge.rst create mode 100644 docs/cudf/source/api_docs/api.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.T.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.__iter__.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.add.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.agg.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.all.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.any.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.append.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.assign.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.astype.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.at.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.clip.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.columns.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.copy.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.corr.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.count.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cov.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cummax.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cummin.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cumprod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cumsum.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.describe.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.div.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.drop.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.drop_duplicates.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.dropna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.dtypes.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.empty.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.equals.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.explode.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.fillna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.floordiv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.from_records.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.groupby.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.head.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iat.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iloc.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.index.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.info.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.insert.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isin.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isnull.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iteritems.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iterrows.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.itertuples.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.join.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.keys.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.kurt.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.kurtosis.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.loc.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mask.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.max.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mean.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.melt.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.memory_usage.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.merge.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.min.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mode.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mul.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.ndim.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.nlargest.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.notna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.notnull.rst create mode 
100644 docs/cudf/source/api_docs/api/cudf.DataFrame.nsmallest.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pipe.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pivot.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pop.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pow.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.prod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.product.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.quantile.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.query.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.radd.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rank.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rdiv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.reindex.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rename.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.replace.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.reset_index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rfloordiv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rmod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rmul.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rolling.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.round.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rpow.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rsub.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rtruediv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sample.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.select_dtypes.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.set_index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.shape.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.shift.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.size.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.skew.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sort_index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sort_values.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.stack.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.std.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sub.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sum.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.tail.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.take.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_csv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_dict.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_feather.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_hdf.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_json.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_parquet.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_records.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_string.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.transpose.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.truediv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.unstack.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.update.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.values.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.var.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.where.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.__array__.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.__iter__.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.abs.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.add.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.all.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.any.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.append.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.argsort.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.astype.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cat.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.clip.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.copy.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.corr.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.count.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cov.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cummax.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cummin.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cumprod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cumsum.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.describe.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.diff.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.drop.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.drop_duplicates.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.dropna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.dt.rst create mode 
100644 docs/cudf/source/api_docs/api/cudf.Series.dtype.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.empty.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.eq.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.equals.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.explode.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.factorize.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.fillna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.floordiv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ge.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.groupby.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.gt.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.head.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.iloc.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_decreasing.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_increasing.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_unique.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.isin.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.isna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.isnull.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.items.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.iteritems.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.keys.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.kurt.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.kurtosis.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.le.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.Series.loc.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.lt.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.map.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mask.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.max.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mean.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.median.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.memory_usage.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.min.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mode.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mul.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.name.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ndim.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ne.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nlargest.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.notna.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.notnull.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nsmallest.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nunique.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.pipe.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.pow.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.prod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.product.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.quantile.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.radd.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rank.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.reindex.rst create mode 100644 
docs/cudf/source/api_docs/api/cudf.Series.rename.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.repeat.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.replace.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.reset_index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rfloordiv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rmod.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rmul.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rolling.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.round.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rpow.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rsub.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rtruediv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sample.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.searchsorted.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.shape.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.shift.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.size.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.skew.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sort_index.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sort_values.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.std.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.str.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sub.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sum.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.tail.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.take.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_dict.rst create mode 
100644 docs/cudf/source/api_docs/api/cudf.Series.to_frame.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_hdf.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_json.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_list.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_string.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.truediv.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.unique.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.update.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.value_counts.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.values.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.var.rst create mode 100644 docs/cudf/source/api_docs/api/cudf.Series.where.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.T.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.__iter__.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.abs.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add_prefix.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add_suffix.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.agg.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.aggregate.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.align.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.all.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.any.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.append.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.apply.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.applymap.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.asfreq.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.asof.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.assign.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.astype.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.at.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.at_time.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.attrs.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.axes.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.backfill.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.between_time.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.bfill.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.bool.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.boxplot.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.clip.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.columns.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.combine.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.combine_first.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.compare.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.convert_dtypes.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.copy.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.corr.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.corrwith.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.count.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cov.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cummax.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cummin.rst create 
mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cumprod.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cumsum.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.describe.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.diff.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.div.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dot.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.drop.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.drop_duplicates.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.droplevel.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dropna.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dtypes.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.duplicated.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.empty.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.eq.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.equals.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.eval.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ewm.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.expanding.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.explode.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ffill.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.fillna.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.filter.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.first.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.first_valid_index.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.floordiv.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.from_dict.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.from_records.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ge.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.get.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.groupby.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.gt.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.head.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.hist.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iat.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.idxmax.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.idxmin.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iloc.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.index.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.infer_objects.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.info.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.insert.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.interpolate.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.isin.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.isna.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.isnull.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.items.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iteritems.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iterrows.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.itertuples.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.join.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.keys.rst create mode 
100644 docs/cudf/source/api_docs/api/pandas.DataFrame.kurt.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.kurtosis.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.last.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.last_valid_index.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.le.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.loc.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.lookup.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.lt.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mad.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mask.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.max.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mean.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.median.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.melt.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.memory_usage.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.merge.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.min.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mod.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mode.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mul.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ndim.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ne.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.nlargest.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.notna.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.notnull.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.nsmallest.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.nunique.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pad.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pct_change.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pipe.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pivot.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pivot_table.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.area.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.bar.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.barh.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.box.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.density.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hexbin.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hist.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.kde.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.line.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.pie.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.scatter.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pop.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pow.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.prod.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.product.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.quantile.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.query.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.radd.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.rank.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rdiv.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reindex.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reindex_like.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rename.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rename_axis.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reorder_levels.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.replace.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.resample.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reset_index.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rfloordiv.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rmod.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rmul.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rolling.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.round.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rpow.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rsub.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rtruediv.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sample.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.select_dtypes.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sem.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_axis.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_flags.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_index.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.shape.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.shift.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.size.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.skew.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.slice_shift.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sort_index.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sort_values.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.density.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.from_spmatrix.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_coo.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_dense.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.squeeze.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.stack.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.std.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.style.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sub.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sum.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.swapaxes.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.swaplevel.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tail.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.take.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_clipboard.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_csv.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_dict.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_excel.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.to_feather.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_gbq.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_hdf.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_html.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_json.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_latex.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_markdown.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_parquet.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_period.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_pickle.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_records.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_sql.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_stata.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_string.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_timestamp.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_xarray.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.transform.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.transpose.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.truediv.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.truncate.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tshift.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tz_convert.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tz_localize.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.unstack.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.update.rst create mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.value_counts.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.values.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.var.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.where.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.xs.rst create mode 100644 docs/cudf/source/api_docs/api/pandas.Flags.rst create mode 100644 docs/cudf/source/api_docs/cudf.DataFrame.drop.rst create mode 100644 docs/cudf/source/api_docs/cudf.DataFrame.where.rst create mode 100644 docs/cudf/source/api_docs/dataframe.rst create mode 100644 docs/cudf/source/api_docs/generated/pandas.Series.array.rst create mode 100644 docs/cudf/source/api_docs/index.rst create mode 100644 docs/cudf/source/api_docs/pandas.DataFrame.drop.rst create mode 100644 docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst create mode 100644 docs/cudf/source/api_docs/series.rst create mode 100644 docs/cudf/source/basics/basics.rst create mode 100644 docs/cudf/source/basics/dask-cudf.md create mode 100644 docs/cudf/source/basics/groupby.md create mode 100644 docs/cudf/source/basics/index.rst create mode 100644 docs/cudf/source/basics/internals.md create mode 100644 docs/cudf/source/basics/io-supported-types.rst create mode 100644 docs/cudf/source/basics/io.rst create mode 100644 docs/cudf/source/cudf.DataFrame.drop.rst create mode 100644 docs/cudf/source/cudf.DataFrame.where.rst create mode 100644 docs/cudf/source/generated/cudf.DataFrame.drop.rst create mode 100644 docs/cudf/source/generated/cudf.DataFrame.where.rst create mode 100644 docs/cudf/source/generated/pandas.DataFrame.drop.rst create mode 100644 docs/cudf/source/generated/pandas.DataFrame.groupby.rst create mode 100644 docs/cudf/source/generated/pandas.Series.array.rst create mode 100644 docs/cudf/source/pandas.DataFrame.drop.rst create mode 100644 docs/cudf/source/pandas.DataFrame.groupby.rst create mode 100644 
docs/cudf/source/user_guide/10min-cudf-cupy.ipynb create mode 100644 docs/cudf/source/user_guide/10min.ipynb create mode 100644 docs/cudf/source/user_guide/Working-with-missing-data.ipynb create mode 100644 docs/cudf/source/user_guide/guide-to-udfs.ipynb create mode 100644 docs/cudf/source/user_guide/index.rst diff --git a/docs/cudf/source/_static/RAPIDS-logo-purple.png b/docs/cudf/source/_static/RAPIDS-logo-purple.png new file mode 100644 index 0000000000000000000000000000000000000000..d884e01374dcd5e62db937b24990074d2f584ff3 GIT binary patch literal 22593 zcmeEu_dk{IAOC%hnH?Ers|Xn-E3&01B&%an$d0V6Lq?HQcCsoXyX<)mB3TF7>yW*V zJuPJzQeB{eAc*$v9W^}&qCi3ryor(= z{Es44H}Rh{Zg(DeKoB(}@eizTBJc!)c%Zv#%KAQ^RwpK$IWz{zrdz%|vQ=FmS4oJjo1#VU@g{`ZJvBSruB1SwM@VgEgbFhC^#J(2Q4L^8n-r3ysz z-xEV9#P;744<$_ezb7RIxaEIONS^;~NaV!-c=%5r{--wo*zi9~`NxL;Ifs92_@8t5 zj}8BG4*#E=L&lYWMD2;Q5QJPf`!BR-9Tsl%IeL>pkY&KX$ep2vq6!A#e^B6Gz}LJ? z3gW(T?7v8VC7A&Pd`sH@UVN@%3HA|S{I`!J0~{a)`QU%=<%SX8n-V4*&WRE6!?`Dv z!GE06CTm0>wE^#?fpF|-&WL<~u`e@{|M~Xecq$QEMD}_w38pY(se@s;>e#GA!7Nw3 zqm}fiAPbqv=1TX{osFTxbZ*ul+wIMCh?M6ab&*>?A(=D5uq$`5S*(K5LFRKo4Q;#| z%Iea~q7+Ts34}u{DV=mff)*5c`M(7}ZbP3!(a=3!qaD%H5e>49xOERU|N3)O`E2A@ z;vlvpH7tH$-u90#|0)LOcG~k)Cr2rhK`W<=RiwqC^x)HGbrY+|Esg%8(#aZ~Lp(v& znc}eHBg~BvzJawfaEV(udXIX)aMdSUKGu6Pry_6CtlESsv{=3a7N-i)tQrIwE9eNO zR&DW4h&eBc+O~adnQwL?Eb^1$?j3sUdTK7+7$*PetT}U=V3HDdbyg^f8gsU*%Iv0p zg-^Jf?$@&zN@Og9kK$J8FLjc5mqKGgf!U+x!@8^|ha=kt=8lJ*ejn9&{8LG+(zzif zAY#8i-b>fBc3PPQvYwaf18(hkkb{-Po#kuTG_&8(uf@0*Cy`IwU3r5J@lxEPzMq6z>r3<8=n2r{q+smUP zAC`cLm#|7v`Fu{vx5}IbsHW8#rD%w}s6BGwnA!IDy74;IB#nC6O|0cNXMA^9Lu~aUs+YH7VXH`p zZMeFDU1#@cN^`~d)vgcUUn@P-SAMGxm(Jd3#rR7M4U=Kck&USkHDe5DVT$R5AdeN( zzLtkFS<^kfptSn#pts{;d^G8_M!u^t7&4>?P*3=tzeJHP-}#fLL2F2uF+3@^A|+`8~|aefy_FXWld zuWeTJMUV`=>-(*@`&OOg^7m_a&j_Kz;E%4#n6myw!>p=gF*?hLB| zoOkH<6cF5-WX+jKtb2rXgc5T8GDyq{*raF!*K0R-a_^gDKmN14q%E=)4CfS${)jto zAjK>G)BPQGXz+Rbal!JCJxAK#OWg1Wv@|z#K%Pf0*Pc?5DCL~|Dta-8UFzjkT(I8A 
ztAdvdY=31DH(zZx;S05Zul5}*&a)Ww%(%d&1QuCr^J6|dbN^KTJB3rD=m0?~tn}MS zZBUt#=345kWag1}_7V=}= z?|6GDzK%k_i@BO4yA`|4_FbcIlQRb$HHWfAymu$tHf;Z6k0F zGhktIlqT4XYe2b~Aaj92l~m7|R$nBweVAt??QI#$l{;K=)^~}V!I{Uty<@Z3_0X5SEcLV49~Wm+*TSO%7i;n`|VbBo#1EAQJ0Z7 z6nwHjlFe@rH;!8v+43}!ReJf6b3XkEa+%0CMLPFm9s!LP+B< z8UcxuHJF%{+UL_5!oeI1=}(b)#05e6YdBfysl@^r)70<@|qLmDY zWCtOXV?#gqc|P`_qg*uJeY6QUhb=FkIGx36ArzSg+>j>1_8Aud^czNz6lRPiNWaz( z*+``I;rUG6s7ic0IiKy`G)cG#HJ&h?kwOQRqoZ!W= z*KcXQP&;y5$OMFv0YdNn3U{t`0thPea7;;;E`9*Gitn_X=grGr#BA>qZ0D#M{`63d zX3}T)oIa0&EU!14_HB=tGK_zylXV|I@=R)DfGi{rQ%QPh4UAN#wrF|XK6aHyg&|oA zGfePT2akLu_64)(1+!ufF*zqF3U0gY&2oW`fp22lL8pAD=L!4LgP%>dZLnFDE0gTV zfaUq^YHGUGd)Uj|*KtppRW@3j6hv|MO4#U>i;6Wvv4@HKCbvM$9{U<1aIBzL!sgHU zV4(g-_RI23UqAH9nbwwSus%!T%(^Z6CD?ibMKZNpY~Ai;r{iN9YLw@|1yXV>9QWf#ubpU>sWf<6sCE$T)xmhD{qj#~~6V%>866^5yF zI9WDVHfF6BD#;>=82n-39V>U%3^N~Ou&2#`%1Jo#n~j_{@-2Mskgy%gNpB-#yM3*V z;>uep05bT}S<<^6R&H6p-1^X*y1_k(bL&q}v6Cph{4W6S>G?56`J^E0+e6t>N+gWfava#_9bKZ9aWzr7T<$(4p~ zgPx=jdFP%o!x}P0@$Jr=`-{eJn@#Uh>R;g&t?i6w!BU49 zaV$LJfG{6krbT5b9Vj-Nnygfot8;JuIXc?qk4ZSgc@pR%NJ*HqYr>Wh0wF38Vl6ke zTG!keI@3e-l;>m<5E+Rrezfp51IMUH%anJgHcX7eAJr+sNBo@*YbjqI)|40o`v!QF zZG}nEVAvIBNf-w(hXFM@)YvZw_0N?qZ%vhmg4 z9>30m%;b}CRcY{6)KHi~rW!l^Iu%an9uYS(;9A z?l!phth?i)kuOQ6r@-M2$?v&8g)qUET-Lu+-sK<|-}<-0-R-`=*g~mo0b)=HOo`}zRVsY;o_f{ zPXfZnqq7DWh6CwzX@GN7|MAqZSt_J~ePc#B!*$6aX@t{B0${UX-!z;&?4FH}ZReV0 zkQ?=F)hp*N%iMb8dpSEZ5?_wn_q{+Ld@Bg14v?#EL94WLnBFzDnwb4k-buZsGm!X~$~jj#-zUpSUmjm2VByJjzgYJM{fq)|(ucr;EHR()Gl({_@Tv zq@=kp40m6l4jt}Jd7@=A)C(>~3NF^t!A>qhej|@@g=NiduNab{5Zo{%M7>BwWT^Td z`ttSBwB~Gq8@l)ec8X+Pyrrt3vE~?>2zb>0ce|;(bAD#msjxI8YEP` zjLXifwQa7@ejgIHe^|Ds`Z@Kj5gAQ15Wj9H%R)$YWV)=0%++s4-ut4TgI-dX8y=H9 z0}7n`K!Tdw`Q`E-@0sv!qVDtxVddF{`?AfnILRRl6BOGHVyxs~f}+7Vc}*B}&|6MF zb}c1_*H730ee)hH7G$})spB8M#piJ{%k93_$NQaz-hX!|XEs0eH+ezv_l4JYJeGo$ z+$qPcSraY{(e$Rrx}tgxU#&SjVJRuUEG`J7T>irn7=i8f-3&R8cuYdgG_@P*rf+Ab{YF`h4@QxW25$kEpiiuEbX;~+20|QNYqohenIPr 
z;p-plx_^N6D>YfJES6M6`s1Z35%y!bWY0N2cfB>Md^A4v{N1k0h{67|uH+CWaoYnfq*^wgP7ZSZU%pM^SkSyosPU$2Qb z95%~P6hCk z0-k9xVVIV5M9=>p&k;UImp3{XArV_2+lzE#jwII>~bpj&(= zSP@c^bZ1{oner|OhpNJQwHAbnA*C!L4J%QGv^Dv8k#`Kx5di|A^qh2OhmQ7_aItvU zpYgRb5P*k(232fy&h|y;p1KYL=30hAvvlG~7p4~=BL?{jU6_9c6&v!(zwI92kJWsO z+`f7R*a^WQ`yM&z)Nft$nXvGE9Oew5+*g3Wsin_hL2~rSZU5;z?5!HUbT^Xi3jpDTk! zkIt9Amm2SwbReTu>H>u8>##L*sXLtd$)#-E)p}U=|7HOSBUPBiLGhc^)|&ez7I|T? z+#7^LPc}DB5+zlzPK^h$T{vl8lIRU3PiE^wqX@be7H)XvEW6Z^y~h{ei{KdbMxG2DBp3~V8hToAUn4vqn5 zs#FGx5qjH&=bk<@d%;gio^cznfzM|IZFW+&MdcSe_9mUQ<}YAjePvHn#Z#az4zo(< zO+bNyuXju-v8Y@YO|44l|Ab#0k^~ryA=oQ=_2V*@b}k{{y^xf)Go*aymy57`Dnrtl zP=&TW?z%MwH_6+lGyb>H|clk$90C^%IY!Wf`u$!wPe!y zaPf-!vD8cG(W0VU49%bZ>d8KS`Ai(@2NeP(Qprxu(Kgbx7r(azPnwm6W*nr_oo22g z*cXR@_DM?U!PD+ic5*#^qFA4h500cfGQnnPMf`%UB!dW&w`=xdj_!xe`@En(alB4+ zY1m@_O!Hp|wDJ`ZQgjHD*KNwqG&RLGz2?$6{fSV`?c0*#&}Cu-sbzsrC73wH^Yn!f zijdW2y;5)VVX7QpRmnpYOD|_B`k;4N2apW*B?xL|OX5Z(DYP_7&ww!dyPukP@u$x# zP(BY6ptrEw{tlPz3x5x>{k7+Xr1U~kL7jj}A9gzNOmhhZ$^}#*uq$DJR)@JTgM&kq z?bZ82h$`>ut@ayy&TO0x>h!xE5+?|sYzC;56`Z`?xq(FF&&@A7`qYwhT_Ogei4 zBoh;mC~81PQbJFRRs)*5#a*LkcFbO!Uqv<^kJMFFr1uJulcIIMHL#}((p{(ux)~{+ zJR)!QHLk&%Vwf@Aj;w9wi3lp_^x5CBk68yjY!%G)eb`GKWv$ z%UI41_S=v_YL?cE7C1AVuzc z(^P|2I;hP_im@S`iI_cJ{&@duW^FdY4zpz~rzhN_*NoLaz^=)5uN(`jZi`;1f#8;r zJfz`nh$FKaM~dpP5m#B7hCcoa$OR&DOG(HJilDs?!x7XS>kU@@_%W9QQCndzIOh+0 zmB=oz3r;78e5MTMr87Xwd6eYRn7c4JuhKv|pi`D{AwW9qs)EDf+lhj@ChfV{f%ih4 z8@Ahj$6jfquxC@TE%)lR=zUFIA>}0vrx>`f{tZ4^ug-RYZP4Nl$DfVRfgLb{IBw8Z zm>%S`>hi6B1Z7{E5J_*e?f>^5cB>Q3RSGP!&psNGAj@KeH%B(d8wtI`cF?h}VIE9! 
zAAZ^B`_(dTS@;=dOvJ!G)h!Ro9%btE{@s@^w;xC;*B`o_2UlQ2lor-e0g$pETr616 zPD;xJ-=;}7E#q4Xm56R`8J`*#I;z>|t6sK*fR3FLNw=(T-*!c~TPUwg>TW5&yd%=; zYP-GjY;NGXI<+-0%f=&@PtoR2!Uo9op@*PM=J-QfNHBvc^0lZVY~hPR^_uRb2r;5rUN%>Zw!fE;E^Jd@jEIep&6TrFYOw=)y{17el)nD zBSlQ|^`3<53^i3B?#}D4c!_f27F!5I;l-O*E2GRwn^;xDN)mdZU80YQ(5LEw+S z8LNwz`#zqwlU;$@XXTK+OaH`pS*UMx9Q$7NN&g*6ngMo%GPWjq;5>YqJA!H&gSeee z6$)Fv2<|}HY;A39{53AnvFxVZqo_4OfmqfbcEJrNZ(fii-Gkx~XT~!~ zE0t=h(=SDQx?;TI(Rm7xc2Gl*;;g3SUj|ANeKk-$c)Vqryi(_5|3hSkMAXQ_`qyV? zO?`G~K{NP{Rg1F16nym&XmpAfc`~1c9xC#R(;rkFFK8&6+O?LXLX>R&f2 zb1OO`A@vR1oM>lV_WaCwVO683id!X^Dbasc_k89o!-1THdZt3@VbwY#ymF35F_t!B zi;yy=o)4O?7lHKUBtC6*W^OPno>&CpTz%=VL#F<7D^3<=kc{STfsp0YZv}S2-WLNK zO=b$ZB$*%s_QS_Buq|ICb(!qd^ThpX&~lCLfDIQ0)igj(S}rTw*InD|825_iQVng7 zB}vH}ZFNxeI}$rmm=BSE%MT|@0uDV+RE*wJd?Q>guo_nnJWbiAjX$q4)xdt*ZMSar zhi)5#CbIcOxT>QOT`|ll5wNp-jy5q)b<)*+@wkt?^S1u!dipX}WiEPROex~d+i*iW z*t|sZth?6kpOf>V2LYg5*At2+$~j%unCd0PM#KV}l{k7HSTQEfgtp=*|H?YBy3#C& zH}Y7XRqVs`@u6(bzC@@2fRb>X4<@9d1WLb#P7kvCpf6YNPU)ZeORe#Tl?(25yCt#< zBv)xVJna==3zR)fHrL&6MaOg6RH3!z->_Z$I5iM`3&{8qkix zPon(Sr%I)IT|J(Z6lp>_#J1H_{AW!&#@59`x#uE-Y<~Q2K%~;yC z-ffxJM!wC*VX9v2)!(maI)x&4hXr5TIihUrp#7fv9C3K80vZt95q zMeGFK+|LWzFWzVhqxLTK9ZuLWY7{!verf2qeJ^0Ba|7w>F1fLFTJgJehoW>|2rS{GxdlSr zrAaBX-MN1e0de3Lt{~6>HCt3@FSTPlxnqY&kHws}oYIqUl8kD{wrIa!7w9|;fReG( zpOyY|n{!GFNU$VmU+QVjd%OF$^UhT{TZTU`J8UIp08Z;>Bq;i+yFl=j)B7V%3JnoK zI0f7L6xBipC|SzDIXGMdcq=r)Z1`*Hy)vALKeMlk-HS?MNRxd}=QL8^d>QAhVwr2P zo|JVVY2iZ16gHybZ-`}v0SM~DVuk5B%!PQD;X_pV12P(174`PICD9Zr^=VJ)_pf7~ zU!^2RfZ&J3S4*FI+ys)GP@voqsdu$+RzyIn9?&ogMaL91Xke(O(aJyXJ!}z~OD1hp z@*v(xF-Pb*KeNd5D3(YCp~`!xfYlY|o2zoX;w9gCdtR(vZ%RA^n!Hvbw^CwKpEnG! ze5Ab^G9~=Q7r97W&8;~5DEl6d*s3<%>=@vegn+a};G|FSwSJIWx7 z1fC|Qs8QdESyp05Cu>BgZgYV8Y)oEoFfk`9VORC+c4RM8*Tu7|rTKAubDnvYrbCO< z$I^?4nTbF?#&jWFl=Ibe?vgl@0kqN@aZSK}^;YG8P*-2lDSOA5B{l{)wr`WPDWgR! zPW;{xhe>)j-357zvd%YR$=NSA2Y#9xL5rEWW*8E=BRcJ>?7ouy5+K?xCj}Sd3Q(UY zTf{r9Z^NbHrMHS--r|N^h!>=9ZS(|X#RZBBBuUbI`AS%n!&oY%5>LO?gxyjz$(6J? 
zjK^BMmvW{%b}6_A`E(XG^OeX>yUOHk8Fz@MJ(bc)^wq`%g*O`%@#yTV^e~RLz#Twn zM6Oja5fUzAR%FlYIhRioe0fJX9yikAlEzN_H<#WQlj!iA(EhHU9kJbhE}0TD@wvf( zxsxe%*Q`d0l`Z!*NFAl5ow^&&5m*EexqI3WF4123;9E556s*Z&7(OkAOB&~T@qqcn zEREU!v@MA4SMvswqjUxf@$Q{|DWxruEKok7Yl~y{j>uRz?b4rkqu_)yK~~hEDE;Np z=rI()#q^~icH7s+|L}eM#gP@KFpCv~)QPsXjlW(N{X^Cyt&UBOYDZ*(*16r#+}Wf` zXe!4tpEwGr(j z7pGekWoxN8Rw;oKvI?t%0Qr=h$|g%LHy%=QO#p_FyI1G&RI}|YB2ZxMmgbDR%x5aL zSF?%ZFZ2$m*%=7%+7b?O%>1_w=g(a|-`}ye#i1fznjWHMoX^5ZS^!!`4^3pnHKcU?33D_c z8$d<3#UFSFH;k!@{LGMTwi`x8MDkT~NnJc4%f8Mr%t_}?OVRpJ6DwLCX~;Pqe$gfc z3GfMQsw1#nB)u4QVDTW-e|hiH@I=YGlj~q4p>T4+j>e43Qnd+NAtSEwJN&JcR^eyX zfyU;Brf7E7(PvoM9*VfR^mPhI-ro3A{os1_&0gEwhJA?x1270D9d^n&WCgN#X?+qi z5W*K&OL)O_N*N;S!ePdcMMOPF8#I@S6@RTizG4@USmfyi(*$)Rh~Q%1a*#eEU(Z-) zAc)xgx{NJu+xT;*w1VzP)Rf(0DkvP2Zv>QxZdyz^z};r%LQNFyOx9MKs`Q%_JzkFX zEKxEgL4ClS^l*ny9s-gpV0ceGt)8i+KK+*fd(j_E{E1om-~LrQN;{wKDh?_C2?TMk z2j^*f669T$i{C%2Rynv~bVdV#t@z7RJQs>a&w}lGYL5L!_rgjFU*rXiF|XD8eW1Sq zT^pZ$gaWiMPKz-Z6ftno<@Wx&ir=kOi%__ zht1_sRLPS7X|A_NW(U-`9L%VC5CCr|`U=s!k!OSnGVwFaplXvnS_unCY`80!N_MPG zhQ@d>ui8y}qDKZ9dIO4ZRh@+wf<-#m7g04ukLi9l+JOsimU{I@El=4`{BZEF3ZTEI z4%qVEj5;WVq3~#5N^J6xMOngc?I_g|g5jp{N!SDfVGHSMXaP?U0SQdoK%?TU>elLz z6?Tb+Ql)S_U82)FbY_bFr!WTnqzSm z6QziIkk7W~++0zM#-XId6s%kMp`P0Bn0x zXGf%eo)h8lEB@eQ)2ouql+$9RS6Qt&Z{ho8XLJoY!9;+s{t}mR6r>?|Yc8?IFUn=o z-YD?~1Rye~WQy!zC6Pa9=MH~Y8U!)r>%*D@LCw+^E1DO;*&U2OOMu_AVf$8kL)w_F z&Mcz!7&;~{?F*Mr^o#zq-%+?2-9A$z?RxnIKp@ z?h#wT{T(mkA(-E;P9i(f9ge&p-%f*)u9<0u4u(4c#)nimQ`Pv5p6)|U z6T2%fafy;_F)+V4dZCzz_-(sg-QO?%auqmpY}i;(4YLA$D6`HzcgwrdrcL6(mvS9O z$n~zdE{b}y>m=sSU!1>cdHa3MrJT4vZ{M_y4$}d?lQ1}C(4#+EtTfs0HEd`cH(j?& za9dv4&42`@25S5k1SgxgPe4BC!`SgXfISLzI9-S-=oJ<;#gl`?_lU~Z(uY-^}4 z)jlh|0RZ+p)+^ekgJPq%%AoNdnfE~kpsXAB5*`8Ojk{il_wvo+<4$Frlmhj;sCUm2 zny4ZYa=Uez<)4==*FH;M{T%!^olpME{sp#_vxV&0aNvja@40FEOU7 z$~d>Y)>?9QlfP%*W7#e&W1V{#*AO^`RUEzgQYI7yeV+|TFbbI}e({9#+`qaI;ktam zzS)a(y_;tO4Mev{q&*`I@p7cx#m6plo>KewOB`i5EM+kgZ(mRITWg#}7M~^j;FUmT 
zT52idp00a&E(YSezJWmo3ZRbZa;`j(0+&>(dkWt&lL*9xIComLB_!Wz3xf=p2tUXk z-1b+p_x0}*|G2=nA?dq8_zlyw6eCPuCn>pFY3M(MR0RD`Gsrn7I}GVEPm7Jc zvx~ZI+fTtu+d}TU^P8S_Rr*$#kfr4USW?htfKC*)c(q{qPA`=>l^{b2Q*u6XF*TtK zf?beKi9yka5v2Mb5p{eG+J{V|pXmq=hAn z4(TB+O6fZ6(JqpSm3&bdV`vT=zD_)^z0en7-|>a2%B|X9|L)2VEP>c*|A}~? z40=*I#r&$4EtKdY5Kj+2f-`Iep0{`eO>Jm{B&>8x2s)>9&_SmjNT@dRCA-k$M#)j1 zbxZsj*xLdqWQw%NRW&gq^UO;N>*Ob0!7tB3ad!?n-f~hjpwkV_gFzeDhxoCLDbhHRxYnk3pOO^*-+G~@=l2wbng%7x83ULYxFo4}y(*#)<6dZg$RD?GExn2YXF#V3m3V_U1&e7_j5_9a zrEocwBBm6ai%2s7ib{&MF1v%eMhK8I{aEb6xZqI8eiLB7V)V}lmh=ya)37X?`s%}+V>JFYm zO4Bg3wmpm%T}o?!7L2AYSbV$vHxaLkr^a|BE!dHurOpH;-@|6PhCg6!+(56j3p1_M zKD)o?7uGnoiQ8YVeI6g=m7AXDg0v$F;#<`1BG3di8m_r53@`9}7wEeYVkYopQipX(cg=e~ieQZ#?O^1=waeu}fz?}@S6jdK3m#!Ep9J~sdyn6CUxWFymT%~jf4#`S z3G=*|!4W)5sbktU1qd(uN6Kz*e|vQ&M)FtgrMALg3qpgvrbsOnlS59y|7T8KV&w0^c2*Ob1v z9G|NGf%!X*Ys#0%ACogkS9Y2F2vt|isPij>Vb|p#0dZOWDj6+?h>W*bsvKLikR_DC!p&Zzw>_e=i%t8K3r}mv+<42r zNIt~{y&?LUE4EQ{4)cb4H)@7unpDr^SeoV_21uZX(mNewKLu#bas>@fy_lS}ZiOeX zu=nSMeGUQKger*FN0THaD}??^gLmg1OxVMlCBIno zDSdXS_=m2Zbn(-tpN}dt@fE?hh*nJf#5L8I#O^5LeSF>+uI9Zm=YNYIs~sF?Hws@* z7fIL%Xh%4ZnAQF1a=h@_vG@4)q>H1X9hwV7Nj5WmSmlqrz50MquAdOQ*NWR>7#czo zBsmCP@nsw?->+|0W^R}}b37T!ZjiA~0h!hpqJ;~yM(e}8X~QOT7-7Cqg_D+8pm*rW zI8b^EJpZEX0$5FIT1fsd@j-AqmzwZ;V)UouzB5W>;^H>}E9I`4JjY(3x~)^l)sVPs zF&rbkCrAP9@rIORqQyw-Q$@15i*8dCk43X?37eb?V1aL#mrVr@^_UpqsaL@V;>grw zkpw?j<@I;^WG6OoP`v`-53Q|UwOyUL+`#?q=^JC!{N-zu!QNNb=~0rq*<<)axYsJH z00)v&&UJR0@Z@}3C(HdctL=x^-YI-Ks66Bt+6`POkkveqz2)fSDgs&RSg#~9zDTPf z*OI*TGQg#P&%^9Y@TvD=E$TcbCchU~`4C343Wm>-sX`>VnHJ&bWL0$hcIJlY7rf-@ zvxEcr@lF;*kR5}__-XvkT6~M3M(-#+WO+^%)O!_U#G{Lj_5W14RT!^F$hn>z;aMC)x;6JnqwE&w3d?xq^92mzw9W)cW!yA2jGGCv

1np#V%;7Q6~;Fg zxT)hjZEy;&1;tj&bi`=Hk6oduPLkPC0ndZ)og_O$d8`mJ;uU1ad_=I&Keja=ufAU% zLn88gLZ3zcL}HJ8kmZQN`g@!2)pO5Ki#3N%(+g*YMZhinn3%oNFI2+?zEim5Uys0( zKfBuk1*Gk)apuCwq4n!K6VFH+xWCkIqrw_=+)ezZn8wD-uGxOyxR@l6p)Ys(l|GA$H(>BLODi-f+y0l z9xlDQtH1z!;Bq%5bHk;k$S-*(wV_-?7NR{)jKbXcam(TxJh1AevC@&tV(nOw$$Dx) z@)KorkJzvM;?RZ|>pHSk;#@&}l+e_c(n$E6$%g>}EV@)8E@O1#tuyvux@B@HR~RHX&Brm-|k3(Zxk^A^Zs)l3we$!znfng8Z)Q)RDavnp%*1UA%vmGErOgGRFy{ z1$J-Kl&kQQg<8)sU(@8w?V7UaE^Cv2qXhTv`ic_@-wV5MP6V1R0k=Hm^YK8$+=+uX zYf;ZF{$Tq=N(g)a>yx%!5T*8o>R^b`+@-GVVf~xp)3*|QZ0ny9RZ&Qv`jDODAQa|- zf1yd2YTcc>t}cCl0Nz%?hp@=sNg_2d56H%wcxJzrX{3qhRWfv&;{C?%kV&JClVqSk zGZv>@dkV-WA*F9b=uoik)w8sEi(i!vl?7|=sq=Y3!2!py!VvoBW9LNA#MM}mWD-kI za(vx#q4F-)^Xb~&iA~~cFhF{R<%KP~juI{yW@oz8h z3m+fc5Z>k?JYSUFqacB%7K-em<`MO2>QN?^506|RB~D_hp{)-Ka+q!BxI`O&o!&>m zg(LIK*%R!gz+x~L1##I;S`RZ}S`f<3*|s$>=nqk`^AHGSJU@azM|`J)VTq^X_^?|Z zk=9$(X@KV6guG^4yCuGERt&5wmA1HB6&Uv5YE#H-VGt9jyI6j(X254==En-^>*2L* z6JL)_aM~i?z{MW~MfH&8Vkx3j6Kzmq(O-Jfs^4m&`$05J`mr6p$9(g3`$%3(U~5h} zD#kCV#ChDQHz0~Z2F$NQGiM1ZTu-pr`Sg^(BgTtlX5NDi(vW2h@p6VV{1TNWc=Pc6 zkG{Ad8!>K``QLs_KF|yUSROsc`M-2nA{;Sda)ZJXvWCV2OMAr zi)ku=O27ag&FmAD_+1|5fI+ng;+bITJ4?7{0w5>8rWHAh&`<53r;Lh3#EG3Nelh;8 zABJNA8$5doMGxjP2(bnPe}V1Cgr`>*p($T*az`?5KgV1n%BijBDy6S;8k9uLwaats zCAVzq@^AS6HkI_LeioYD#WFE8%S;C(n5J<+U44)oh@@;bKNEacTN=vvH?7L>axG!> zu7PAVkYUN9+e{dTiybvg+`C==j6Js{GKcA$vS?s7H?PW6<=GF)w}I#lh@&d833*7J z@P@ql$NHyzpOU~QTi||5(S85$DI1`xY2$-cM-jp{miEypU7Q zRgWvK$Y1q;)1LcbvEPQ3S2t+U_@06-yxlme7-d4c~v`Z4Ui? 
z=4gtCfhbj!X&2rdFl2u3bS_vQMpCAAK<#Tv9rWuv3%s%{DdniJ2A0GM<$*Y!akKZs z{!n<)cr7I&=Zc5S^Ycv1IEiEdf{yT_3REK2WYIR$?DU=Hh5TvE*H2?3*0O<4Pp|6K z1BD?DIA$2Ke!o!4{4mwDlLJcWnNzm@j1$@%(IcZNxqCn@TQFK{J2_~p2Oxb!K1xa| zEk_i{K@hMZSC3RQd8{aKtZUDFPWCTdi@v55af zPrSe4PRBs6-`UfRSbSPYZ|8m7*1co_7BaL_R47Wf`1HmpfSckw}F5 z>f^b7!Of9nfNPI?NAtP@!tdn;P5-(c6h3+HE7>h#VIoue@&TUP*=(h}LK=5^qhdJa znOTcDDlagYAIR#CRmGx$w3N6!=h6B=YwtuG2jocot5nVNcg|0{2h8Fude?`{{&cAEO`*Z;XyW;0K91L+<_0}oWHhr?4iGV#`{B5MbE-_#S)&R<4*7)#jN+u9-^Na z`6MaDFz(3~D|>QH{Bg>Uz%;tgOpwy;CX1wo6nhHOkkrc2>Iw~p&Rc^ovD?FG6d8}e zH}6i;-Qu<9=noKaZv`C5aPf`2CJC}i43KNgG2Oig9BX#|bHl?P&j}!{^{SJC7hYh4 zkM1!CwNtE9F@Yl>JvDp8UGt-2?_Oq^1?SP6f^JS>IyF~ksF;aC-h=Rl+Vz_COVi1> zMrE$272m~7M+W?|x)6%m7mj*iF?lR)d90aaR54KXW=(QR`|Msk&w>A9VpL;?BKPF3 z_uZT~l?q3kbZ*ixaq$YwT>+Ohu^R!dGdGh9MvIEiQ2eRnk7xU>fg-8gI(;^qd zd^H`nztG^5ZL8rjq+(&?!X>i4zztx2^>XKDT%2K)s;DhBrVrZhgJy7-L;ATBf=Lffc=)9|${}$No0#<Nlm|x(8>^JKpuX_D=bxt@5S+8|O<{GKl@N3({fwqh#`M;=0Gh zpH-RSA7`i~zL>7~Av^Jd%G#)d^;Wydk~2wc~g;_etX%|4q5uUsu;px7R(y zb_1B^fr<VYUc^PI1z|?So)Vy(JIjpZF6Zik~ zib=UXyKV?A>-M_$|DWay))mdbMGg!MGo}DnMIVZ1P}BV;5f}OYd1yk-^vCXR_SS8_ zb@{h-{NwDWr-4=cCP@WOsJZ9Cb~89!{&(dUC(r~ zWitsP1)Q7)nlAk3(GsA!48otU-(0l2BoN4yTOY3@<|hqg0f7S(aDNemsRFSZ$nk)m z!8ZYM1|P&RAT2^*F9TT(D!}>)0zAP^0I?@Q0s+VxH4q#&qhSaRhS6jL4hDwNoB|Gq u(ZXT0aDdjdz`. + +Binary operator functions +~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.add + DataFrame.sub + DataFrame.mul + DataFrame.div + DataFrame.truediv + DataFrame.floordiv + DataFrame.mod + DataFrame.pow + DataFrame.dot + DataFrame.radd + DataFrame.rsub + DataFrame.rmul + DataFrame.rdiv + DataFrame.rtruediv + DataFrame.rfloordiv + DataFrame.rmod + DataFrame.rpow + DataFrame.lt + DataFrame.gt + DataFrame.le + DataFrame.ge + DataFrame.ne + DataFrame.eq + DataFrame.combine + DataFrame.combine_first + +Function application, GroupBy & window +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.apply + DataFrame.applymap + DataFrame.pipe + DataFrame.agg + DataFrame.aggregate + DataFrame.transform + DataFrame.groupby + DataFrame.rolling + DataFrame.expanding + DataFrame.ewm + +.. _api.dataframe.stats: + +Computations / descriptive stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.abs + DataFrame.all + DataFrame.any + DataFrame.clip + DataFrame.corr + DataFrame.corrwith + DataFrame.count + DataFrame.cov + DataFrame.cummax + DataFrame.cummin + DataFrame.cumprod + DataFrame.cumsum + DataFrame.describe + DataFrame.diff + DataFrame.eval + DataFrame.kurt + DataFrame.kurtosis + DataFrame.mad + DataFrame.max + DataFrame.mean + DataFrame.median + DataFrame.min + DataFrame.mode + DataFrame.pct_change + DataFrame.prod + DataFrame.product + DataFrame.quantile + DataFrame.rank + DataFrame.round + DataFrame.sem + DataFrame.skew + DataFrame.sum + DataFrame.std + DataFrame.var + DataFrame.nunique + DataFrame.value_counts + +Reindexing / selection / label manipulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.add_prefix + DataFrame.add_suffix + DataFrame.align + DataFrame.at_time + DataFrame.between_time + DataFrame.drop + DataFrame.drop_duplicates + DataFrame.duplicated + DataFrame.equals + DataFrame.filter + DataFrame.first + DataFrame.head + DataFrame.idxmax + DataFrame.idxmin + DataFrame.last + DataFrame.reindex + DataFrame.reindex_like + DataFrame.rename + DataFrame.rename_axis + DataFrame.reset_index + DataFrame.sample + DataFrame.set_axis + DataFrame.set_index + DataFrame.tail + DataFrame.take + DataFrame.truncate + +.. _api.dataframe.missing: + +Missing data handling +~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.backfill + DataFrame.bfill + DataFrame.dropna + DataFrame.ffill + DataFrame.fillna + DataFrame.interpolate + DataFrame.isna + DataFrame.isnull + DataFrame.notna + DataFrame.notnull + DataFrame.pad + DataFrame.replace + +Reshaping, sorting, transposing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.droplevel + DataFrame.pivot + DataFrame.pivot_table + DataFrame.reorder_levels + DataFrame.sort_values + DataFrame.sort_index + DataFrame.nlargest + DataFrame.nsmallest + DataFrame.swaplevel + DataFrame.stack + DataFrame.unstack + DataFrame.swapaxes + DataFrame.melt + DataFrame.explode + DataFrame.squeeze + DataFrame.to_xarray + DataFrame.T + DataFrame.transpose + +Combining / comparing / joining / merging +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.append + DataFrame.assign + DataFrame.compare + DataFrame.join + DataFrame.merge + DataFrame.update + +Time Series-related +~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.asfreq + DataFrame.asof + DataFrame.shift + DataFrame.slice_shift + DataFrame.tshift + DataFrame.first_valid_index + DataFrame.last_valid_index + DataFrame.resample + DataFrame.to_period + DataFrame.to_timestamp + DataFrame.tz_convert + DataFrame.tz_localize + +.. _api.frame.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date it was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`DataFrame.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags + + +.. _api.frame.metadata: + +Metadata +~~~~~~~~ + +:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. + +.. warning:: ``DataFrame.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + DataFrame.attrs + + +.. 
_api.dataframe.plotting: + +Plotting +~~~~~~~~ +``DataFrame.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``DataFrame.plot.<kind>``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + DataFrame.plot + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + DataFrame.plot.area + DataFrame.plot.bar + DataFrame.plot.barh + DataFrame.plot.box + DataFrame.plot.density + DataFrame.plot.hexbin + DataFrame.plot.hist + DataFrame.plot.kde + DataFrame.plot.line + DataFrame.plot.pie + DataFrame.plot.scatter + +.. autosummary:: + :toctree: api/ + + DataFrame.boxplot + DataFrame.hist + + +.. _api.frame.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``DataFrame.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + DataFrame.sparse.density + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + DataFrame.sparse.from_spmatrix + DataFrame.sparse.to_coo + DataFrame.sparse.to_dense + + +Serialization / IO / conversion +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DataFrame.from_dict + DataFrame.from_records + DataFrame.to_parquet + DataFrame.to_pickle + DataFrame.to_csv + DataFrame.to_hdf + DataFrame.to_sql + DataFrame.to_dict + DataFrame.to_excel + DataFrame.to_json + DataFrame.to_html + DataFrame.to_feather + DataFrame.to_latex + DataFrame.to_stata + DataFrame.to_gbq + DataFrame.to_records + DataFrame.to_string + DataFrame.to_clipboard + DataFrame.to_markdown + DataFrame.style diff --git a/docs/cudf/source/api_docs/generated/pandas.Series.array.rst b/docs/cudf/source/api_docs/generated/pandas.Series.array.rst new file mode 100644 index 00000000000..e0954c01d1a --- /dev/null +++ b/docs/cudf/source/api_docs/generated/pandas.Series.array.rst @@ -0,0 +1,6 @@ +pandas.Series.array +=================== + +.. currentmodule:: pandas + +.. autoproperty:: Series.array \ No newline at end of file diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst new file mode 100644 index 00000000000..94d9132d4c9 --- /dev/null +++ b/docs/cudf/source/api_docs/index.rst @@ -0,0 +1,15 @@ +============= +API reference +============= + +This is a simple demonstration site to show off a few visual +and structural elements of the theme. Click the sections on +the left sidebar to see how various elements look on this theme. + +.. toctree:: + :maxdepth: 2 + :caption: API Documentation + + series + dataframe + diff --git a/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst b/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst new file mode 100644 index 00000000000..9ee305369db --- /dev/null +++ b/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.drop +===================== + +.. currentmodule:: pandas + +.. 
automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst b/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst new file mode 100644 index 00000000000..f8872657308 --- /dev/null +++ b/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.groupby +======================== + +.. currentmodule:: pandas + +.. automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst new file mode 100644 index 00000000000..6e0943930a1 --- /dev/null +++ b/docs/cudf/source/api_docs/series.rst @@ -0,0 +1,601 @@ +====== +Series +====== +.. currentmodule:: cudf + +Constructor +----------- +.. autosummary:: + :toctree: api/ + + Series + +Attributes +---------- +**Axes** + +.. autosummary:: + :toctree: api/ + + Series.index + Series.array + Series.values + Series.dtype + Series.shape + Series.nbytes + Series.ndim + Series.size + Series.T + Series.memory_usage + Series.hasnans + Series.empty + Series.dtypes + Series.name + Series.flags + Series.set_flags + +Conversion +---------- +.. autosummary:: + :toctree: api/ + + Series.astype + Series.convert_dtypes + Series.infer_objects + Series.copy + Series.bool + Series.to_numpy + Series.to_period + Series.to_timestamp + Series.to_list + Series.__array__ + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + Series.get + Series.at + Series.iat + Series.loc + Series.iloc + Series.__iter__ + Series.items + Series.iteritems + Series.keys + Series.pop + Series.item + Series.xs + +For more information on ``.at``, ``.iat``, ``.loc``, and +``.iloc``, see the :ref:`indexing documentation `. + +Binary operator functions +------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.add + Series.sub + Series.mul + Series.div + Series.truediv + Series.floordiv + Series.mod + Series.pow + Series.radd + Series.rsub + Series.rmul + Series.rdiv + Series.rtruediv + Series.rfloordiv + Series.rmod + Series.rpow + Series.combine + Series.combine_first + Series.round + Series.lt + Series.gt + Series.le + Series.ge + Series.ne + Series.eq + Series.product + Series.dot + +Function application, GroupBy & window +-------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.apply + Series.agg + Series.aggregate + Series.transform + Series.map + Series.groupby + Series.rolling + Series.expanding + Series.ewm + Series.pipe + +.. _api.series.stats: + +Computations / descriptive stats +-------------------------------- +.. autosummary:: + :toctree: api/ + + Series.abs + Series.all + Series.any + Series.autocorr + Series.between + Series.clip + Series.corr + Series.count + Series.cov + Series.cummax + Series.cummin + Series.cumprod + Series.cumsum + Series.describe + Series.diff + Series.factorize + Series.kurt + Series.mad + Series.max + Series.mean + Series.median + Series.min + Series.mode + Series.nlargest + Series.nsmallest + Series.pct_change + Series.prod + Series.quantile + Series.rank + Series.sem + Series.skew + Series.std + Series.sum + Series.var + Series.kurtosis + Series.unique + Series.nunique + Series.is_unique + Series.is_monotonic + Series.is_monotonic_increasing + Series.is_monotonic_decreasing + Series.value_counts + +Reindexing / selection / label manipulation +------------------------------------------- +.. 
autosummary:: + :toctree: api/ + + Series.align + Series.drop + Series.droplevel + Series.drop_duplicates + Series.duplicated + Series.equals + Series.first + Series.head + Series.idxmax + Series.idxmin + Series.isin + Series.last + Series.reindex + Series.reindex_like + Series.rename + Series.rename_axis + Series.reset_index + Series.sample + Series.set_axis + Series.take + Series.tail + Series.truncate + Series.where + Series.mask + Series.add_prefix + Series.add_suffix + Series.filter + +Missing data handling +--------------------- +.. autosummary:: + :toctree: api/ + + Series.backfill + Series.bfill + Series.dropna + Series.ffill + Series.fillna + Series.interpolate + Series.isna + Series.isnull + Series.notna + Series.notnull + Series.pad + Series.replace + +Reshaping, sorting +------------------ +.. autosummary:: + :toctree: api/ + + Series.argsort + Series.argmin + Series.argmax + Series.reorder_levels + Series.sort_values + Series.sort_index + Series.swaplevel + Series.unstack + Series.explode + Series.searchsorted + Series.ravel + Series.repeat + Series.squeeze + Series.view + +Combining / comparing / joining / merging +----------------------------------------- +.. autosummary:: + :toctree: api/ + + Series.append + Series.compare + Series.update + +Time Series-related +------------------- +.. autosummary:: + :toctree: api/ + + Series.asfreq + Series.asof + Series.shift + Series.first_valid_index + Series.last_valid_index + Series.resample + Series.tz_convert + Series.tz_localize + Series.at_time + Series.between_time + Series.tshift + Series.slice_shift + +Accessors +--------- + +pandas provides dtype-specific methods under various accessors. +These are separate namespaces within :class:`Series` that only apply +to specific data types. 
+ +=========================== ================================= +Data Type Accessor +=========================== ================================= +Datetime, Timedelta, Period :ref:`dt <api.series.dt>` +String :ref:`str <api.series.str>` +Categorical :ref:`cat <api.series.cat>` +Sparse :ref:`sparse <api.series.sparse>` +=========================== ================================= + +.. _api.series.dt: + +Datetimelike properties +~~~~~~~~~~~~~~~~~~~~~~~ + +``Series.dt`` can be used to access the values of the series as +datetimelike and return several properties. +These can be accessed like ``Series.dt.<property>``. + +Datetime properties +^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.date + Series.dt.time + Series.dt.timetz + Series.dt.year + Series.dt.month + Series.dt.day + Series.dt.hour + Series.dt.minute + Series.dt.second + Series.dt.microsecond + Series.dt.nanosecond + Series.dt.week + Series.dt.weekofyear + Series.dt.dayofweek + Series.dt.day_of_week + Series.dt.weekday + Series.dt.dayofyear + Series.dt.day_of_year + Series.dt.quarter + Series.dt.is_month_start + Series.dt.is_month_end + Series.dt.is_quarter_start + Series.dt.is_quarter_end + Series.dt.is_year_start + Series.dt.is_year_end + Series.dt.is_leap_year + Series.dt.daysinmonth + Series.dt.days_in_month + Series.dt.tz + Series.dt.freq + +Datetime methods +^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.to_period + Series.dt.to_pydatetime + Series.dt.tz_localize + Series.dt.tz_convert + Series.dt.normalize + Series.dt.strftime + Series.dt.round + Series.dt.floor + Series.dt.ceil + Series.dt.month_name + Series.dt.day_name + +Period properties +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.qyear + Series.dt.start_time + Series.dt.end_time + +Timedelta properties +^^^^^^^^^^^^^^^^^^^^ + +.. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.dt.days + Series.dt.seconds + Series.dt.microseconds + Series.dt.nanoseconds + Series.dt.components + +Timedelta methods +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.dt.to_pytimedelta + Series.dt.total_seconds + + +.. _api.series.str: + +String handling +~~~~~~~~~~~~~~~ + +``Series.str`` can be used to access the values of the series as +strings and apply several methods to it. These can be accessed like +``Series.str.<function/property>``. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.str.capitalize + Series.str.casefold + Series.str.cat + Series.str.center + Series.str.contains + Series.str.count + Series.str.decode + Series.str.encode + Series.str.endswith + Series.str.extract + Series.str.extractall + Series.str.find + Series.str.findall + Series.str.get + Series.str.index + Series.str.join + Series.str.len + Series.str.ljust + Series.str.lower + Series.str.lstrip + Series.str.match + Series.str.normalize + Series.str.pad + Series.str.partition + Series.str.repeat + Series.str.replace + Series.str.rfind + Series.str.rindex + Series.str.rjust + Series.str.rpartition + Series.str.rstrip + Series.str.slice + Series.str.slice_replace + Series.str.split + Series.str.rsplit + Series.str.startswith + Series.str.strip + Series.str.swapcase + Series.str.title + Series.str.translate + Series.str.upper + Series.str.wrap + Series.str.zfill + Series.str.isalnum + Series.str.isalpha + Series.str.isdigit + Series.str.isspace + Series.str.islower + Series.str.isupper + Series.str.istitle + Series.str.isnumeric + Series.str.isdecimal + Series.str.get_dummies + +.. + The following is needed to ensure the generated pages are created with the + correct template (otherwise they would be created in the Series/Index class page) + +.. + .. 
autosummary:: + :toctree: api/ + :template: autosummary/accessor.rst + + Series.str + Series.cat + Series.dt + Series.sparse + DataFrame.sparse + Index.str + +.. _api.series.cat: + +Categorical accessor +~~~~~~~~~~~~~~~~~~~~ + +Categorical-dtype specific methods and attributes are available under +the ``Series.cat`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.cat.categories + Series.cat.ordered + Series.cat.codes + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.cat.rename_categories + Series.cat.reorder_categories + Series.cat.add_categories + Series.cat.remove_categories + Series.cat.remove_unused_categories + Series.cat.set_categories + Series.cat.as_ordered + Series.cat.as_unordered + + +.. _api.series.sparse: + +Sparse accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``Series.sparse`` accessor. + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_attribute.rst + + Series.sparse.npoints + Series.sparse.density + Series.sparse.fill_value + Series.sparse.sp_values + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.sparse.from_coo + Series.sparse.to_coo + +.. _api.series.flags: + +Flags +~~~~~ + +Flags refer to attributes of the pandas object. Properties of the dataset (like +the date it was recorded, the URL it was accessed from, etc.) should be stored +in :attr:`Series.attrs`. + +.. autosummary:: + :toctree: api/ + + Flags + +.. _api.series.metadata: + +Metadata +~~~~~~~~ + +:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. + +.. warning:: ``Series.attrs`` is considered experimental and may change without warning. + +.. autosummary:: + :toctree: api/ + + Series.attrs + + +Plotting +-------- +``Series.plot`` is both a callable method and a namespace attribute for +specific plotting methods of the form ``Series.plot.<kind>``. 
+ +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_callable.rst + + Series.plot + +.. autosummary:: + :toctree: api/ + :template: autosummary/accessor_method.rst + + Series.plot.area + Series.plot.bar + Series.plot.barh + Series.plot.box + Series.plot.density + Series.plot.hist + Series.plot.kde + Series.plot.line + Series.plot.pie + +.. autosummary:: + :toctree: api/ + + Series.hist + +Serialization / IO / conversion +------------------------------- +.. autosummary:: + :toctree: api/ + + Series.to_pickle + Series.to_csv + Series.to_dict + Series.to_excel + Series.to_frame + Series.to_xarray + Series.to_hdf + Series.to_sql + Series.to_json + Series.to_string + Series.to_clipboard + Series.to_latex + Series.to_markdown diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst new file mode 100644 index 00000000000..15b4b43662b --- /dev/null +++ b/docs/cudf/source/basics/basics.rst @@ -0,0 +1,54 @@ +Basics +====== + + +Supported Dtypes +---------------- + +cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumPy dtypes; NumPy provides support for ``float``, ``int``, ``bool``, +``'timedelta64[s]'``, ``'timedelta64[ms]'``, ``'timedelta64[us]'``, ``'timedelta64[ns]'``, ``'datetime64[s]'``, ``'datetime64[ms]'``, +``'datetime64[us]'``, ``'datetime64[ns]'`` (note that NumPy does not support timezone-aware datetimes). + + +The following table lists all of the cuDF types. For methods requiring dtype arguments, strings can be specified as indicated. See the respective documentation sections for more on each type. 
+ + ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Kind of Data | Data Type | Scalar | String Aliases | ++========================+==================+=====================================================================================+=============================================+ +| Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | +| | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | +| | | | ``'uint32'``, ``'uint64'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Strings | | `str `_ | ``'string'``, ``'object'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | +| | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| +| (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Categorical | CategoricalDtype | (none) 
| ``'category'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Boolean | | np.bool_ | ``'bool'`` | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +| Decimal | Decimal64Dtype | (none) | (none) | ++------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + +**Note: All dtypes above are Nullable** + +.. _np.int8: +.. _np.int16: +.. _np.int32: +.. _np.int64: +.. _np.uint8: +.. _np.uint16: +.. _np.uint32: +.. _np.uint64: +.. _np.float32: +.. _np.float64: +.. _np.bool: https://numpy.org/doc/stable/user/basics.types.html +.. _np.datetime64: https://numpy.org/doc/stable/reference/arrays.datetime.html#basic-datetimes +.. _np.timedelta64: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic diff --git a/docs/cudf/source/basics/dask-cudf.md b/docs/cudf/source/basics/dask-cudf.md new file mode 100644 index 00000000000..92ef4eb1c46 --- /dev/null +++ b/docs/cudf/source/basics/dask-cudf.md @@ -0,0 +1,78 @@ +Multi-GPU with Dask-cuDF +======================== + +cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use [Dask](https://dask.org/) and the [dask-cudf package](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf), which is able to scale cuDF across multiple GPUs on a single machine, or multiple GPUs across many machines in a cluster. + +[Dask DataFrame](http://docs.dask.org/en/latest/dataframe.html) was originally designed to scale Pandas, orchestrating many Pandas DataFrames spread across many CPUs into a cohesive parallel DataFrame. 
Because cuDF currently implements only a subset of Pandas’s API, not all Dask DataFrame operations work with cuDF. + +The following is tested and expected to work: + +What works +---------- + +- Data ingestion + - ``dask_cudf.read_csv`` + - Use standard Dask ingestion with Pandas, then convert to cuDF (For + Parquet and other formats this is often decently fast) +- Linear operations + - Element-wise operations: ``df.x + df.y``, ``df ** 2`` + - Assignment: ``df['z'] = df.x + df.y`` + - Row-wise selections: ``df[df.x > 0]`` + - Loc: ``df.loc['2001-01-01': '2005-02-02']`` + - Date time/string accessors: ``df.timestamp.dt.dayofweek`` + - ... and most similar operations in this category that are already implemented in cuDF +- Reductions + - Like ``sum``, ``mean``, ``max``, ``count``, and so on on ``Series`` objects + - Support for reductions on full dataframes + - ``std`` + - Custom reductions with [dask.dataframe.reduction](http://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.Series.reduction) +- Groupby aggregations + - On single columns: ``df.groupby('x').y.max()`` + - With custom aggregations: + - groupby standard deviation + - grouping on multiple columns + - groupby agg for multiple outputs +- Joins: + - On full unsorted columns: ``left.merge(right, on='id')`` (expensive) + - On sorted indexes: ``left.merge(right, left_index=True, right_index=True)`` (fast) + - On large and small dataframes: ``left.merge(cudf_df, on='id')`` (fast) +- Rolling operations +- Converting to and from other forms + - Dask + Pandas to Dask + cuDF ``df.map_partitions(cudf.from_pandas)`` + - Dask + cuDF to Dask + Pandas ``df.map_partitions(lambda df: df.to_pandas())`` + - cuDF to Dask + cuDF: ``dask.dataframe.from_pandas(df, npartitions=20)`` + - Dask + cuDF to cuDF: ``df.compute()`` + +Additionally all generic Dask operations, like ``compute``, ``persist``, +``visualize`` and so on work regardless. 
+ + +Developing the API +------------------ + +Above we mention the following: + +> and most similar operations in this category that are already implemented in cuDF + +This is because it is difficult to create a comprehensive list of operations in +the cuDF and Pandas libraries. The API is large enough to be difficult to track +effectively. For any operation that operates row-wise, like ``fillna`` or +``query``, things will likely, but not certainly, work. If operations don't work, +it is often due to a slight inconsistency between Pandas and cuDF that is +generally easy to fix. We encourage users to look at the [cuDF issue +tracker](https://github.com/rapidsai/cudf/issues) to see if their issue has +already been reported and, if not, +[raise a new issue](https://github.com/rapidsai/cudf/issues/new). + + +Navigating the API +------------------ + +This project reuses the +[Dask DataFrame](https://docs.dask.org/en/latest/dataframe.html) project, which +was originally designed for Pandas, with the newer library cuDF. Because we use +the same Dask classes for both projects, there are often methods that are +implemented for Pandas, but not yet for cuDF. As a result, looking at the +full Dask DataFrame API can be misleading, and often leads to frustration when +operations that are advertised in the Dask API do not work as expected with +cuDF. We apologize for this in advance. diff --git a/docs/cudf/source/basics/groupby.md b/docs/cudf/source/basics/groupby.md new file mode 100644 index 00000000000..8a0e5dddba0 --- /dev/null +++ b/docs/cudf/source/basics/groupby.md @@ -0,0 +1,200 @@ +GroupBy +======= + +cuDF supports a small (but important) subset of +Pandas' [groupby API](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html). + +## Summary of supported operations + +1. Grouping by one or more columns +1. Basic aggregations such as "sum", "mean", etc. +1. Quantile aggregation +1. 
A "collect" or `list` aggregation for collecting values in a group into lists +1. Automatic exclusion of columns with unsupported dtypes ("nuisance" columns) when aggregating +1. Iterating over the groups of a GroupBy object +1. `GroupBy.groups` API that returns a mapping of group keys to row labels +1. `GroupBy.apply` API for performing arbitrary operations on each group. Note that + this has very limited functionality compared to the equivalent Pandas function. + See the section on [apply](#groupby-apply) for more details. +1. `GroupBy.pipe` similar to [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls). + +## Grouping + +A GroupBy object is created by grouping the values of a `Series` or `DataFrame` +by one or more columns: + +```python +import cudf + +>>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}) +>>> df +>>> gb1 = df.groupby('a') # grouping by a single column +>>> gb2 = df.groupby(['a', 'b']) # grouping by multiple columns +>>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b'])) # grouping by an external column +``` + +``` warning:: + cuDF uses `sort=False` by default to achieve better performance, which provides no guarantee of the group order in outputs. This deviates from Pandas' default behavior. + + For example: + + .. code-block:: python + + >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]}) + >>> df.groupby('a').sum() + b + a + 2 63 + 1 11 + >>> df.to_pandas().groupby('a').sum() + b + a + 1 11 + 2 63 + + Setting `sort=True` will produce Pandas-like output, but with some performance penalty: + + .. code-block:: python + + >>> df.groupby('a', sort=True).sum() + b + a + 1 11 + 2 63 + +``` + +### Grouping by index levels + +You can also group by one or more levels of a MultiIndex: + +```python +>>> df = cudf.DataFrame( +... {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]} +... ).set_index(['a', 'b']) +... 
+>>> df.groupby(level='a') +``` + +### The `Grouper` object + +A `Grouper` can be used to disambiguate between columns and levels when they have the same name: + +```python +>>> df + b c +b +1 1 1 +1 1 2 +1 2 3 +2 2 4 +2 3 5 +>>> df.groupby('b', level='b') # ValueError: Cannot specify both by and level +>>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')]) # OK +``` + +## Aggregation + +Aggregations on groups are supported via the `agg` method: + +```python +>>> df + a b c +0 1 1 1 +1 1 1 2 +2 1 2 3 +3 2 2 4 +4 2 3 5 +>>> df.groupby('a').agg('sum') + b c +a +1 4 6 +2 5 9 +>>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'}) + b c + sum min mean +a +1 4 1 2.0 +2 5 2 4.5 +``` + +The following table summarizes the available aggregations and the types that support them: + +| Aggregations\dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | +| ------------------- | -------- | ------- | -------- | ----------- | ---- | ------ | -------- | ------- | +| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | +| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | +| sum | ✅ | ✅ | | | | | | ✅ | +| idxmin | ✅ | ✅ | | | | | | ✅ | +| idxmax | ✅ | ✅ | | | | | | ✅ | +| min | ✅ | ✅ | ✅ | | | | | ✅ | +| max | ✅ | ✅ | ✅ | | | | | ✅ | +| mean | ✅ | ✅ | | | | | | | +| var | ✅ | ✅ | | | | | | | +| std | ✅ | ✅ | | | | | | | +| quantile | ✅ | ✅ | | | | | | | +| median | ✅ | ✅ | | | | | | | +| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | +| nth | ✅ | ✅ | ✅ | | | | | ✅ | +| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | +| unique | ✅ | ✅ | ✅ | ✅ | | | | | + +## GroupBy apply + +To apply a function to each group, use the `GroupBy.apply()` method: + +```python +>>> df + a b c +0 1 1 1 +1 1 1 2 +2 1 2 3 +3 2 2 4 +4 2 3 5 +>>> df.groupby('a').apply(lambda x: x.max() - x.min()) + a b c +a +0 0 1 2 +1 0 1 1 +``` + +### Limitations + +* `apply` works by applying the provided function to each group sequentially, + and concatenating the results together. 
**This can be very slow**, especially + for a large number of small groups. For a small number of large groups, it + can give acceptable performance. + +* The results may not always match Pandas exactly. For example, cuDF may return + a `DataFrame` containing a single column where Pandas returns a `Series`. + Some post-processing may be required to match Pandas behavior. + +* cuDF does not support some of the exceptional cases that Pandas supports with + `apply`, such as [`describe`](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply). + +## Rolling window calculations + +Use the `GroupBy.rolling()` method to perform rolling window calculations on each group: + +```python +>>> df + a b c +0 1 1 1 +1 1 1 2 +2 1 2 3 +3 2 2 4 +4 2 3 5 +``` + +Rolling window sum on each group with a window size of 2: + +```python +>>> df.groupby('a').rolling(2).sum() + a b c +a +1 0 + 1 2 2 3 + 2 2 3 5 +2 3 + 4 4 5 9 +``` diff --git a/docs/cudf/source/basics/index.rst b/docs/cudf/source/basics/index.rst new file mode 100644 index 00000000000..d4cb7259d17 --- /dev/null +++ b/docs/cudf/source/basics/index.rst @@ -0,0 +1,14 @@ +====== +Basics +====== + + +.. toctree:: + :maxdepth: 2 + + basics + io.rst + groupby.md + dask-cudf.md + internals.md + diff --git a/docs/cudf/source/basics/internals.md b/docs/cudf/source/basics/internals.md new file mode 100644 index 00000000000..d0a2a324c17 --- /dev/null +++ b/docs/cudf/source/basics/internals.md @@ -0,0 +1,194 @@ +cuDF internals +============== + +The cuDF API closely matches that of the [Pandas](https://pandas.pydata.org/) library. +Thus, we have the types `cudf.Series`, `cudf.DataFrame` and `cudf.Index` which look and +feel very much like their Pandas counterparts. + +Under the hood, however, cuDF uses data structures very different from Pandas. In this document, +we describe these internal data structures. 
+ +## Column + +Columns are cuDF's core data structure and they are modeled after +the [Apache Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html). + +A column represents a sequence of values, any number of which may be "null". Columns are +specialized based on the type of data they contain. Thus we have `NumericalColumn`, `StringColumn`, +`DatetimeColumn`, etc. + +A column is composed of the following: + +* A **data type**, specifying the type of each element. +* A **data buffer** that may store the data for the column elements. + Some column types do not have a data buffer, instead storing data in the children columns. +* A **mask buffer** whose bits represent the validity (null or not null) of each element. + Columns whose elements are all "valid" may not have a mask buffer. Mask buffers are padded + to a multiple of 64 bytes. +* A tuple of **children** columns, which enable the representation of complex types, such as + columns with non-fixed-width elements (e.g., strings or lists). +* A **size** indicating the number of elements in the column. +* An integer **offset**: a column may represent a "slice" of another column, + in which case this offset represents the first element of the slice. The size of + the column then gives the extent of the slice. A column that is not a slice + has an offset of 0. + +For example, the `NumericalColumn` backing a Series with 1000 elements of type 'int32' +and containing nulls is composed of: + +1. A data buffer of size 4000 bytes (sizeof(int32) * 1000) +2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64 bytes) +3. No children columns + +As another example, the `StringColumn` backing the Series +`['do', 'you', 'have', 'any', 'cheese?']` is composed of: + +1. No data buffer +2. No mask buffer as there are no nulls in the Series +3. Two children columns: + - A column of 8-bit characters `['d', 'o', 'y', 'o', 'u', 'h' ... 
'?']` + - A column of "offsets" to the characters column (in this case, `[0, 2, 5, 9, 12, 19]`) + +## Buffer + +The data and mask buffers of a column represent data in GPU memory (a.k.a. *device memory*), +and are objects of type `cudf.core.buffer.Buffer`. + +Buffers can be constructed from array-like objects that live either on the host (e.g., NumPy arrays) +or the device (e.g., CuPy arrays). Arrays must be of `uint8` dtype or viewed as such. + +When constructing a Buffer from a host object such as a NumPy array, new device memory is allocated: + +```python +>>> from cudf.core.buffer import Buffer +>>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8")) +>>> print(buf.ptr) # address of new device memory allocation +140050901762560 +>>> print(buf.size) +24 +>>> print(buf._owner) + +``` + +cuDF uses the [RMM](https://github.com/rapidsai/rmm) library for allocating device memory. +You can read more about device memory allocation with RMM +[here](https://github.com/rapidsai/rmm#devicebuffers). + +When constructing a Buffer from a device object such as a CuPy array, no new device memory is +allocated. Instead, the Buffer points to the existing allocation, keeping a reference to the device +array: + +```python +>>> import cupy as cp +>>> c_ary = cp.asarray([1, 2, 3], dtype='int64') +>>> buf = Buffer(c_ary.view("uint8")) +>>> print(c_ary.data.mem.ptr) +140050901762560 +>>> print(buf.ptr) +140050901762560 +>>> print(buf.size) +24 +>>> print(buf._owner is c_ary) +True +``` + +An uninitialized block of device memory can be allocated with `Buffer.empty`: + +```python +>>> buf = Buffer.empty(10) +>>> print(buf.size) +10 +>>> print(buf._owner) + +``` + +## ColumnAccessor + +cuDF `Series`, `DataFrame` and `Index` are all subclasses of an internal `Frame` class. 
+The underlying data structure of `Frame` is an ordered, dictionary-like object +known as `ColumnAccessor`, which can be accessed via the `._data` attribute: + +```python +>>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) +>>> a._data +ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) +``` + +ColumnAccessor is an ordered mapping of column labels to columns. In addition to behaving +like an OrderedDict, it supports things like selecting multiple columns (both by index and label), as well as hierarchical indexing. + +```python +>>> from cudf.core.column_accessor import ColumnAccessor +``` + +The values of a ColumnAccessor are coerced to Columns during construction: + +```python +>>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) +>>> ca['x'] + +>>> ca['y'] + +>>> ca.pop('x') + +>>> ca +ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) +``` + +Columns can be inserted at a specified location: + +```python +>>> ca.insert('z', [3, 4, 5], loc=1) +>>> ca +ColumnAccessor(OrderedColumnDict([('x', ), ('z', ), ('y', )]), multiindex=False, level_names=(None,)) +``` + +Selecting columns by index: + +```python +>>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]}) +>>> ca.select_by_index(1) +ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) +>>> ca.select_by_index([0, 1]) +ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) +>>> ca.select_by_index(slice(1, 3)) +ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) +``` + +Selecting columns by label: + +```python +>>> ca.select_by_label(['y', 'z']) +ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) +>>> ca.select_by_label(slice('x', 'y')) +ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) +``` + +A 
ColumnAccessor with tuple keys (and constructed with `multiindex=True`) +can be hierarchically indexed: + +```python +>>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True) +>>> ca.select_by_label('a') +ColumnAccessor(OrderedColumnDict([('b', ), ('c', )]), multiindex=False, level_names=(None,)) +>>> ca.select_by_label(('a', 'b')) +ColumnAccessor(OrderedColumnDict([(('a', 'b'), )]), multiindex=False, level_names=(None,)) +``` + +"Wildcard" indexing is also allowed: + +```python +>>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True) +>>> ca.select_by_label((slice(None), 'b')) +ColumnAccessor(OrderedColumnDict([(('a', 'b'), ), (('d', 'b'), )]), multiindex=True, level_names=(None, None)) +``` + +Finally, ColumnAccessors can be converted to Pandas `Index` or `MultiIndex` objects: + +```python +>>> ca.to_pandas_index() +MultiIndex([('a', 'b'), + ('a', 'c'), + ('d', 'b')], + ) +``` diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst new file mode 100644 index 00000000000..e09e155ef92 --- /dev/null +++ b/docs/cudf/source/basics/io-supported-types.rst @@ -0,0 +1,62 @@ +I/O Supported dtypes +==================== + +The following table lists the compatible cuDF types for each supported I/O format. + +.. rst-class:: io-supported-types-table +.. 
table:: + :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-----------------+--------+--------+--------+--------+ + | | CSV | Parquet | JSON | ORC | AVRO | HDF | DLPack | Feather | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | Data Type | Writer | Reader | Writer | Reader | Writer | Reader | Writer | Reader | Reader | Writer | Reader | Writer | Reader | Writer | Reader | + +=======================+========+========+========+========+========+========+========+========+========+========+========+========+========+========+========+ + | int8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | int16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | int32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | int64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | uint8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | uint16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ 
| ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | uint32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | uint64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | float32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | float64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | bool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | str | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | category | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | list | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | + 
+-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | timedelta64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | timedelta64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | timedelta64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | timedelta64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | datetime64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | datetime64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | datetime64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | datetime64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + 
+-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | struct | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + | decimal | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ diff --git a/docs/cudf/source/basics/io.rst b/docs/cudf/source/basics/io.rst new file mode 100644 index 00000000000..5186473ae10 --- /dev/null +++ b/docs/cudf/source/basics/io.rst @@ -0,0 +1,11 @@ +~~~~~~~~~~~~~~ +Input / Output +~~~~~~~~~~~~~~ + +This page contains Input / Output related APIs in cuDF. + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + + io-supported-types.rst \ No newline at end of file diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 18ffbacca1f..eac1d36d7aa 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -52,7 +52,7 @@ copybutton_prompt_text = ">>> " - +autosummary_generate = True ipython_mplbackend = "str" # Add any paths that contain templates here, relative to this directory. @@ -77,9 +77,9 @@ # built documents. # # The short X.Y version. -version = '0.20' +version = "0.20" # The full version, including alpha/beta/rc tags. -release = '0.20.0' +release = "0.20.0" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -99,27 +99,33 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False +html_theme_options = { + "external_links": [], + "github_url": "https://github.com/rapidsai/cudf", + "twitter_url": "https://twitter.com/rapidsai", + "show_toc_level": 1, + "navbar_align": "right", +} -# -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" - +html_theme = "pydata_sphinx_theme" +html_logo = "_static/RAPIDS-logo-purple.png" # on_rtd is whether we are on readthedocs.org on_rtd = os.environ.get("READTHEDOCS", None) == "True" -if not on_rtd: - # only import and set the theme if we're building docs locally - # otherwise, readthedocs.org uses their theme by default, - # so no need to specify it - import sphinx_rtd_theme - - html_theme = "sphinx_rtd_theme" - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - +# if not on_rtd: +# # only import and set the theme if we're building docs locally +# # otherwise, readthedocs.org uses their theme by default, +# # so no need to specify it +# import sphinx_rtd_theme +# +# html_theme = "pydata_sphinx_theme" +# # html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +# # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the diff --git a/docs/cudf/source/cudf.DataFrame.drop.rst b/docs/cudf/source/cudf.DataFrame.drop.rst new file mode 100644 index 00000000000..6d46566674d --- /dev/null +++ b/docs/cudf/source/cudf.DataFrame.drop.rst @@ -0,0 +1,6 @@ +cudf.DataFrame.drop +=================== + +.. currentmodule:: cudf + +.. 
automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/cudf.DataFrame.where.rst b/docs/cudf/source/cudf.DataFrame.where.rst new file mode 100644 index 00000000000..c2035bf11b0 --- /dev/null +++ b/docs/cudf/source/cudf.DataFrame.where.rst @@ -0,0 +1,6 @@ +cudf.DataFrame.where +==================== + +.. currentmodule:: cudf + +.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/generated/cudf.DataFrame.drop.rst b/docs/cudf/source/generated/cudf.DataFrame.drop.rst new file mode 100644 index 00000000000..6d46566674d --- /dev/null +++ b/docs/cudf/source/generated/cudf.DataFrame.drop.rst @@ -0,0 +1,6 @@ +cudf.DataFrame.drop +=================== + +.. currentmodule:: cudf + +.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/generated/cudf.DataFrame.where.rst b/docs/cudf/source/generated/cudf.DataFrame.where.rst new file mode 100644 index 00000000000..c2035bf11b0 --- /dev/null +++ b/docs/cudf/source/generated/cudf.DataFrame.where.rst @@ -0,0 +1,6 @@ +cudf.DataFrame.where +==================== + +.. currentmodule:: cudf + +.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.DataFrame.drop.rst b/docs/cudf/source/generated/pandas.DataFrame.drop.rst new file mode 100644 index 00000000000..9ee305369db --- /dev/null +++ b/docs/cudf/source/generated/pandas.DataFrame.drop.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.drop +===================== + +.. currentmodule:: pandas + +.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.DataFrame.groupby.rst b/docs/cudf/source/generated/pandas.DataFrame.groupby.rst new file mode 100644 index 00000000000..f8872657308 --- /dev/null +++ b/docs/cudf/source/generated/pandas.DataFrame.groupby.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.groupby +======================== + +.. currentmodule:: pandas + +.. 
automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.Series.array.rst b/docs/cudf/source/generated/pandas.Series.array.rst new file mode 100644 index 00000000000..e0954c01d1a --- /dev/null +++ b/docs/cudf/source/generated/pandas.Series.array.rst @@ -0,0 +1,6 @@ +pandas.Series.array +=================== + +.. currentmodule:: pandas + +.. autoproperty:: Series.array \ No newline at end of file diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index bba0ed824b1..950694f69a7 100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -1,20 +1,16 @@ Welcome to cuDF's documentation! ================================= + + .. toctree:: :maxdepth: 2 :caption: Contents: - api.rst - 10min.ipynb - basics.rst - io.rst - groupby.md - dask-cudf.md - 10min-cudf-cupy.ipynb - guide-to-udfs.ipynb - internals.md - Working-with-missing-data.ipynb + user_guide/index + basics/index + api_docs/index + Indices and tables ================== diff --git a/docs/cudf/source/pandas.DataFrame.drop.rst b/docs/cudf/source/pandas.DataFrame.drop.rst new file mode 100644 index 00000000000..9ee305369db --- /dev/null +++ b/docs/cudf/source/pandas.DataFrame.drop.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.drop +===================== + +.. currentmodule:: pandas + +.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/pandas.DataFrame.groupby.rst b/docs/cudf/source/pandas.DataFrame.groupby.rst new file mode 100644 index 00000000000..f8872657308 --- /dev/null +++ b/docs/cudf/source/pandas.DataFrame.groupby.rst @@ -0,0 +1,6 @@ +pandas.DataFrame.groupby +======================== + +.. currentmodule:: pandas + +.. 
automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb new file mode 100644 index 00000000000..0985291f3c2 --- /dev/null +++ b/docs/cudf/source/user_guide/10min-cudf-cupy.ipynb @@ -0,0 +1,1334 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 10 Minutes to cuDF and CuPy\n", + "\n", + "This notebook provides introductory examples of how you can use cuDF and CuPy together to take advantage of CuPy array functionality (such as advanced linear algebra operations)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import timeit\n", + "\n", + "import cupy as cp\n", + "import cudf" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting a cuDF DataFrame to a CuPy Array\n", + "\n", + "If we want to convert a cuDF DataFrame to a CuPy ndarray, There are multiple ways to do it:\n", + "\n", + "1. We can use the [dlpack](https://github.com/dmlc/dlpack) interface.\n", + "\n", + "2. We can also use `DataFrame.values`.\n", + "\n", + "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `as_gpu_matrix` and CuPy's `asarray` functionality." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44.1 µs ± 689 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "209 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", + "208 µs ± 3.14 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" + ] + } + ], + "source": [ + "nelem = 10000\n", + "df = cudf.DataFrame({'a':range(nelem),\n", + " 'b':range(500, nelem + 500),\n", + " 'c':range(1000, nelem + 1000)}\n", + " )\n", + "\n", + "%timeit arr_cupy = cp.fromDlpack(df.to_dlpack())\n", + "%timeit arr_cupy = df.values\n", + "%timeit arr_cupy = cp.asarray(df.as_gpu_matrix())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 500, 1000],\n", + " [ 1, 501, 1001],\n", + " [ 2, 502, 1002],\n", + " ...,\n", + " [ 9997, 10497, 10997],\n", + " [ 9998, 10498, 10998],\n", + " [ 9999, 10499, 10999]])" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arr_cupy = cp.fromDlpack(df.to_dlpack())\n", + "arr_cupy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting a cuDF Series to a CuPy Array" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are also multiple ways to convert a cuDF Series to a CuPy array:\n", + "\n", + "1. We can pass the Series to `cupy.asarray` as cuDF Series exposes [`__cuda_array_interface__`](https://docs-cupy.chainer.org/en/stable/reference/interoperability.html).\n", + "2. We can leverage the dlpack interface `to_dlpack()`. \n", + "3. We can also use `Series.values` \n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "22.1 µs ± 518 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "58.3 µs ± 647 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", + "80.2 µs ± 647 ns per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" + ] + } + ], + "source": [ + "col = 'a'\n", + "\n", + "%timeit cola_cupy = cp.asarray(df[col])\n", + "%timeit cola_cupy = cp.fromDlpack(df[col].to_dlpack())\n", + "%timeit cola_cupy = df[col].values" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, ..., 9997, 9998, 9999])" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cola_cupy = cp.asarray(df[col])\n", + "cola_cupy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From here, we can proceed with normal CuPy workflows, such as reshaping the array, getting the diagonal, or calculating the norm." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 1, 2, ..., 197, 198, 199],\n", + " [ 200, 201, 202, ..., 397, 398, 399],\n", + " [ 400, 401, 402, ..., 597, 598, 599],\n", + " ...,\n", + " [9400, 9401, 9402, ..., 9597, 9598, 9599],\n", + " [9600, 9601, 9602, ..., 9797, 9798, 9799],\n", + " [9800, 9801, 9802, ..., 9997, 9998, 9999]])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reshaped_arr = cola_cupy.reshape(50, 200)\n", + "reshaped_arr" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 201, 402, 603, 804, 1005, 1206, 1407, 1608, 1809, 2010,\n", + " 2211, 2412, 2613, 2814, 3015, 3216, 3417, 3618, 3819, 4020, 4221,\n", + " 4422, 4623, 4824, 5025, 5226, 5427, 5628, 5829, 6030, 6231, 6432,\n", + " 6633, 6834, 7035, 7236, 7437, 7638, 7839, 8040, 8241, 8442, 8643,\n", + " 8844, 9045, 9246, 9447, 9648, 9849])" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reshaped_arr.diagonal()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(577306.967739)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cp.linalg.norm(reshaped_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting a CuPy Array to a cuDF DataFrame\n", + "\n", + "We can also convert a CuPy ndarray to a cuDF DataFrame. Like before, there are multiple ways to do it:\n", + "\n", + "1. **Easiest;** We can directly use the `DataFrame` constructor.\n", + "\n", + "2. We can use CUDA array interface with the `DataFrame` constructor.\n", + "\n", + "3. We can also use the [dlpack](https://github.com/dmlc/dlpack) interface.\n", + "\n", + "For the latter two cases, we'll need to make sure that our CuPy array is Fortran contiguous in memory (if it's not already). We can either transpose the array or simply coerce it to be Fortran contiguous beforehand." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13.1 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%timeit reshaped_df = cudf.DataFrame(reshaped_arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", + "

5 rows × 200 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", + "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", + "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", + "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", + "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", + "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", + "\n", + " 194 195 196 197 198 199 \n", + "0 194 195 196 197 198 199 \n", + "1 394 395 396 397 398 399 \n", + "2 594 595 596 597 598 599 \n", + "3 794 795 796 797 798 799 \n", + "4 994 995 996 997 998 999 \n", + "\n", + "[5 rows x 200 columns]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reshaped_df = cudf.DataFrame(reshaped_arr)\n", + "reshaped_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can check whether our array is Fortran contiguous by using cupy.isfortran or looking at the [flags](https://docs-cupy.chainer.org/en/stable/reference/generated/cupy.ndarray.html#cupy.ndarray.flags) of the array." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cp.isfortran(reshaped_arr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this case, we'll need to convert it before going to a cuDF DataFrame. In the next two cells, we create the DataFrame by leveraging the CUDA array interface and dlpack, respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4.9 ms ± 26.4 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "fortran_arr = cp.asfortranarray(reshaped_arr)\n", + "reshaped_df = cudf.DataFrame(fortran_arr)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "5.1 ms ± 23.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" + ] + } + ], + "source": [ + "%%timeit\n", + "\n", + "fortran_arr = cp.asfortranarray(reshaped_arr)\n", + "reshaped_df = cudf.from_dlpack(fortran_arr.toDlpack())" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", + "

5 rows × 200 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", + "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", + "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", + "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", + "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", + "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", + "\n", + " 194 195 196 197 198 199 \n", + "0 194 195 196 197 198 199 \n", + "1 394 395 396 397 398 399 \n", + "2 594 595 596 597 598 599 \n", + "3 794 795 796 797 798 799 \n", + "4 994 995 996 997 998 999 \n", + "\n", + "[5 rows x 200 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fortran_arr = cp.asfortranarray(reshaped_arr)\n", + "reshaped_df = cudf.DataFrame(fortran_arr)\n", + "reshaped_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting a CuPy Array to a cuDF Series\n", + "\n", + "To convert an array to a Series, we can directly pass the array to the `Series` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 201\n", + "2 402\n", + "3 603\n", + "4 804\n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series(reshaped_arr.diagonal()).head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Interweaving cuDF and CuPy for Smooth PyData Workflows\n", + "\n", + "RAPIDS libraries and the entire GPU PyData ecosystem are developing quickly, but sometimes one library may not have the functionality you need. One example of this might be taking the row-wise sum (or mean) of a Pandas DataFrame. 
cuDF's support for row-wise operations isn't mature, so you'd need to either transpose the DataFrame or write a UDF and explicitly calculate the sum across each row. Transposing could lead to hundreds of thousands of columns (which cuDF wouldn't perform well with) depending on your data's shape, and writing a UDF can be time intensive.\n", + "\n", + "By leveraging the interoperability of the GPU PyData ecosystem, this operation becomes very easy. Let's take the row-wise sum of our previously reshaped cuDF DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", + "

5 rows × 200 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", + "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", + "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", + "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", + "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", + "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", + "\n", + " 194 195 196 197 198 199 \n", + "0 194 195 196 197 198 199 \n", + "1 394 395 396 397 398 399 \n", + "2 594 595 596 597 598 599 \n", + "3 794 795 796 797 798 799 \n", + "4 994 995 996 997 998 999 \n", + "\n", + "[5 rows x 200 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reshaped_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can just transform it into a CuPy array and use the `axis` argument of `sum`." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 19900, 59900, 99900, 139900, 179900, 219900, 259900,\n", + " 299900, 339900, 379900, 419900, 459900, 499900, 539900,\n", + " 579900, 619900, 659900, 699900, 739900, 779900, 819900,\n", + " 859900, 899900, 939900, 979900, 1019900, 1059900, 1099900,\n", + " 1139900, 1179900, 1219900, 1259900, 1299900, 1339900, 1379900,\n", + " 1419900, 1459900, 1499900, 1539900, 1579900, 1619900, 1659900,\n", + " 1699900, 1739900, 1779900, 1819900, 1859900, 1899900, 1939900,\n", + " 1979900])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_arr = cp.fromDlpack(reshaped_df.to_dlpack())\n", + "new_arr.sum(axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With just that single line, we're able to seamlessly move between data structures in this ecosystem, giving us enormous flexibility without sacrificing speed." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Converting a cuDF DataFrame to a CuPy Sparse Matrix\n", + "\n", + "We can also convert a DataFrame or Series to a CuPy sparse matrix. We might want to do this if downstream processes expect CuPy sparse matrices as an input.\n", + "\n", + "The sparse matrix data structure is defined by three dense arrays. We'll define a small helper function for cleanliness." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def cudf_to_cupy_sparse_matrix(data, sparseformat='column'):\n", + " \"\"\"Converts a cuDF object to a CuPy Sparse Column matrix.\n", + " \"\"\"\n", + " if sparseformat not in ('row', 'column',):\n", + " raise ValueError(\"Let's focus on column and row formats for now.\")\n", + " \n", + " _sparse_constructor = cp.sparse.csc_matrix\n", + " if sparseformat == 'row':\n", + " _sparse_constructor = cp.sparse.csr_matrix\n", + "\n", + " return _sparse_constructor(cp.fromDlpack(data.to_dlpack()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can define a sparsely populated DataFrame to illustrate this conversion to either sparse matrix format." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "df = cudf.DataFrame()\n", + "nelem = 10000\n", + "nonzero = 1000\n", + "for i in range(20):\n", + " arr = cp.random.normal(5, 5, nelem)\n", + " arr[cp.random.choice(arr.shape[0], nelem-nonzero, replace=False)] = 0\n", + " df['a' + str(i)] = arr" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
a0a1a2a3a4a5a6a7a8a9a10a11a12a13a14a15a16a17a18a19
00.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.016.8229590.00.0000000.00.00.00.000000
10.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.00.0000000.00.0000000.00.00.00.000000
20.00.06.6189720.00.00.00.00.00.00.00.0000002.256780.00.0000000.00.0000000.00.00.00.000000
30.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.00.0000000.02.7158020.00.00.00.000000
40.00.00.0000000.00.00.00.00.00.00.04.2965680.000000.00.0000000.00.0000000.00.00.04.865495
\n", + "
" + ], + "text/plain": [ + " a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 \\\n", + "0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", + "1 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", + "2 0.0 0.0 6.618972 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 2.25678 \n", + "3 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", + "4 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.296568 0.00000 \n", + "\n", + " a12 a13 a14 a15 a16 a17 a18 a19 \n", + "0 0.0 16.822959 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", + "1 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", + "2 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", + "3 0.0 0.000000 0.0 2.715802 0.0 0.0 0.0 0.000000 \n", + "4 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 4.865495 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparse_data = cudf_to_cupy_sparse_matrix(df)\n", + "sparse_data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From here, we could continue our workflow with a CuPy sparse matrix.\n", + "\n", + "For a full list of the functionality built into these libraries, we encourage you to check out the API docs for [cuDF](https://docs.rapids.ai/api/cudf/nightly/) and [CuPy](https://docs-cupy.chainer.org/en/stable/index.html)." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb new file mode 100644 index 00000000000..d31a3b426d7 --- /dev/null +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -0,0 +1,6487 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "10 Minutes to cuDF and Dask-cuDF\n", + "=======================\n", + "\n", + "Modeled after 10 Minutes to Pandas, this is a short introduction to cuDF and Dask-cuDF, geared mainly for new users.\n", + "\n", + "### What are these Libraries?\n", + "\n", + "[cuDF](https://github.com/rapidsai/cudf) is a Python GPU DataFrame library (built on the Apache Arrow columnar memory format) for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.\n", + "\n", + "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n", + "\n", + "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. 
For instance, when you call dask_cudf.read_csv(...), your cluster’s GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n", + "\n", + "\n", + "### When to use cuDF and Dask-cuDF\n", + "\n", + "If your workflow is fast enough on a single GPU or your data comfortably fits in memory on a single GPU, you would want to use cuDF. If you want to distribute your workflow across multiple GPUs, have more data than you can fit in memory on a single GPU, or want to analyze data spread across many files at once, you would want to use Dask-cuDF." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import cupy as cp\n", + "import pandas as pd\n", + "import cudf\n", + "import dask_cudf\n", + "\n", + "cp.random.seed(12)\n", + "\n", + "#### Portions of this were borrowed and adapted from the\n", + "#### cuDF cheatsheet, existing cuDF documentation,\n", + "#### and 10 Minutes to Pandas." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Object Creation\n", + "---------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a `cudf.Series` and `dask_cudf.Series`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 4\n", + "dtype: int64" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = cudf.Series([1,2,3,None,4])\n", + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 4\n", + "dtype: int64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = dask_cudf.from_cudf(s, npartitions=2) \n", + "ds.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a `cudf.DataFrame` and a `dask_cudf.DataFrame` by specifying values for each column." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
22172
33163
44154
55145
66136
77127
88118
99109
1010910
1111811
1212712
1313613
1414514
1515415
1616316
1717217
1818118
1919019
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1\n", + "2 2 17 2\n", + "3 3 16 3\n", + "4 4 15 4\n", + "5 5 14 5\n", + "6 6 13 6\n", + "7 7 12 7\n", + "8 8 11 8\n", + "9 9 10 9\n", + "10 10 9 10\n", + "11 11 8 11\n", + "12 12 7 12\n", + "13 13 6 13\n", + "14 14 5 14\n", + "15 15 4 15\n", + "16 16 3 16\n", + "17 17 2 17\n", + "18 18 1 18\n", + "19 19 0 19" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = cudf.DataFrame({'a': list(range(20)),\n", + " 'b': list(reversed(range(20))),\n", + " 'c': list(range(20))\n", + " })\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
22172
33163
44154
55145
66136
77127
88118
99109
1010910
1111811
1212712
1313613
1414514
1515415
1616316
1717217
1818118
1919019
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1\n", + "2 2 17 2\n", + "3 3 16 3\n", + "4 4 15 4\n", + "5 5 14 5\n", + "6 6 13 6\n", + "7 7 12 7\n", + "8 8 11 8\n", + "9 9 10 9\n", + "10 10 9 10\n", + "11 11 8 11\n", + "12 12 7 12\n", + "13 13 6 13\n", + "14 14 5 14\n", + "15 15 4 15\n", + "16 16 3 16\n", + "17 17 2 17\n", + "18 18 1 18\n", + "19 19 0 19" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf = dask_cudf.from_cudf(df, npartitions=2) \n", + "ddf.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a `cudf.DataFrame` from a pandas `DataFrame` and a `dask_cudf.DataFrame` from a `cudf.DataFrame`.\n", + "\n", + "*Note that best practice for using Dask-cuDF is to read data directly into a `dask_cudf.DataFrame` with something like `read_csv` (discussed below).*" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
000.1
110.2
22null
330.3
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 0 0.1\n", + "1 1 0.2\n", + "2 2 null\n", + "3 3 0.3" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]})\n", + "gdf = cudf.DataFrame.from_pandas(pdf)\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
000.1
110.2
22null
330.3
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 0 0.1\n", + "1 1 0.2\n", + "2 2 null\n", + "3 3 0.3" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dask_gdf = dask_cudf.from_cudf(gdf, npartitions=2)\n", + "dask_gdf.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Viewing Data\n", + "-------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Viewing the top rows of a GPU dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.head(2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Sorting by values." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1919019
1818118
1717217
1616316
1515415
1414514
1313613
1212712
1111811
1010910
99109
88118
77127
66136
55145
44154
33163
22172
11181
00190
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "19 19 0 19\n", + "18 18 1 18\n", + "17 17 2 17\n", + "16 16 3 16\n", + "15 15 4 15\n", + "14 14 5 14\n", + "13 13 6 13\n", + "12 12 7 12\n", + "11 11 8 11\n", + "10 10 9 10\n", + "9 9 10 9\n", + "8 8 11 8\n", + "7 7 12 7\n", + "6 6 13 6\n", + "5 5 14 5\n", + "4 4 15 4\n", + "3 3 16 3\n", + "2 2 17 2\n", + "1 1 18 1\n", + "0 0 19 0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by='b')" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1919019
1818118
1717217
1616316
1515415
1414514
1313613
1212712
1111811
1010910
99109
88118
77127
66136
55145
44154
33163
22172
11181
00190
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "19 19 0 19\n", + "18 18 1 18\n", + "17 17 2 17\n", + "16 16 3 16\n", + "15 15 4 15\n", + "14 14 5 14\n", + "13 13 6 13\n", + "12 12 7 12\n", + "11 11 8 11\n", + "10 10 9 10\n", + "9 9 10 9\n", + "8 8 11 8\n", + "7 7 12 7\n", + "6 6 13 6\n", + "5 5 14 5\n", + "4 4 15 4\n", + "3 3 16 3\n", + "2 2 17 2\n", + "1 1 18 1\n", + "0 0 19 0" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.sort_values(by='b').compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selection\n", + "------------\n", + "\n", + "## Getting" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecting a single column, which initially yields a `cudf.Series` or `dask_cudf.Series`. Calling `compute` results in a `cudf.Series` (equivalent to `df.a`)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "6 6\n", + "7 7\n", + "8 8\n", + "9 9\n", + "10 10\n", + "11 11\n", + "12 12\n", + "13 13\n", + "14 14\n", + "15 15\n", + "16 16\n", + "17 17\n", + "18 18\n", + "19 19\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 2\n", + "3 3\n", + "4 4\n", + "5 5\n", + "6 6\n", + "7 7\n", + "8 8\n", + "9 9\n", + "10 10\n", + "11 11\n", + "12 12\n", + "13 13\n", + "14 14\n", + "15 15\n", + "16 16\n", + "17 17\n", + "18 18\n", + "19 19\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf['a'].compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": 
[ + "## Selection by Label" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecting rows from index 2 to index 5 from columns 'a' and 'b'." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
2217
3316
4415
5514
\n", + "
" + ], + "text/plain": [ + " a b\n", + "2 2 17\n", + "3 3 16\n", + "4 4 15\n", + "5 5 14" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.loc[2:5, ['a', 'b']]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
2217
3316
4415
5514
\n", + "
" + ], + "text/plain": [ + " a b\n", + "2 2 17\n", + "3 3 16\n", + "4 4 15\n", + "5 5 14" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.loc[2:5, ['a', 'b']].compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Selection by Position" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecting via integers and integer slices, like numpy/pandas. Note that this functionality is not available for Dask-cuDF DataFrames." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "a 0\n", + "b 19\n", + "c 0\n", + "Name: 0, dtype: int64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0019
1118
2217
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 0 19\n", + "1 1 18\n", + "2 2 17" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[0:3, 0:2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also select elements of a `DataFrame` or `Series` with direct index access." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
33163
44154
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "3 3 16 3\n", + "4 4 15 4" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[3:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3 null\n", + "4 4\n", + "dtype: int64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s[3:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Boolean Indexing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecting rows in a `DataFrame` or `Series` by direct Boolean indexing." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
22172
33163
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1\n", + "2 2 17 2\n", + "3 3 16 3" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.b > 15]" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
11181
22172
33163
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "1 1 18 1\n", + "2 2 17 2\n", + "3 3 16 3" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf[ddf.b > 15].compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Selecting values from a `DataFrame` where a Boolean condition is met, via the `query` API." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1616316
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "16 16 3 16" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query(\"b == 3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1616316
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "16 16 3 16" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.query(\"b == 3\").compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also pass local variables to Dask-cuDF queries, via the `local_dict` keyword. With standard cuDF, you may either use the `local_dict` keyword or directly pass the variable via the `@` keyword. Supported logical operators include `>`, `<`, `>=`, `<=`, `==`, and `!=`." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1616316
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "16 16 3 16" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf_comparator = 3\n", + "df.query(\"b == @cudf_comparator\")" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
1616316
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "16 16 3 16" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dask_cudf_comparator = 3\n", + "ddf.query(\"b == @val\", local_dict={'val':dask_cudf_comparator}).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the `isin` method for filtering." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00190
55145
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 19 0\n", + "5 5 14 5" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.a.isin([0, 5])]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## MultiIndex" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "cuDF supports hierarchical indexing of DataFrames using MultiIndex. Grouping hierarchically (see `Grouping` below) automatically produces a DataFrame with a MultiIndex." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MultiIndex(levels=[0 a\n", + "1 b\n", + "dtype: object, 0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "dtype: int64],\n", + "codes= 0 1\n", + "0 0 0\n", + "1 0 1\n", + "2 1 2\n", + "3 1 3)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "arrays = [['a', 'a', 'b', 'b'], [1, 2, 3, 4]]\n", + "tuples = list(zip(*arrays))\n", + "idx = cudf.MultiIndex.from_tuples(tuples)\n", + "idx" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This index can back either axis of a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
firstsecond
a10.0826540.967955
20.3994170.441425
b30.7842970.793582
40.0703030.271711
\n", + "
" + ], + "text/plain": [ + " first second\n", + "a 1 0.082654 0.967955\n", + " 2 0.399417 0.441425\n", + "b 3 0.784297 0.793582\n", + " 4 0.070303 0.271711" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf1 = cudf.DataFrame({'first': cp.random.rand(4), 'second': cp.random.rand(4)})\n", + "gdf1.index = idx\n", + "gdf1" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
1234
first0.3433820.0037000.200430.581614
second0.9078120.1015120.241790.224180
\n", + "
" + ], + "text/plain": [ + " a b \n", + " 1 2 3 4\n", + "first 0.343382 0.003700 0.20043 0.581614\n", + "second 0.907812 0.101512 0.24179 0.224180" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf2 = cudf.DataFrame({'first': cp.random.rand(4), 'second': cp.random.rand(4)}).T\n", + "gdf2.columns = idx\n", + "gdf2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing values of a DataFrame with a MultiIndex. Note that slicing is not yet supported." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
firstsecond
01
b30.7842970.793582
\n", + "
" + ], + "text/plain": [ + " first second\n", + "0 1 \n", + "b 3 0.784297 0.793582" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf1.loc[('b', 3)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Missing Data\n", + "------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Missing data can be replaced by using the `fillna` method." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 999\n", + "4 4\n", + "dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.fillna(999)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 999\n", + "4 4\n", + "dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.fillna(999).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Operations\n", + "------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Stats" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculating descriptive statistics for a `Series`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2.5, 1.666666666666666)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.mean(), s.var()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2.5, 1.6666666666666667)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds.mean().compute(), ds.var().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Applymap" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Applying functions to a `Series`. Note that applying user-defined functions directly with Dask-cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/dataframe-api.html#dask.dataframe.DataFrame.map_partitions) to apply a function to each partition of the distributed dataframe."
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10\n", + "1 11\n", + "2 12\n", + "3 13\n", + "4 14\n", + "5 15\n", + "6 16\n", + "7 17\n", + "8 18\n", + "9 19\n", + "10 20\n", + "11 21\n", + "12 22\n", + "13 23\n", + "14 24\n", + "15 25\n", + "16 26\n", + "17 27\n", + "18 28\n", + "19 29\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def add_ten(num):\n", + " return num + 10\n", + "\n", + "df['a'].applymap(add_ten)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10\n", + "1 11\n", + "2 12\n", + "3 13\n", + "4 14\n", + "5 15\n", + "6 16\n", + "7 17\n", + "8 18\n", + "9 19\n", + "10 20\n", + "11 21\n", + "12 22\n", + "13 23\n", + "14 24\n", + "15 25\n", + "16 26\n", + "17 27\n", + "18 28\n", + "19 29\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf['a'].map_partitions(add_ten).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Histogramming" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Counting the number of occurrences of each unique value of a variable."
+ ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 1\n", + "8 1\n", + "9 1\n", + "10 1\n", + "11 1\n", + "12 1\n", + "13 1\n", + "14 1\n", + "15 1\n", + "16 1\n", + "17 1\n", + "18 1\n", + "19 1\n", + "Name: a, dtype: int32" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.a.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 1\n", + "8 1\n", + "9 1\n", + "10 1\n", + "11 1\n", + "12 1\n", + "13 1\n", + "14 1\n", + "15 1\n", + "16 1\n", + "17 1\n", + "18 1\n", + "19 1\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.a.value_counts().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## String Methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like pandas, cuDF provides string processing methods in the `str` attribute of `Series`. Full documentation of string methods is a work in progress. Please see the cuDF API documentation for more information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 a\n", + "1 b\n", + "2 c\n", + "3 aaba\n", + "4 baca\n", + "5 None\n", + "6 caba\n", + "7 dog\n", + "8 cat\n", + "dtype: object" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = cudf.Series(['A', 'B', 'C', 'Aaba', 'Baca', None, 'CABA', 'dog', 'cat'])\n", + "s.str.lower()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 a\n", + "1 b\n", + "2 c\n", + "3 aaba\n", + "4 baca\n", + "5 None\n", + "6 caba\n", + "7 dog\n", + "8 cat\n", + "dtype: object" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = dask_cudf.from_cudf(s, npartitions=2)\n", + "ds.str.lower().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Concatenating `Series` and `DataFrames` row-wise." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s = cudf.Series([1, 2, 3, None, 5])\n", + "cudf.concat([s, s])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds2 = dask_cudf.from_cudf(s, npartitions=2)\n", + "dask_cudf.concat([ds2, ds2]).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Join" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Performing SQL style merges. Note that the dataframe order is not maintained, but may be restored post-merge by sorting by the index." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keyvals_avals_b
0a10.0100.0
1c12.0101.0
2e14.0102.0
3b11.0null
4d13.0null
\n", + "
" + ], + "text/plain": [ + " key vals_a vals_b\n", + "0 a 10.0 100.0\n", + "1 c 12.0 101.0\n", + "2 e 14.0 102.0\n", + "3 b 11.0 null\n", + "4 d 13.0 null" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_a = cudf.DataFrame()\n", + "df_a['key'] = ['a', 'b', 'c', 'd', 'e']\n", + "df_a['vals_a'] = [float(i + 10) for i in range(5)]\n", + "\n", + "df_b = cudf.DataFrame()\n", + "df_b['key'] = ['a', 'c', 'e']\n", + "df_b['vals_b'] = [float(i+100) for i in range(3)]\n", + "\n", + "merged = df_a.merge(df_b, on=['key'], how='left')\n", + "merged" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
keyvals_avals_b
0a10.0100.0
1c12.0101.0
2b11.0null
0e14.0102.0
1d13.0null
\n", + "
" + ], + "text/plain": [ + " key vals_a vals_b\n", + "0 a 10.0 100.0\n", + "1 c 12.0 101.0\n", + "2 b 11.0 null\n", + "0 e 14.0 102.0\n", + "1 d 13.0 null" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf_a = dask_cudf.from_cudf(df_a, npartitions=2)\n", + "ddf_b = dask_cudf.from_cudf(df_b, npartitions=2)\n", + "\n", + "merged = ddf_a.merge(ddf_b, on=['key'], how='left').compute()\n", + "merged" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Append" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Appending values from another `Series` or array-like object." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s.append(s)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "0 1\n", + "1 2\n", + "2 3\n", + "3 null\n", + "4 5\n", + "dtype: int64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds2.append(ds2).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Grouping" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like pandas, cuDF and Dask-cuDF support the Split-Apply-Combine groupby paradigm." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "df['agg_col1'] = [1 if x % 2 == 0 else 0 for x in range(len(df))]\n", + "df['agg_col2'] = [1 if x % 3 == 0 else 0 for x in range(len(df))]\n", + "\n", + "ddf = dask_cudf.from_cudf(df, npartitions=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Grouping and then applying the `sum` function to the grouped data." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col2
agg_col1
0100901003
190100904
\n", + "
" + ], + "text/plain": [ + " a b c agg_col2\n", + "agg_col1 \n", + "0 100 90 100 3\n", + "1 90 100 90 4" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('agg_col1').sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col2
agg_col1
0100901003
190100904
\n", + "
" + ], + "text/plain": [ + " a b c agg_col2\n", + "agg_col1 \n", + "0 100 90 100 3\n", + "1 90 100 90 4" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.groupby('agg_col1').sum().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Grouping hierarchically then applying the `sum` function to grouped data." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
agg_col1agg_col2
00736073
1273027
10546054
1364036
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "agg_col1 agg_col2 \n", + "0 0 73 60 73\n", + " 1 27 30 27\n", + "1 0 54 60 54\n", + " 1 36 40 36" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby(['agg_col1', 'agg_col2']).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
agg_col1agg_col2
11364036
00736073
10546054
01273027
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "agg_col1 agg_col2 \n", + "1 1 36 40 36\n", + "0 0 73 60 73\n", + "1 0 54 60 54\n", + "0 1 27 30 27" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.groupby(['agg_col1', 'agg_col2']).sum().compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Grouping and applying statistical functions to specific columns, using `agg`." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
agg_col1
0199.0100
11810.090
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "agg_col1 \n", + "0 19 9.0 100\n", + "1 18 10.0 90" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('agg_col1').agg({'a':'max', 'b':'mean', 'c':'sum'})" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
agg_col1
0199.0100
11810.090
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "agg_col1 \n", + "0 19 9.0 100\n", + "1 18 10.0 90" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.groupby('agg_col1').agg({'a':'max', 'b':'mean', 'c':'sum'}).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Transpose" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Transposing a dataframe, using either the `transpose` method or `T` property. Currently, all columns must have the same type. Transposing is not currently implemented in Dask-cuDF." + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
014
125
236
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 4\n", + "1 2 5\n", + "2 3 6" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample = cudf.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})\n", + "sample" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
a123
b456
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "a 1 2 3\n", + "b 4 5 6" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sample.transpose()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Time Series\n", + "------------\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`DataFrames` supports `datetime` typed columns, which allow users to interact with and filter data based on specific timestamps." + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datevalue
02018-11-200.986051
12018-11-210.232034
22018-11-220.397617
32018-11-230.103839
\n", + "
" + ], + "text/plain": [ + " date value\n", + "0 2018-11-20 0.986051\n", + "1 2018-11-21 0.232034\n", + "2 2018-11-22 0.397617\n", + "3 2018-11-23 0.103839" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import datetime as dt\n", + "\n", + "date_df = cudf.DataFrame()\n", + "date_df['date'] = pd.date_range('11/20/2018', periods=72, freq='D')\n", + "date_df['value'] = cp.random.sample(len(date_df))\n", + "\n", + "search_date = dt.datetime.strptime('2018-11-23', '%Y-%m-%d')\n", + "date_df.query('date <= @search_date')" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datevalue
02018-11-200.986051
12018-11-210.232034
22018-11-220.397617
32018-11-230.103839
\n", + "
" + ], + "text/plain": [ + " date value\n", + "0 2018-11-20 0.986051\n", + "1 2018-11-21 0.232034\n", + "2 2018-11-22 0.397617\n", + "3 2018-11-23 0.103839" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "date_ddf = dask_cudf.from_cudf(date_df, npartitions=2)\n", + "date_ddf.query('date <= @search_date', local_dict={'search_date':search_date}).compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Categoricals\n", + "------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`DataFrames` support categorical columns." + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgrade
01a
12b
23b
34a
45a
56e
\n", + "
" + ], + "text/plain": [ + " id grade\n", + "0 1 a\n", + "1 2 b\n", + "2 3 b\n", + "3 4 a\n", + "4 5 a\n", + "5 6 e" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf = cudf.DataFrame({\"id\": [1, 2, 3, 4, 5, 6], \"grade\":['a', 'b', 'b', 'a', 'a', 'e']})\n", + "gdf['grade'] = gdf['grade'].astype('category')\n", + "gdf" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgrade
01a
12b
23b
34a
45a
56e
\n", + "
" + ], + "text/plain": [ + " id grade\n", + "0 1 a\n", + "1 2 b\n", + "2 3 b\n", + "3 4 a\n", + "4 5 a\n", + "5 6 e" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dgdf = dask_cudf.from_cudf(gdf, npartitions=2)\n", + "dgdf.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing the categories of a column. Note that this is currently not supported in Dask-cuDF." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "StringIndex(['a' 'b' 'e'], dtype='object')" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf.grade.cat.categories" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Accessing the underlying code values of each categorical observation." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 0\n", + "4 0\n", + "5 2\n", + "dtype: int8" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf.grade.cat.codes" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 1\n", + "2 1\n", + "3 0\n", + "4 0\n", + "5 2\n", + "dtype: int8" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dgdf.grade.cat.codes.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Converting Data Representation\n", + "--------------------------------" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pandas" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Converting a cuDF and Dask-cuDF `DataFrame` to a pandas 
`DataFrame`." + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head().to_pandas()" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.compute().head().to_pandas()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numpy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Converting a cuDF or Dask-cuDF `DataFrame` to a numpy `ndarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 19, 0, 1, 1],\n", + " [ 1, 18, 1, 0, 0],\n", + " [ 2, 17, 2, 1, 0],\n", + " [ 3, 16, 3, 0, 1],\n", + " [ 4, 15, 4, 1, 0],\n", + " [ 5, 14, 5, 0, 0],\n", + " [ 6, 13, 6, 1, 1],\n", + " [ 7, 12, 7, 0, 0],\n", + " [ 8, 11, 8, 1, 0],\n", + " [ 9, 10, 9, 0, 1],\n", + " [10, 9, 10, 1, 0],\n", + " [11, 8, 11, 0, 0],\n", + " [12, 7, 12, 1, 1],\n", + " [13, 6, 13, 0, 0],\n", + " [14, 5, 14, 1, 0],\n", + " [15, 4, 15, 0, 1],\n", + " [16, 3, 16, 1, 0],\n", + " [17, 2, 17, 0, 0],\n", + " [18, 1, 18, 1, 1],\n", + " [19, 0, 19, 0, 0]])" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.as_matrix()" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0, 19, 0, 1, 1],\n", + " [ 1, 18, 1, 0, 0],\n", + " [ 2, 17, 2, 1, 0],\n", + " [ 3, 16, 3, 0, 1],\n", + " [ 4, 15, 4, 1, 0],\n", + " [ 5, 14, 5, 0, 0],\n", + " [ 6, 13, 6, 1, 1],\n", + " [ 7, 12, 7, 0, 0],\n", + " [ 8, 11, 8, 1, 0],\n", + " [ 9, 10, 9, 0, 1],\n", + " [10, 9, 10, 1, 0],\n", + " [11, 8, 11, 0, 0],\n", + " [12, 7, 12, 1, 1],\n", + " [13, 6, 13, 0, 0],\n", + " [14, 5, 14, 1, 0],\n", + " [15, 4, 15, 0, 1],\n", + " [16, 3, 16, 1, 0],\n", + " [17, 2, 17, 0, 0],\n", + " [18, 1, 18, 1, 1],\n", + " [19, 0, 19, 0, 0]])" + 
] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.compute().as_matrix()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Converting a cuDF or Dask-cuDF `Series` to a numpy `ndarray`." + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19])" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a'].to_array()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", + " 17, 18, 19])" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf['a'].compute().to_array()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arrow" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Converting a cuDF or Dask-cuDF `DataFrame` to a PyArrow `Table`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyarrow.Table\n", + "a: int64\n", + "b: int64\n", + "c: int64\n", + "agg_col1: int64\n", + "agg_col2: int64\n", + "metadata\n", + "--------\n", + "{b'pandas': b'{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"'\n", + " b'stop\": 20, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field'\n", + " b'_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", '\n", + " b'\"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"a\", \"'\n", + " b'field_name\": \"a\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\",'\n", + " b' \"metadata\": null}, {\"name\": \"b\", \"field_name\": \"b\", \"pandas_typ'\n", + " b'e\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": '\n", + " b'\"c\", \"field_name\": \"c\", \"pandas_type\": \"int64\", \"numpy_type\": \"i'\n", + " b'nt64\", \"metadata\": null}, {\"name\": \"agg_col1\", \"field_name\": \"ag'\n", + " b'g_col1\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadat'\n", + " b'a\": null}, {\"name\": \"agg_col2\", \"field_name\": \"agg_col2\", \"panda'\n", + " b's_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}], \"cr'\n", + " b'eator\": {\"library\": \"pyarrow\", \"version\": \"0.15.0\"}, \"pandas_ver'\n", + " b'sion\": \"0.25.3\"}'}" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.to_arrow()" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pyarrow.Table\n", + "a: int64\n", + "b: int64\n", + "c: int64\n", + "agg_col1: int64\n", + "agg_col2: int64\n", + "__index_level_0__: int64\n", + "metadata\n", + "--------\n", + "{b'pandas': b'{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na'\n", + " b'me\": null, 
\"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_'\n", + " b'type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\":'\n", + " b' [{\"name\": \"a\", \"field_name\": \"a\", \"pandas_type\": \"int64\", \"nump'\n", + " b'y_type\": \"int64\", \"metadata\": null}, {\"name\": \"b\", \"field_name\":'\n", + " b' \"b\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\":'\n", + " b' null}, {\"name\": \"c\", \"field_name\": \"c\", \"pandas_type\": \"int64\",'\n", + " b' \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": \"agg_col1\", '\n", + " b'\"field_name\": \"agg_col1\", \"pandas_type\": \"int64\", \"numpy_type\": '\n", + " b'\"int64\", \"metadata\": null}, {\"name\": \"agg_col2\", \"field_name\": \"'\n", + " b'agg_col2\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metad'\n", + " b'ata\": null}, {\"name\": null, \"field_name\": \"__index_level_0__\", \"'\n", + " b'pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}]'\n", + " b', \"creator\": {\"library\": \"pyarrow\", \"version\": \"0.15.0\"}, \"panda'\n", + " b's_version\": \"0.25.3\"}'}" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf.compute().to_arrow()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Getting Data In/Out\n", + "------------------------\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CSV" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Writing to a CSV file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [], + "source": [ + "if not os.path.exists('example_output'):\n", + " os.mkdir('example_output')\n", + " \n", + "df.to_csv('example_output/foo.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "ddf.compute().to_csv('example_output/foo_dask.csv', index=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reading from a CSV file." + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0\n", + "5 5 14 5 0 0\n", + "6 6 13 6 1 1\n", + "7 7 12 7 0 0\n", + "8 8 11 8 1 0\n", + "9 9 10 9 0 1\n", + "10 10 9 10 1 0\n", + "11 11 8 11 0 0\n", + "12 12 7 12 1 1\n", + "13 13 6 13 0 0\n", + "14 14 5 14 1 0\n", + "15 15 4 15 0 1\n", + "16 16 3 16 1 0\n", + "17 17 2 17 0 0\n", + "18 18 1 18 1 1\n", + "19 19 0 19 0 0" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = cudf.read_csv('example_output/foo.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0\n", + "5 5 14 5 0 0\n", + "6 6 13 6 1 1\n", + "7 7 12 7 0 0\n", + "8 8 11 8 1 0\n", + "9 9 10 9 0 1\n", + "10 10 9 10 1 0\n", + "11 11 8 11 0 0\n", + "12 12 7 12 1 1\n", + "13 13 6 13 0 0\n", + "14 14 5 14 1 0\n", + "15 15 4 15 0 1\n", + "16 16 3 16 1 0\n", + "17 17 2 17 0 0\n", + "18 18 1 18 1 1\n", + "19 19 0 19 0 0" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf = dask_cudf.read_csv('example_output/foo_dask.csv')\n", + "ddf.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reading all CSV files in a directory into a single `dask_cudf.DataFrame`, using the star wildcard." + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0\n", + "5 5 14 5 0 0\n", + "6 6 13 6 1 1\n", + "7 7 12 7 0 0\n", + "8 8 11 8 1 0\n", + "9 9 10 9 0 1\n", + "10 10 9 10 1 0\n", + "11 11 8 11 0 0\n", + "12 12 7 12 1 1\n", + "13 13 6 13 0 0\n", + "14 14 5 14 1 0\n", + "15 15 4 15 0 1\n", + "16 16 3 16 1 0\n", + "17 17 2 17 0 0\n", + "18 18 1 18 1 1\n", + "19 19 0 19 0 0\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0\n", + "5 5 14 5 0 0\n", + "6 6 13 6 1 1\n", + "7 7 12 7 0 0\n", + "8 8 11 8 1 0\n", + "9 9 10 9 0 1\n", + "10 10 9 10 1 0\n", + "11 11 8 11 0 0\n", + "12 12 7 12 1 1\n", + "13 13 6 13 0 0\n", + "14 14 5 14 1 0\n", + "15 15 4 15 0 1\n", + "16 16 3 16 1 0\n", + "17 17 2 17 0 0\n", + "18 18 1 18 1 1\n", + "19 19 0 19 0 0" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf = dask_cudf.read_csv('example_output/*.csv')\n", + "ddf.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Parquet" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Writing to parquet files, using the CPU via PyArrow." + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_parquet('example_output/temp_parquet')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reading parquet files with a GPU-accelerated parquet reader." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", + "
" + ], + "text/plain": [ + " a b c agg_col1 agg_col2\n", + "0 0 19 0 1 1\n", + "1 1 18 1 0 0\n", + "2 2 17 2 1 0\n", + "3 3 16 3 0 1\n", + "4 4 15 4 1 0\n", + "5 5 14 5 0 0\n", + "6 6 13 6 1 1\n", + "7 7 12 7 0 0\n", + "8 8 11 8 1 0\n", + "9 9 10 9 0 1\n", + "10 10 9 10 1 0\n", + "11 11 8 11 0 0\n", + "12 12 7 12 1 1\n", + "13 13 6 13 0 0\n", + "14 14 5 14 1 0\n", + "15 15 4 15 0 1\n", + "16 16 3 16 1 0\n", + "17 17 2 17 0 0\n", + "18 18 1 18 1 1\n", + "19 19 0 19 0 0" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = cudf.read_parquet('example_output/temp_parquet')\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Writing to parquet files from a `dask_cudf.DataFrame` using PyArrow under the hood." + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "ddf.to_parquet('example_files') " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ORC" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Reading ORC files." + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
boolean1byte1short1int1long1float1double1bytes1string1middle.list.int1middle.list.string1list.int1list.string1mapmap.int1map.string1
0False110246553692233720368547758071.0-15.0\u0000\u0001\u0002\u0003\u0004hi3bye4chani5chani
1True10020486553692233720368547758072.0-5.0bye0bye0mauddib1mauddib
\n", + "
" + ], + "text/plain": [ + " boolean1 byte1 short1 int1 long1 float1 double1 \\\n", + "0 False 1 1024 65536 9223372036854775807 1.0 -15.0 \n", + "1 True 100 2048 65536 9223372036854775807 2.0 -5.0 \n", + "\n", + " bytes1 string1 middle.list.int1 middle.list.string1 list.int1 \\\n", + "0 \u0000\u0001\u0002\u0003\u0004 hi 3 bye 4 \n", + "1 bye 0 bye 0 \n", + "\n", + " list.string1 map map.int1 map.string1 \n", + "0 chani 5 chani \n", + "1 mauddib 1 mauddib " + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = cudf.read_orc('/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc')\n", + "df2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Dask Performance Tips\n", + "--------------------------------\n", + "\n", + "Like Apache Spark, Dask operations are [lazy](https://en.wikipedia.org/wiki/Lazy_evaluation). Instead of being executed at that moment, most operations are added to a task graph and the actual evaluation is delayed until the result is needed.\n", + "\n", + "Sometimes, though, we want to force the execution of operations. Calling `persist` on a Dask collection fully computes it (or actively computes it in the background), persisting the result into memory. When we're using distributed systems, we may want to wait until `persist` is finished before beginning any downstream operations. We can enforce this contract by using `wait`. Wrapping an operation with `wait` will ensure it doesn't begin executing until all necessary upstream operations have finished.\n", + "\n", + "The snippets below provide basic examples, using `LocalCUDACluster` to create one dask-worker per GPU on the local machine. For more detailed information about `persist` and `wait`, please see the Dask documentation for [persist](https://docs.dask.org/en/latest/api.html#dask.persist) and [wait](https://docs.dask.org/en/latest/futures.html#distributed.wait). 
Wait relies on the concept of Futures, which is beyond the scope of this tutorial. For more information on Futures, see the Dask [Futures](https://docs.dask.org/en/latest/futures.html) documentation. For more information about multi-GPU clusters, please see the [dask-cuda](https://github.com/rapidsai/dask-cuda) library (documentation is in progress)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First, we set up a GPU cluster. With our `client` set up, Dask-cuDF computation will be distributed across the GPUs in the cluster." + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "

Client

\n", + "\n", + "
\n", + "

Cluster

\n", + "
    \n", + "
  • Workers: 4
  • \n", + "
  • Cores: 4
  • \n", + "
  • Memory: 404.32 GB
  • \n", + "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import time\n", + "\n", + "from dask.distributed import Client, wait\n", + "from dask_cuda import LocalCUDACluster\n", + "\n", + "cluster = LocalCUDACluster()\n", + "client = Client(cluster)\n", + "client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Persisting Data\n", + "Next, we create our Dask-cuDF DataFrame and apply a transformation, storing the result as a new column." + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Dask DataFrame Structure:
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
npartitions=5
0int64int64int64
2000000.........
............
8000000.........
9999999.........
\n", + "
\n", + "
Dask Name: assign, 20 tasks
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nrows = 10000000\n", + "\n", + "df2 = cudf.DataFrame({'a': cp.arange(nrows), 'b': cp.arange(nrows)})\n", + "ddf2 = dask_cudf.from_cudf(df2, npartitions=5)\n", + "ddf2['c'] = ddf2['a'] + 5\n", + "ddf2" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thu Jun 4 05:36:08 2020 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 440.64.00 Driver Version: 440.64.00 CUDA Version: 10.2 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "|===============================+======================+======================|\n", + "| 0 Tesla T4 On | 00000000:3B:00.0 Off | 0 |\n", + "| N/A 38C P0 27W / 70W | 743MiB / 15109MiB | 2% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 1 Tesla T4 On | 00000000:5E:00.0 Off | 0 |\n", + "| N/A 41C P0 26W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 2 Tesla T4 On | 00000000:AF:00.0 Off | 0 |\n", + "| N/A 35C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 3 Tesla T4 On | 00000000:D8:00.0 Off | 0 |\n", + "| N/A 34C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: GPU Memory |\n", + "| GPU PID Type Process name Usage 
|\n", + "|=============================================================================|\n", + "| 0 57229 C ...sets/pgali/envs/new_cudf_env/bin/python 633MiB |\n", + "| 0 57341 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 1 57337 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 2 57336 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 3 57335 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "+-----------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Because Dask is lazy, the computation has not yet occurred. We can see that there are twenty tasks in the task graph and we've used about 800 MB of memory. We can force computation by using `persist`. By forcing execution, the result is now explicitly in memory and our task graph only contains one task per partition (the baseline)." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Dask DataFrame Structure:
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
npartitions=5
0int64int64int64
2000000.........
............
8000000.........
9999999.........
\n", + "
\n", + "
Dask Name: assign, 5 tasks
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ddf2 = ddf2.persist()\n", + "ddf2" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Thu Jun 4 05:36:09 2020 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 440.64.00 Driver Version: 440.64.00 CUDA Version: 10.2 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "|===============================+======================+======================|\n", + "| 0 Tesla T4 On | 00000000:3B:00.0 Off | 0 |\n", + "| N/A 38C P0 27W / 70W | 743MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 1 Tesla T4 On | 00000000:5E:00.0 Off | 0 |\n", + "| N/A 42C P0 26W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 2 Tesla T4 On | 00000000:AF:00.0 Off | 0 |\n", + "| N/A 35C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + "| 3 Tesla T4 On | 00000000:D8:00.0 Off | 0 |\n", + "| N/A 34C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: GPU Memory |\n", + "| GPU PID Type Process name Usage |\n", + "|=============================================================================|\n", + "| 0 57229 C ...sets/pgali/envs/new_cudf_env/bin/python 633MiB |\n", + 
"| 0 57341 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 1 57337 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 2 57336 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "| 3 57335 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", + "+-----------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "!nvidia-smi" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Because we forced computation, we now have a larger object in distributed GPU memory." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Wait\n", + "Depending on our workflow or distributed computing setup, we may want to `wait` until all upstream tasks have finished before proceeding with a specific function. This section shows an example of this behavior, adapted from the Dask documentation.\n", + "\n", + "First, we create a new Dask DataFrame and define a function that we'll map to every partition in the dataframe." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "nrows = 10000000\n", + "\n", + "df1 = cudf.DataFrame({'a': cp.arange(nrows), 'b': cp.arange(nrows)})\n", + "ddf1 = dask_cudf.from_cudf(df1, npartitions=100)\n", + "\n", + "def func(df):\n", + " time.sleep(cp.random.randint(1, 60))\n", + " return (df + 5) * 3 - 11" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function will do a basic transformation of every column in the dataframe, but the time spent in the function will vary due to the `time.sleep` statement randomly adding 1-60 seconds of time. We'll run this on every partition of our dataframe using `map_partitions`, which adds the task to our task-graph, and store the result. We can then call `persist` to force execution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "results_ddf = ddf2.map_partitions(func)\n", + "results_ddf = results_ddf.persist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "However, some partitions will be done **much** sooner than others. If we had downstream processes that should wait for all partitions to be completed, we can enforce that behavior using `wait`." + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DoneAndNotDoneFutures(done={, , , , }, not_done=set())" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wait(results_ddf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With `wait`, we can safely proceed on in our workflow." + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/cudf/source/user_guide/Working-with-missing-data.ipynb b/docs/cudf/source/user_guide/Working-with-missing-data.ipynb new file mode 100644 index 00000000000..54fe774060e --- /dev/null +++ b/docs/cudf/source/user_guide/Working-with-missing-data.ipynb @@ -0,0 +1,3466 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# Working with missing data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this section, we will discuss missing (also referred to as `NA`) values in cudf. cudf supports having missing values in all dtypes. These missing values are represented by `<NA>`. These values are also referred to as \"null values\"." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. [How to Detect missing values](#How-to-Detect-missing-values)\n", + "2. [Float dtypes and missing data](#Float-dtypes-and-missing-data)\n", + "3. [Datetimes](#Datetimes)\n", + "4. [Calculations with missing data](#Calculations-with-missing-data)\n", + "5. [Sum/product of Null/nans](#Sum/product-of-Null/nans)\n", + "6. [NA values in GroupBy](#NA-values-in-GroupBy)\n", + "7. [Inserting missing data](#Inserting-missing-data)\n", + "8. [Filling missing values: fillna](#Filling-missing-values:-fillna)\n", + "9. [Filling with cudf Object](#Filling-with-cudf-Object)\n", + "10. [Dropping axis labels with missing data: dropna](#Dropping-axis-labels-with-missing-data:-dropna)\n", + "11. [Replacing generic values](#Replacing-generic-values)\n", + "12. [String/regular expression replacement](#String/regular-expression-replacement)\n", + "13. [Numeric replacement](#Numeric-replacement)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to Detect missing values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To detect missing values, you can use `isna()` and `notna()` functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import cudf\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = cudf.DataFrame({'a': [1, 2, None, 4], 'b':[0.1, None, 2.3, 17.17]})" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
010.1
12<NA>
2<NA>2.3
3417.17
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 0.1\n", + "1 2 \n", + "2 2.3\n", + "3 4 17.17" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0FalseFalse
1FalseTrue
2TrueFalse
3FalseFalse
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 False False\n", + "1 False True\n", + "2 True False\n", + "3 False False" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 True\n", + "1 True\n", + "2 False\n", + "3 True\n", + "Name: a, dtype: bool" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a'].notna()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "One has to be mindful that in Python (and NumPy), the nan's don’t compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "None == None" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.nan == np.nan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 False\n", + "Name: b, dtype: bool" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['b'] == np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": 
[], + "source": [ + "s = cudf.Series([None, 1, 2])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 \n", + "1 1\n", + "2 2\n", + "dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "dtype: bool" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s == None" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "s = cudf.Series([1, 2, np.nan], nan_as_null=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.0\n", + "1 2.0\n", + "2 NaN\n", + "dtype: float64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "dtype: bool" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "s == np.nan" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Float dtypes and missing data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Because ``NaN`` is a float, a column of integers with even one missing value is cast to floating-point dtype. However this doesn't happen by default.\n", + "\n", + "By default if a ``NaN`` value is passed to `Series` constructor, it is treated as `<NA>` value. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 \n", + "dtype: int64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([1, 2, np.nan])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hence to consider a ``NaN`` as ``NaN`` you will have to pass `nan_as_null=False` parameter into `Series` constructor." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1.0\n", + "1 2.0\n", + "2 NaN\n", + "dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([1, 2, np.nan], nan_as_null=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Datetimes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For `datetime64` types, cudf doesn't support having `NaT` values. Instead these values which are specific to numpy and pandas are considered as null values (`<NA>`) in cudf. 
The actual underlying value of `NaT` is `min(int64)` and cudf retains the underlying value when converting a cudf object to a pandas object.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2012-01-01 00:00:00.000000\n", + "1 \n", + "2 2012-01-01 00:00:00.000000\n", + "dtype: datetime64[us]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "datetime_series = cudf.Series([pd.Timestamp(\"20120101\"), pd.NaT, pd.Timestamp(\"20120101\")])\n", + "datetime_series" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2012-01-01\n", + "1 NaT\n", + "2 2012-01-01\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datetime_series.to_pandas()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Any operation on rows having `<NA>` values in a `datetime` column will result in a `<NA>` value at the same location in the resulting column:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0 days 00:00:00\n", + "1 \n", + "2 0 days 00:00:00\n", + "dtype: timedelta64[us]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datetime_series - datetime_series" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Calculations with missing data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Null values propagate naturally through arithmetic operations between cudf objects." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "df1 = cudf.DataFrame({'a':[1, None, 2, 3, None], 'b':cudf.Series([np.nan, 2, 3.2, 0.1, 1], nan_as_null=False)})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "df2 = cudf.DataFrame({'a':[1, 11, 2, 34, 10], 'b':cudf.Series([0.23, 22, 3.2, None, 1])})" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 NaN\n", + "1 2.0\n", + "2 2 3.2\n", + "3 3 0.1\n", + "4 1.0" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
010.23
11122.0
223.2
334<NA>
4101.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 0.23\n", + "1 11 22.0\n", + "2 2 3.2\n", + "3 34 \n", + "4 10 1.0" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
02NaN
1<NA>24.0
246.4
337<NA>
4<NA>2.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 2 NaN\n", + "1 24.0\n", + "2 4 6.4\n", + "3 37 \n", + "4 2.0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 + df2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "While summing the data along a series, `NA` values will be treated as `0`." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 \n", + "2 2\n", + "3 3\n", + "4 \n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a']" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since `NA` values are skipped by default, the mean in this case is 2: `(1 + 2 + 3)/3 = 2`" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To preserve `NA` values in the above calculations, `sum` & `mean` support `skipna` parameter.\n", + "By default its value is\n", + "set to `True`; we can change it to `False` to preserve `NA` values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].sum(skipna=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].mean(skipna=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Cumulative methods like `cumsum` and `cumprod` ignore `NA` values by default." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 \n", + "2 3\n", + "3 6\n", + "4 \n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].cumsum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To preserve `NA` values in cumulative methods, provide `skipna=False`." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].cumsum(skipna=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sum/product of Null/nans" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The sum of an empty or all-NA Series of a DataFrame is 0." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([np.nan], nan_as_null=False).sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([np.nan], nan_as_null=False).sum(skipna=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([], dtype='float64').sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The product of an empty or all-NA Series of a DataFrame is 1." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([np.nan], nan_as_null=False).prod()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "nan" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([np.nan], nan_as_null=False).prod(skipna=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.0" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cudf.Series([], dtype='float64').prod()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## NA values in GroupBy" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`NA` groups in GroupBy are automatically excluded. For example:" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 NaN\n", + "1 2.0\n", + "2 2 3.2\n", + "3 3 0.1\n", + "4 1.0" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
b
a
23.2
1NaN
30.1
\n", + "
" + ], + "text/plain": [ + " b\n", + "a \n", + "2 3.2\n", + "1 NaN\n", + "3 0.1" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.groupby('a').mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is also possible to include `NA` in groups by passing `dropna=False`" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
b
a
23.2
1NaN
30.1
<NA>1.5
\n", + "
" + ], + "text/plain": [ + " b\n", + "a \n", + "2 3.2\n", + "1 NaN\n", + "3 0.1\n", + " 1.5" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.groupby('a', dropna=False).mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Inserting missing data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "All dtypes support insertion of missing value by assignment. Any specific location in series can made null by assigning it to `None`." + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [], + "source": [ + "series = cudf.Series([1, 2, 3, 4])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 3\n", + "3 4\n", + "dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "series[2] = None" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 2\n", + "2 \n", + "3 4\n", + "dtype: int64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filling missing values: fillna" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`fillna()` can fill in `NA` & `NaN` values with non-NA data." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 NaN\n", + "1 2.0\n", + "2 2 3.2\n", + "3 3 0.1\n", + "4 1.0" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10.0\n", + "1 2.0\n", + "2 3.2\n", + "3 0.1\n", + "4 1.0\n", + "Name: b, dtype: float64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['b'].fillna(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Filling with cudf Object" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column." + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "import cupy as cp\n", + "dff = cudf.DataFrame(cp.random.randn(10, 3), columns=list('ABC'))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "dff.iloc[3:5, 0] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "dff.iloc[4:6, 1] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "dff.iloc[5:8, 2] = np.nan" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3NaN-0.610798-0.272895
4NaNNaN1.396784
5-0.439343NaNNaN
61.093102-0.764758NaN
70.003098-0.722648NaN
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 0.771245 0.051024 1.199239\n", + "1 -1.168041 0.702664 -0.270806\n", + "2 -1.467009 -0.143080 -0.806151\n", + "3 NaN -0.610798 -0.272895\n", + "4 NaN NaN 1.396784\n", + "5 -0.439343 NaN NaN\n", + "6 1.093102 -0.764758 NaN\n", + "7 0.003098 -0.722648 NaN\n", + "8 -0.095899 -1.285156 -0.300566\n", + "9 0.109465 2.497843 -1.199856" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3-0.149173-0.610798-0.272895
4-0.149173-0.0343641.396784
5-0.439343-0.034364-0.036322
61.093102-0.764758-0.036322
70.003098-0.722648-0.036322
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 0.771245 0.051024 1.199239\n", + "1 -1.168041 0.702664 -0.270806\n", + "2 -1.467009 -0.143080 -0.806151\n", + "3 -0.149173 -0.610798 -0.272895\n", + "4 -0.149173 -0.034364 1.396784\n", + "5 -0.439343 -0.034364 -0.036322\n", + "6 1.093102 -0.764758 -0.036322\n", + "7 0.003098 -0.722648 -0.036322\n", + "8 -0.095899 -1.285156 -0.300566\n", + "9 0.109465 2.497843 -1.199856" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.fillna(dff.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3NaN-0.610798-0.272895
4NaN-0.0343641.396784
5-0.439343-0.034364-0.036322
61.093102-0.764758-0.036322
70.003098-0.722648-0.036322
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", + "
" + ], + "text/plain": [ + " A B C\n", + "0 0.771245 0.051024 1.199239\n", + "1 -1.168041 0.702664 -0.270806\n", + "2 -1.467009 -0.143080 -0.806151\n", + "3 NaN -0.610798 -0.272895\n", + "4 NaN -0.034364 1.396784\n", + "5 -0.439343 -0.034364 -0.036322\n", + "6 1.093102 -0.764758 -0.036322\n", + "7 0.003098 -0.722648 -0.036322\n", + "8 -0.095899 -1.285156 -0.300566\n", + "9 0.109465 2.497843 -1.199856" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.fillna(dff.mean()[1:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dropping axis labels with missing data: dropna" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Missing data can be excluded using `dropna()`:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 1 NaN\n", + "1 2.0\n", + "2 2 3.2\n", + "3 3 0.1\n", + "4 1.0" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
223.2
330.1
\n", + "
" + ], + "text/plain": [ + " a b\n", + "2 2 3.2\n", + "3 3 0.1" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.dropna(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
1
2
3
4
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: [0, 1, 2, 3, 4]" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1.dropna(axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "An equivalent `dropna()` is available for Series. " + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "2 2\n", + "3 3\n", + "Name: a, dtype: int64" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1['a'].dropna()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Replacing generic values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Often times we want to replace arbitrary values with other values.\n", + "\n", + "`replace()` in Series and `replace()` in DataFrame provides an efficient yet flexible way to perform such replacements." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [], + "source": [ + "series = cudf.Series([0.0, 1.0, 2.0, 3.0, 4.0])" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.0\n", + "1 1.0\n", + "2 2.0\n", + "3 3.0\n", + "4 4.0\n", + "dtype: float64" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5.0\n", + "1 1.0\n", + "2 2.0\n", + "3 3.0\n", + "4 4.0\n", + "dtype: float64" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.replace(0, 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also replace any value with a `<NA>` value." + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 \n", + "1 1.0\n", + "2 2.0\n", + "3 3.0\n", + "4 4.0\n", + "dtype: float64" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.replace(0, None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can replace a list of values by a list of other values:" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 4.0\n", + "1 3.0\n", + "2 2.0\n", + "3 1.0\n", + "4 0.0\n", + "dtype: float64" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also specify a mapping dict:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, +
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 10.0\n", + "1 100.0\n", + "2 2.0\n", + "3 3.0\n", + "4 4.0\n", + "dtype: float64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series.replace({0: 10, 1: 100})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For a DataFrame, you can specify individual values by column:" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [], + "source": [ + "df = cudf.DataFrame({\"a\": [0, 1, 2, 3, 4], \"b\": [5, 6, 7, 8, 9]})" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
005
116
227
338
449
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 0 5\n", + "1 1 6\n", + "2 2 7\n", + "3 3 8\n", + "4 4 9" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0100100
116
227
338
449
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 100 100\n", + "1 1 6\n", + "2 2 7\n", + "3 3 8\n", + "4 4 9" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace({\"a\": 0, \"b\": 5}, 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## String/regular expression replacement" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "cudf supports replacing string values using `replace` API:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "d = {\"a\": list(range(4)), \"b\": list(\"ab..\"), \"c\": [\"a\", \"b\", None, \"d\"]}" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "df = cudf.DataFrame(d)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00aa
11bb
22.<NA>
33.d
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 a a\n", + "1 1 b b\n", + "2 2 . \n", + "3 3 . d" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00aa
11bb
22A Dot<NA>
33A Dotd
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 a a\n", + "1 1 b b\n", + "2 2 A Dot \n", + "3 3 A Dot d" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace(\".\", \"A Dot\")" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00aa
11<NA><NA>
22A Dot<NA>
33A Dotd
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 a a\n", + "1 1 \n", + "2 2 A Dot \n", + "3 3 A Dot d" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace([\".\", \"b\"], [\"A Dot\", None])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Replace a few different values (list -> list):\n" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00bb
11bb
22--<NA>
33--d
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 b b\n", + "1 1 b b\n", + "2 2 -- \n", + "3 3 -- d" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace([\"a\", \".\"], [\"b\", \"--\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only search in column 'b' (dict -> dict):" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
00aa
11bb
22replacement value<NA>
33replacement valued
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 0 a a\n", + "1 1 b b\n", + "2 2 replacement value \n", + "3 3 replacement value d" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace({\"b\": \".\"}, {\"b\": \"replacement value\"})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numeric replacement" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`replace()` can also be used similar to `fillna()`." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "df = cudf.DataFrame(cp.random.randn(10, 2))" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [], + "source": [ + "df[np.random.rand(df.shape[0]) > 0.5] = 1.5" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0<NA><NA>
1<NA><NA>
20.1231607461.09464783
3<NA><NA>
4<NA><NA>
50.68137677-0.357346253
6<NA><NA>
7<NA><NA>
81.173285961-0.968616065
90.147922362-0.154880098
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 \n", + "1 \n", + "2 0.123160746 1.09464783\n", + "3 \n", + "4 \n", + "5 0.68137677 -0.357346253\n", + "6 \n", + "7 \n", + "8 1.173285961 -0.968616065\n", + "9 0.147922362 -0.154880098" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace(1.5, None)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Replacing more than one value is possible by passing a list.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [], + "source": [ + "df00 = df.iloc[0, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
05.0000005.000000
15.0000005.000000
20.1231611.094648
35.0000005.000000
45.0000005.000000
50.681377-0.357346
65.0000005.000000
75.0000005.000000
81.173286-0.968616
90.147922-0.154880
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 5.000000 5.000000\n", + "1 5.000000 5.000000\n", + "2 0.123161 1.094648\n", + "3 5.000000 5.000000\n", + "4 5.000000 5.000000\n", + "5 0.681377 -0.357346\n", + "6 5.000000 5.000000\n", + "7 5.000000 5.000000\n", + "8 1.173286 -0.968616\n", + "9 0.147922 -0.154880" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.replace([1.5, df00], [5, 10])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also operate on the DataFrame in place:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "df.replace(1.5, None, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0<NA><NA>
1<NA><NA>
20.1231607461.09464783
3<NA><NA>
4<NA><NA>
50.68137677-0.357346253
6<NA><NA>
7<NA><NA>
81.173285961-0.968616065
90.147922362-0.154880098
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 \n", + "1 \n", + "2 0.123160746 1.09464783\n", + "3 \n", + "4 \n", + "5 0.68137677 -0.357346253\n", + "6 \n", + "7 \n", + "8 1.173285961 -0.968616065\n", + "9 0.147922362 -0.154880098" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb new file mode 100644 index 00000000000..3299414ac7e --- /dev/null +++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb @@ -0,0 +1,1716 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Overview of User Defined Functions with cuDF" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Like many tabular data processing APIs, cuDF provides a range of composable, DataFrame style operators. While out of the box functions are flexible and useful, it is sometimes necessary to write custom code, or user-defined functions (UDFs), that can be applied to rows, columns, and other groupings of the cells making up the DataFrame.\n", + "\n", + "In conjunction with the broader GPU PyData ecosystem, cuDF provides interfaces to run UDFs on a variety of data structures. Currently, we can only execute UDFs on numeric and Boolean typed data (support for strings is being planned). 
This guide covers writing and executing UDFs on the following data structures:\n", + "\n", + "- Series\n", + "- DataFrame\n", + "- Rolling Windows Series\n", + "- Groupby DataFrames\n", + "- CuPy NDArrays\n", + "- Numba DeviceNDArrays\n", + "\n", + "It also demonstrates cuDF's default null handling behavior, and how to write UDFs that can interact with null values in a limited fashion." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview\n", + "\n", + "When cuDF executes a UDF, it gets just-in-time (JIT) compiled into a CUDA kernel (either explicitly or implicitly) and is run on the GPU. Exploring CUDA and GPU architecture in-depth is out of scope for this guide. At a high level:\n", + "\n", + "- Compute is spread across multiple \"blocks\", which have access to both global memory and their own block local memory\n", + "- Within each block, many \"threads\" operate independently and simultaneously access their block-specific shared memory with low latency\n", + "\n", + "\n", + "This guide covers APIs that automatically handle dividing columns into chunks and assigning them into different GPU blocks for parallel computation (see [apply_chunks](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.dataframe.DataFrame.apply_chunks) or the [numba CUDA JIT API](https://numba.pydata.org/numba-doc/dev/cuda/index.html) if you need to control this yourself)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Series UDFs\n", + "\n", + "You can execute UDFs on Series in two ways:\n", + "\n", + "- Writing a standard Python function and using `applymap`\n", + "- Writing a Numba kernel and using Numba's `forall` syntax\n", + "\n", + "Using `applymap` is simpler, but writing a Numba kernel offers the flexibility to build more complex functions (we'll be writing only simple kernels in this guide).\n", + "\n", + "Let's start by importing a few libraries and creating a DataFrame of several Series." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
0-0.691674TrueDan
10.480099FalseBob
2-0.473370TrueXavier
30.067479TrueAlice
4-0.970850FalseSarah
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 -0.691674 True Dan\n", + "1 0.480099 False Bob\n", + "2 -0.473370 True Xavier\n", + "3 0.067479 True Alice\n", + "4 -0.970850 False Sarah" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "import cudf\n", + "from cudf.datasets import randomdata \n", + "\n", + "df = randomdata(nrows=10, dtypes={'a':float, 'b':bool, 'c':str}, seed=12)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll define a basic Python function and call it as a UDF with `applymap`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def udf(x):\n", + " if x > 0:\n", + " return x + 5\n", + " else:\n", + " return x - 5" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 -5.691674\n", + "1 5.480099\n", + "2 -5.473370\n", + "3 5.067479\n", + "4 -5.970850\n", + "5 5.837494\n", + "6 5.801430\n", + "7 -5.933157\n", + "8 5.913899\n", + "9 -5.725581\n", + "Name: a, dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['a'].applymap(udf)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's all there is to it. For more complex UDFs, though, we'd want to write an actual Numba kernel.\n", + "\n", + "For more complex logic (for instance, accessing values from multiple input columns or rows, you'll need to use a more complex API. There are several types. First we'll cover writing and running a Numba JITed CUDA kernel.\n", + "\n", + "The easiest way to write a Numba kernel is to use `cuda.grid(1)` to manage our thread indices, and then leverage Numba's `forall` method to configure the kernel for us. 
Below, we define a basic multiplication kernel as an example and use `@cuda.jit` to compile it." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from numba import cuda\n", + "\n", + "@cuda.jit\n", + "def multiply(in_col, out_col, multiplier):\n", + " i = cuda.grid(1)\n", + " if i < in_col.size: # boundary guard\n", + " out_col[i] = in_col[i] * multiplier" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This kernel will take an input array, multiply it by a configurable value (supplied at runtime), and store the result in an output array. Notice that we wrapped our logic in an `if` statement. Because we can launch more threads than the size of our array, we need to make sure that we don't use threads with an index that would be out of bounds. Leaving this out can result in undefined behavior.\n", + "\n", + "To execute our kernel, we just need to pre-allocate an output array and leverage the `forall` method mentioned above. First, we create a Series of all `0.0` in our DataFrame, since we want `float64` output. Next, we run the kernel with `forall`. `forall` requires us to specify our desired number of tasks, so we'll pass in the length of our Series (which we store in `size`). The [`__cuda_array_interface__`](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) is what allows us to directly call our Numba kernel on our Series." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "size = len(df['a'])\n", + "df['e'] = 0.0\n", + "multiply.forall(size)(df['a'], df['e'], 10.0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After calling our kernel, our DataFrame is now populated with the result." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abce
0-0.691674TrueDan-6.916743
10.480099FalseBob4.800994
2-0.473370TrueXavier-4.733700
30.067479TrueAlice0.674788
4-0.970850FalseSarah-9.708501
\n", + "
" + ], + "text/plain": [ + " a b c e\n", + "0 -0.691674 True Dan -6.916743\n", + "1 0.480099 False Bob 4.800994\n", + "2 -0.473370 True Xavier -4.733700\n", + "3 0.067479 True Alice 0.674788\n", + "4 -0.970850 False Sarah -9.708501" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that, while we're operating on the Series `df['e']`, the kernel executes on the [DeviceNDArray](https://numba.pydata.org/numba-doc/dev/cuda/memory.html#device-arrays) \\\"underneath\\\" the Series. If you ever need to access the underlying DeviceNDArray of a Series, you can do so with `Series.data.mem`. We'll use this during an example in the Null Handling section of this guide." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DataFrame UDFs\n", + "\n", + "We could apply a UDF on a DataFrame like we did above with `forall`. We'd need to write a kernel that expects multiple inputs, and pass multiple Series as arguments when we execute our kernel. Because this is fairly common and can be difficult to manage, cuDF provides two APIs to streamline this: `apply_rows` and `apply_chunks`. Below, we walk through an example of using `apply_rows`. `apply_chunks` works in a similar way, but also offers more control over low-level kernel behavior.\n", + "\n", + "Now that we have two numeric columns in our DataFrame, let's write a kernel that uses both of them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def conditional_add(x, y, out):\n", + " for i, (a, e) in enumerate(zip(x, y)):\n", + " if a > 0:\n", + " out[i] = a + e\n", + " else:\n", + " out[i] = a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that we need to `enumerate` through our `zipped` function arguments (which either match or are mapped to our input column names). We can pass this kernel to `apply_rows`. We'll need to specify a few arguments:\n", + "- incols\n", + " - A list of names of input columns that match the function arguments. Or, a dictionary mapping input column names to their corresponding function arguments such as `{'col1': 'arg1'}`.\n", + "- outcols\n", + " - A dictionary defining our output column names and their data types. These names must match our function arguments.\n", + "- kwargs (optional)\n", + " - We can optionally pass keyword arguments as a dictionary. Since we don't need any, we pass an empty one.\n", + " \n", + "While it looks like our function is looping sequentially through our columns, it actually executes in parallel in multiple threads on the GPU. This parallelism is the heart of GPU-accelerated computing. With that background, we're ready to use our UDF." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abceout
0-0.691674TrueDan-6.916743-0.691674
10.480099FalseBob4.8009945.281093
2-0.473370TrueXavier-4.733700-0.473370
30.067479TrueAlice0.6747880.742267
4-0.970850FalseSarah-9.708501-0.970850
\n", + "
" + ], + "text/plain": [ + " a b c e out\n", + "0 -0.691674 True Dan -6.916743 -0.691674\n", + "1 0.480099 False Bob 4.800994 5.281093\n", + "2 -0.473370 True Xavier -4.733700 -0.473370\n", + "3 0.067479 True Alice 0.674788 0.742267\n", + "4 -0.970850 False Sarah -9.708501 -0.970850" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.apply_rows(conditional_add, \n", + " incols={'a':'x', 'e':'y'},\n", + " outcols={'out': np.float64},\n", + " kwargs={}\n", + " )\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, we see our conditional addition worked. At this point, we've successfully executed UDFs on the core data structures of cuDF." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Rolling Window UDFs\n", + "\n", + "For time-series data, we may need to operate on a small \\\"window\\\" of our column at a time, processing each portion independently. We could slide (\\\"roll\\\") this window over the entire column to answer questions like \\\"What is the 3-day moving average of a stock price over the past year?\"\n", + "\n", + "We can apply more complex functions to rolling windows to `rolling` Series and DataFrames using `apply`. This example is adapted from cuDF's [API documentation](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.dataframe.DataFrame.rolling). First, we'll create an example Series and then create a `rolling` object from the Series." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 16.0\n", + "1 25.0\n", + "2 36.0\n", + "3 49.0\n", + "4 64.0\n", + "5 81.0\n", + "dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ser = cudf.Series([16, 25, 36, 49, 64, 81], dtype='float64')\n", + "ser" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Rolling [window=3,min_periods=3,center=False]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rolling = ser.rolling(window=3, min_periods=3, center=False)\n", + "rolling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll define a function to use on our rolling windows. We created this one to highlight how you can include things like loops, mathematical functions, and conditionals. Rolling window UDFs do not yet support null values." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "import math\n", + "\n", + "def example_func(window):\n", + " b = 0\n", + " for a in window:\n", + " b = max(b, math.sqrt(a))\n", + " if b == 8:\n", + " return 100 \n", + " return b" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can execute the function by passing it to `apply`. With `window=3`, `min_periods=3`, and `center=False`, our first two values are `null`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 null\n", + "1 null\n", + "2 6.0\n", + "3 7.0\n", + "4 100.0\n", + "5 9.0\n", + "dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rolling.apply(example_func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can apply this function to every column in a DataFrame, too." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
055.055.0
156.056.0
257.057.0
358.058.0
459.059.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 55.0 55.0\n", + "1 56.0 56.0\n", + "2 57.0 57.0\n", + "3 58.0 58.0\n", + "4 59.0 59.0" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2 = cudf.DataFrame()\n", + "df2['a'] = np.arange(55, 65, dtype='float64')\n", + "df2['b'] = np.arange(55, 65, dtype='float64')\n", + "df2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0nullnull
1nullnull
27.5498344357.549834435
37.6157731067.615773106
47.6811457487.681145748
57.7459666927.745966692
67.8102496767.810249676
77.8740078747.874007874
87.9372539337.937253933
9100.0100.0
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 null null\n", + "1 null null\n", + "2 7.549834435 7.549834435\n", + "3 7.615773106 7.615773106\n", + "4 7.681145748 7.681145748\n", + "5 7.745966692 7.745966692\n", + "6 7.810249676 7.810249676\n", + "7 7.874007874 7.874007874\n", + "8 7.937253933 7.937253933\n", + "9 100.0 100.0" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rolling = df2.rolling(window=3, min_periods=3, center=False)\n", + "rolling.apply(example_func)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## GroupBy DataFrame UDFs\n", + "\n", + "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.groupby.groupby.GroupBy.apply_grouped).\n", + "\n", + "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abceout
0-0.691674TrueDan-6.916743-0.691674
10.480099FalseBob4.8009945.281093
2-0.473370TrueXavier-4.733700-0.473370
30.067479TrueAlice0.6747880.742267
4-0.970850FalseSarah-9.708501-0.970850
\n", + "
" + ], + "text/plain": [ + " a b c e out\n", + "0 -0.691674 True Dan -6.916743 -0.691674\n", + "1 0.480099 False Bob 4.800994 5.281093\n", + "2 -0.473370 True Xavier -4.733700 -0.473370\n", + "3 0.067479 True Alice 0.674788 0.742267\n", + "4 -0.970850 False Sarah -9.708501 -0.970850" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/rapids/lib/python3.7/site-packages/cudf/core/dataframe.py:2559: UserWarning: as_index==True not supported due to the lack of multi-index with legacy groupby function. Use hash method for multi-index\n", + " \"as_index==True not supported due to the lack of \"\n" + ] + } + ], + "source": [ + "grouped = df.groupby(['b'], method=\"cudf\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next we'll define a function to apply to each group independently. In this case, we'll take the rolling average of column `e`, and call that new column `rolling_avg_e`." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "def rolling_avg(e, rolling_avg_e):\n", + " win_size = 3\n", + " for i in range(cuda.threadIdx.x, len(e), cuda.blockDim.x):\n", + " if i < win_size - 1:\n", + " # If there is not enough data to fill the window,\n", + " # take the average to be NaN\n", + " rolling_avg_e[i] = np.nan\n", + " else:\n", + " total = 0\n", + " for j in range(i - win_size + 1, i + 1):\n", + " total += e[j]\n", + " rolling_avg_e[i] = total / win_size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can execute this with a very similar API to `apply_rows`. This time, though, it's going to execute independently for each group." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abceoutrolling_avg_e
00.480099FalseBob4.8009945.281093NaN
1-0.970850FalseSarah-9.708501-0.970850NaN
20.801430FalseSarah8.0142978.8157271.035597
3-0.933157FalseQuinn-9.331571-0.933157-3.675258
4-0.691674TrueDan-6.916743-0.691674NaN
5-0.473370TrueXavier-4.733700-0.473370NaN
60.067479TrueAlice0.6747880.742267-3.658552
70.837494TrueWendy8.3749409.2124341.438676
80.913899TrueUrsula9.13898710.0528856.062905
9-0.725581TrueGeorge-7.255814-0.7255813.419371
\n", + "
" + ], + "text/plain": [ + " a b c e out rolling_avg_e\n", + "0 0.480099 False Bob 4.800994 5.281093 NaN\n", + "1 -0.970850 False Sarah -9.708501 -0.970850 NaN\n", + "2 0.801430 False Sarah 8.014297 8.815727 1.035597\n", + "3 -0.933157 False Quinn -9.331571 -0.933157 -3.675258\n", + "4 -0.691674 True Dan -6.916743 -0.691674 NaN\n", + "5 -0.473370 True Xavier -4.733700 -0.473370 NaN\n", + "6 0.067479 True Alice 0.674788 0.742267 -3.658552\n", + "7 0.837494 True Wendy 8.374940 9.212434 1.438676\n", + "8 0.913899 True Ursula 9.138987 10.052885 6.062905\n", + "9 -0.725581 True George -7.255814 -0.725581 3.419371" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results = grouped.apply_grouped(rolling_avg,\n", + " incols=['e'],\n", + " outcols=dict(rolling_avg_e=np.float64))\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice how, with a window size of three in the kernel, the first two values in each group for our output column are null." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numba Kernels on CuPy Arrays\n", + "\n", + "We can also execute Numba kernels on CuPy NDArrays, again thanks to the `__cuda_array_interface__`. We can even run the same UDF on the Series and the CuPy array. First, we define a Series and then create a CuPy array from that Series." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1., 2., 3., 4., 10.])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import cupy as cp\n", + "\n", + "s = cudf.Series([1.0, 2, 3, 4, 10])\n", + "arr = cp.asarray(s)\n", + "arr" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we define a UDF and execute it on our Series. 
We need to allocate a Series of the same size for our output, which we'll call `out`." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5\n", + "1 10\n", + "2 15\n", + "3 20\n", + "4 50\n", + "dtype: int32" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from cudf.utils import cudautils\n", + "\n", + "@cuda.jit\n", + "def multiply_by_5(x, out):\n", + " i = cuda.grid(1)\n", + " if i < x.size:\n", + " out[i] = x[i] * 5\n", + " \n", + "out = cudf.Series(cudautils.zeros(len(s), dtype='int32'))\n", + "multiply_by_5.forall(s.shape[0])(s, out)\n", + "out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we execute the same function on our array. We allocate an empty array `out` to store our results." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 5., 10., 15., 20., 50.])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out = cp.empty_like(arr)\n", + "multiply_by_5.forall(arr.size)(arr, out)\n", + "out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Null Handling in UDFs\n", + "\n", + "Above, we covered most basic usage of UDFs with cuDF.\n", + "\n", + "The remainder of the guide focuses on considerations for executing UDFs on DataFrames containing null values. If your UDFs will read or write any column containing nulls, **you should read this section carefully**.\n", + "\n", + "Writing UDFs that can handle null values is complicated by the fact that a separate bitmask is used to identify when a value is valid and when it's null. 
By default, DataFrame methods for applying UDFs like `apply_rows` will handle nulls pessimistically (the output will be null for any row that has a null value in a column used by the kernel). Exploring how not handling nulls pessimistically can lead to undefined behavior is outside the scope of this guide. Suffice it to say, pessimistic null handling is the safe and consistent approach. You can see an example below." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
09631005997
19771026null
2null10261019
31078null985
49799821011
\n", + "
" + ], + "text/plain": [ + " a b c\n", + "0 963 1005 997\n", + "1 977 1026 null\n", + "2 null 1026 1019\n", + "3 1078 null 985\n", + "4 979 982 1011" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def gpu_add(a, b, out):\n", + " for i, (x, y) in enumerate(zip(a, b)):\n", + " out[i] = x + y\n", + "\n", + "df = randomdata(nrows=5, dtypes={'a':int, 'b':int, 'c':int}, seed=12)\n", + "df.loc[2, 'a'] = None\n", + "df.loc[3, 'b'] = None\n", + "df.loc[1, 'c'] = None\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the dataframe above, there are three null values. Each column has a null in a different row. When we use our UDF with `apply_rows`, our output should have two nulls due to pessimistic null handling (because we're not using column `c`, the null value there does not matter to us)." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abcout
096310059971968.0
19771026null2003.0
2null10261019null
31078null985null
497998210111961.0
\n", + "
" + ], + "text/plain": [ + " a b c out\n", + "0 963 1005 997 1968.0\n", + "1 977 1026 null 2003.0\n", + "2 null 1026 1019 null\n", + "3 1078 null 985 null\n", + "4 979 982 1011 1961.0" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.apply_rows(gpu_add, \n", + " incols=['a', 'b'],\n", + " outcols={'out':np.float64},\n", + " kwargs={})\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As expected, we end up with two nulls in our output. The null values from the columns we used propagated to our output, but the null from the column we ignored did not." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Operating on Null Values\n", + "\n", + "If you don't need to conditionally handle null values in your UDFs, feel free to skip these final two sections.\n", + "\n", + "As a developer or data scientist, you may sometimes need to write UDFs that operate on null values. This means you need to think about the null bitmask array when writing your UDF. As a note, cuDF allows you to turn off pessimistic null handling in `apply_rows`. Instead of doing this, if you need to operate on null values we recommend writing standard `Numba.cuda` kernels. To help you interact with null bitmasks from Python, cuDF provides the `mask_get` utility function. The following example illustrates how you can use `mask_get` in Numba kernels like we used earlier in this guide." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standard Numba Kernels\n", + "\n", + "First, we import `mask_get` and create a DataFrame with some null values." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
0-0.691674315True
10.480099393False
2nullTrue
30.067478787True
4nullFalse
\n", + "
" + ], + "text/plain": [ + " a b\n", + "0 -0.691674315 True\n", + "1 0.480099393 False\n", + "2 null True\n", + "3 0.067478787 True\n", + "4 null False" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from cudf.utils.cudautils import mask_get\n", + "\n", + "df = randomdata(nrows=10, dtypes={'a':float, 'b':bool}, seed=12)\n", + "df.loc[[2,4], 'a'] = None\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll define a simple kernel like before, with a couple of differences. This kernel needs access to the null bitmask, so we include a `validity_mask` argument. We also wrap our logic in a conditional based on the results of `mask_get`:\n", + "- If the result of `mask_get` for that index **is** valid (there is a value), do the multiplication\n", + "- If the result of `mask_get` for that index **is not** valid (it's null), set the output to -999999" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "@cuda.jit\n", + "def gpu_kernel_masked(in_col, validity_mask, out_col, multiplier):\n", + " i = cuda.grid(1)\n", + " if i < in_col.size:\n", + " valid = mask_get(validity_mask, i)\n", + " if valid:\n", + " out_col[i] = in_col[i] * multiplier\n", + " else:\n", + " out_col[i] = -999999" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We now grab the underlying DeviceArrays and execute our kernel like we did previously, except that this time we also pass in the DeviceArray of our column's null mask. Because Numba doesn't yet handle masked GPU arrays, we can't directly pass our `Series` here." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abresult
0-0.691674315True-6.916743
10.480099393False4.800994
2nullTrue-999999.000000
30.067478787True0.674788
4nullFalse-999999.000000
\n", + "
" + ], + "text/plain": [ + " a b result\n", + "0 -0.691674315 True -6.916743\n", + "1 0.480099393 False 4.800994\n", + "2 null True -999999.000000\n", + "3 0.067478787 True 0.674788\n", + "4 null False -999999.000000" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import rmm # RAPIDS Memory Manager\n", + "\n", + "a_dary = df.a._column.data.mem\n", + "a_mask = df.a.nullmask.mem\n", + "output_dary = rmm.device_array_like(a_dary)\n", + "\n", + "gpu_kernel_masked.forall(output_dary.size)(a_dary, a_mask, output_dary, 10)\n", + "df['result'] = output_dary\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "This guide has covered a lot of content. At this point, you should hopefully feel comfortable writing UDFs (with or without null values) that operate on\n", + "\n", + "- Series\n", + "- DataFrame\n", + "- Rolling Windows\n", + "- GroupBy DataFrames\n", + "- CuPy NDArrays\n", + "- Numba DeviceNDArrays\n", + "\n", + "\n", + "For more information please see the [cuDF](https://docs.rapids.ai/api/cudf/nightly/), [Numba.cuda](https://numba.pydata.org/numba-doc/dev/cuda/index.html), and [CuPy](https://docs-cupy.chainer.org/en/stable/) documentation." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst new file mode 100644 index 00000000000..1061008eb3c --- /dev/null +++ b/docs/cudf/source/user_guide/index.rst @@ -0,0 +1,12 @@ +========== +User Guide +========== + + +.. 
toctree:: + :maxdepth: 2 + + 10min.ipynb + 10min-cudf-cupy.ipynb + guide-to-udfs.ipynb + Working-with-missing-data.ipynb From 438be0a648e39609d351da7ee98874fa39cccb1a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 17 May 2021 12:11:01 -0700 Subject: [PATCH 02/49] update series --- docs/cudf/source/api_docs/series.rst | 299 +++++++++------------------ 1 file changed, 102 insertions(+), 197 deletions(-) diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 6e0943930a1..e8030196605 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -1,3 +1,6 @@ +.. meta:: + :my-var: a-for-apple + ====== Series ====== @@ -18,21 +21,15 @@ Attributes :toctree: api/ Series.index - Series.array Series.values Series.dtype Series.shape - Series.nbytes Series.ndim Series.size - Series.T Series.memory_usage - Series.hasnans + Series.has_nulls Series.empty - Series.dtypes Series.name - Series.flags - Series.set_flags Conversion ---------- @@ -40,13 +37,7 @@ Conversion :toctree: api/ Series.astype - Series.convert_dtypes - Series.infer_objects Series.copy - Series.bool - Series.to_numpy - Series.to_period - Series.to_timestamp Series.to_list Series.__array__ @@ -55,18 +46,12 @@ Indexing, iteration .. autosummary:: :toctree: api/ - Series.get - Series.at - Series.iat Series.loc Series.iloc Series.__iter__ Series.items Series.iteritems Series.keys - Series.pop - Series.item - Series.xs For more information on ``.at``, ``.iat``, ``.loc``, and ``.iloc``, see the :ref:`indexing documentation `. 
@@ -79,7 +64,6 @@ Binary operator functions Series.add Series.sub Series.mul - Series.div Series.truediv Series.floordiv Series.mod @@ -87,13 +71,10 @@ Binary operator functions Series.radd Series.rsub Series.rmul - Series.rdiv Series.rtruediv Series.rfloordiv Series.rmod Series.rpow - Series.combine - Series.combine_first Series.round Series.lt Series.gt @@ -102,22 +83,15 @@ Binary operator functions Series.ne Series.eq Series.product - Series.dot Function application, GroupBy & window -------------------------------------- .. autosummary:: :toctree: api/ - Series.apply - Series.agg - Series.aggregate - Series.transform Series.map Series.groupby Series.rolling - Series.expanding - Series.ewm Series.pipe .. _api.series.stats: @@ -130,8 +104,6 @@ Computations / descriptive stats Series.abs Series.all Series.any - Series.autocorr - Series.between Series.clip Series.corr Series.count @@ -144,7 +116,6 @@ Computations / descriptive stats Series.diff Series.factorize Series.kurt - Series.mad Series.max Series.mean Series.median @@ -152,11 +123,9 @@ Computations / descriptive stats Series.mode Series.nlargest Series.nsmallest - Series.pct_change Series.prod Series.quantile Series.rank - Series.sem Series.skew Series.std Series.sum @@ -175,50 +144,32 @@ Reindexing / selection / label manipulation .. autosummary:: :toctree: api/ - Series.align Series.drop - Series.droplevel Series.drop_duplicates - Series.duplicated Series.equals - Series.first Series.head - Series.idxmax - Series.idxmin Series.isin - Series.last Series.reindex - Series.reindex_like Series.rename - Series.rename_axis Series.reset_index Series.sample - Series.set_axis Series.take Series.tail Series.truncate Series.where Series.mask - Series.add_prefix - Series.add_suffix - Series.filter Missing data handling --------------------- .. 
autosummary:: :toctree: api/ - Series.backfill - Series.bfill Series.dropna - Series.ffill Series.fillna - Series.interpolate Series.isna Series.isnull Series.notna Series.notnull - Series.pad Series.replace Reshaping, sorting @@ -227,19 +178,11 @@ Reshaping, sorting :toctree: api/ Series.argsort - Series.argmin - Series.argmax - Series.reorder_levels Series.sort_values Series.sort_index - Series.swaplevel - Series.unstack Series.explode Series.searchsorted - Series.ravel Series.repeat - Series.squeeze - Series.view Combining / comparing / joining / merging ----------------------------------------- @@ -247,7 +190,6 @@ Combining / comparing / joining / merging :toctree: api/ Series.append - Series.compare Series.update Time Series-related @@ -255,18 +197,7 @@ Time Series-related .. autosummary:: :toctree: api/ - Series.asfreq - Series.asof Series.shift - Series.first_valid_index - Series.last_valid_index - Series.resample - Series.tz_convert - Series.tz_localize - Series.at_time - Series.between_time - Series.tshift - Series.slice_shift Accessors --------- @@ -278,7 +209,7 @@ to specific data types. =========================== ================================= Data Type Accessor =========================== ================================= -Datetime, Timedelta, Period :ref:`dt ` +Datetime, Timedelta :ref:`dt ` String :ref:`str ` Categorical :ref:`cat ` Sparse :ref:`sparse ` @@ -295,94 +226,41 @@ These can be accessed like ``Series.dt.``. Datetime properties ^^^^^^^^^^^^^^^^^^^ +.. currentmodule:: cudf.core.series.DatetimeProperties .. 
autosummary:: :toctree: api/ - :template: autosummary/accessor_attribute.rst - Series.dt.date - Series.dt.time - Series.dt.timetz - Series.dt.year - Series.dt.month - Series.dt.day - Series.dt.hour - Series.dt.minute - Series.dt.second - Series.dt.microsecond - Series.dt.nanosecond - Series.dt.week - Series.dt.weekofyear - Series.dt.dayofweek - Series.dt.day_of_week - Series.dt.weekday - Series.dt.dayofyear - Series.dt.day_of_year - Series.dt.quarter - Series.dt.is_month_start - Series.dt.is_month_end - Series.dt.is_quarter_start - Series.dt.is_quarter_end - Series.dt.is_year_start - Series.dt.is_year_end - Series.dt.is_leap_year - Series.dt.daysinmonth - Series.dt.days_in_month - Series.dt.tz - Series.dt.freq + day + dayofweek + hour + minute + month + second + weekday + year Datetime methods ^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: api/ - :template: autosummary/accessor_method.rst - - Series.dt.to_period - Series.dt.to_pydatetime - Series.dt.tz_localize - Series.dt.tz_convert - Series.dt.normalize - Series.dt.strftime - Series.dt.round - Series.dt.floor - Series.dt.ceil - Series.dt.month_name - Series.dt.day_name - -Period properties -^^^^^^^^^^^^^^^^^ -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_attribute.rst + strftime - Series.dt.qyear - Series.dt.start_time - Series.dt.end_time Timedelta properties ^^^^^^^^^^^^^^^^^^^^ +.. currentmodule:: cudf.core.series.TimedeltaProperties .. autosummary:: :toctree: api/ - :template: autosummary/accessor_attribute.rst - - Series.dt.days - Series.dt.seconds - Series.dt.microseconds - Series.dt.nanoseconds - Series.dt.components - -Timedelta methods -^^^^^^^^^^^^^^^^^ - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - Series.dt.to_pytimedelta - Series.dt.total_seconds + components + days + microseconds + nanoseconds + seconds .. _api.series.str: @@ -394,63 +272,90 @@ String handling strings and apply several methods to it. These can be accessed like ``Series.str.``. 
+.. currentmodule:: cudf.core.column.string.StringMethods .. autosummary:: :toctree: api/ - :template: autosummary/accessor_method.rst - Series.str.capitalize - Series.str.casefold - Series.str.cat - Series.str.center - Series.str.contains - Series.str.count - Series.str.decode - Series.str.encode - Series.str.endswith - Series.str.extract - Series.str.extractall - Series.str.find - Series.str.findall - Series.str.get - Series.str.index - Series.str.join - Series.str.len - Series.str.ljust - Series.str.lower - Series.str.lstrip - Series.str.match - Series.str.normalize - Series.str.pad - Series.str.partition - Series.str.repeat - Series.str.replace - Series.str.rfind - Series.str.rindex - Series.str.rjust - Series.str.rpartition - Series.str.rstrip - Series.str.slice - Series.str.slice_replace - Series.str.split - Series.str.rsplit - Series.str.startswith - Series.str.strip - Series.str.swapcase - Series.str.title - Series.str.translate - Series.str.upper - Series.str.wrap - Series.str.zfill - Series.str.isalnum - Series.str.isalpha - Series.str.isdigit - Series.str.isspace - Series.str.islower - Series.str.isupper - Series.str.istitle - Series.str.isnumeric - Series.str.isdecimal - Series.str.get_dummies + byte_count + capitalize + cat + center + character_ngrams + character_tokenize + code_points + contains + count + detokenize + edit_distance + endswith + extract + filter_alphanum + filter_characters + filter_tokens + find + findall + get + get_json_object + htoi + index + insert + ip2int + is_consonant + is_vowel + isalnum + isalpha + isdecimal + isdigit + isempty + isfloat + ishex + isinteger + isipv4 + isspace + islower + isnumeric + isupper + istimestamp + join + len + ljust + lower + lstrip + match + ngrams + ngrams_tokenize + normalize_characters + pad + partition + porter_stemmer_measure + replace + replace_tokens + replace_with_backrefs + rfind + rindex + rjust + rpartition + rstrip + slice + slice_from + slice_replace + split + rsplit + startswith + 
strip + subword_tokenize + swapcase + title + token_count + tokenize + translate + upper + url_decode + url_encode + wrap + zfill + + .. The following is needed to ensure the generated pages are created with the From f18fcabdbee2a92c5adbe520cd0c950947f3c8ae Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 18 May 2021 10:56:28 -0700 Subject: [PATCH 03/49] update series --- docs/cudf/source/api_docs/series.rst | 119 +++------------------------ 1 file changed, 11 insertions(+), 108 deletions(-) diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index e8030196605..947235356d9 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -381,126 +381,29 @@ Categorical accessor Categorical-dtype specific methods and attributes are available under the ``Series.cat`` accessor. +.. currentmodule:: cudf.core.column.categorical.CategoricalAccessor .. autosummary:: :toctree: api/ - :template: autosummary/accessor_attribute.rst - Series.cat.categories - Series.cat.ordered - Series.cat.codes + categories + ordered + codes + reorder_categories + add_categories + remove_categories + set_categories + as_ordered + as_unordered -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - - Series.cat.rename_categories - Series.cat.reorder_categories - Series.cat.add_categories - Series.cat.remove_categories - Series.cat.remove_unused_categories - Series.cat.set_categories - Series.cat.as_ordered - Series.cat.as_unordered - - -.. _api.series.sparse: - -Sparse accessor -~~~~~~~~~~~~~~~ - -Sparse-dtype specific methods and attributes are provided under the -``Series.sparse`` accessor. - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_attribute.rst - - Series.sparse.npoints - Series.sparse.density - Series.sparse.fill_value - Series.sparse.sp_values - -.. 
autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - - Series.sparse.from_coo - Series.sparse.to_coo - -.. _api.series.flags: - -Flags -~~~~~ - -Flags refer to attributes of the pandas object. Properties of the dataset (like -the date is was recorded, the URL it was accessed from, etc.) should be stored -in :attr:`Series.attrs`. - -.. autosummary:: - :toctree: api/ - Flags - -.. _api.series.metadata: - -Metadata -~~~~~~~~ - -:attr:`Series.attrs` is a dictionary for storing global metadata for this Series. - -.. warning:: ``Series.attrs`` is considered experimental and may change without warning. - -.. autosummary:: - :toctree: api/ - - Series.attrs - - -Plotting --------- -``Series.plot`` is both a callable method and a namespace attribute for -specific plotting methods of the form ``Series.plot.``. - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_callable.rst - - Series.plot - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - - Series.plot.area - Series.plot.bar - Series.plot.barh - Series.plot.box - Series.plot.density - Series.plot.hist - Series.plot.kde - Series.plot.line - Series.plot.pie - -.. autosummary:: - :toctree: api/ - - Series.hist Serialization / IO / conversion ------------------------------- +.. currentmodule:: cudf .. 
autosummary:: :toctree: api/ - Series.to_pickle - Series.to_csv - Series.to_dict - Series.to_excel Series.to_frame - Series.to_xarray Series.to_hdf - Series.to_sql Series.to_json Series.to_string - Series.to_clipboard - Series.to_latex - Series.to_markdown From 683d9c9d4d56ca3eb2e587c247eeeeec160742a0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 24 May 2021 07:05:58 -0700 Subject: [PATCH 04/49] add dataframe and index --- docs/cudf/source/api_docs/dataframe.rst | 172 -------- docs/cudf/source/api_docs/index.rst | 1 + docs/cudf/source/api_docs/index_objects.rst | 445 ++++++++++++++++++++ 3 files changed, 446 insertions(+), 172 deletions(-) create mode 100644 docs/cudf/source/api_docs/index_objects.rst diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 44357be27bc..07fba0808e4 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -27,13 +27,11 @@ Attributes and underlying data DataFrame.info DataFrame.select_dtypes DataFrame.values - DataFrame.axes DataFrame.ndim DataFrame.size DataFrame.shape DataFrame.memory_usage DataFrame.empty - DataFrame.set_flags Conversion ~~~~~~~~~~ @@ -41,10 +39,7 @@ Conversion :toctree: api/ DataFrame.astype - DataFrame.convert_dtypes - DataFrame.infer_objects DataFrame.copy - DataFrame.bool Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -58,16 +53,12 @@ Indexing, iteration DataFrame.iloc DataFrame.insert DataFrame.__iter__ - DataFrame.items DataFrame.iteritems DataFrame.keys DataFrame.iterrows DataFrame.itertuples - DataFrame.lookup DataFrame.pop DataFrame.tail - DataFrame.xs - DataFrame.get DataFrame.isin DataFrame.where DataFrame.mask @@ -89,7 +80,6 @@ Binary operator functions DataFrame.floordiv DataFrame.mod DataFrame.pow - DataFrame.dot DataFrame.radd DataFrame.rsub DataFrame.rmul @@ -98,30 +88,16 @@ Binary operator functions DataFrame.rfloordiv DataFrame.rmod DataFrame.rpow - DataFrame.lt - DataFrame.gt - DataFrame.le - 
DataFrame.ge - DataFrame.ne - DataFrame.eq - DataFrame.combine - DataFrame.combine_first Function application, GroupBy & window ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - DataFrame.apply - DataFrame.applymap DataFrame.pipe DataFrame.agg - DataFrame.aggregate - DataFrame.transform DataFrame.groupby DataFrame.rolling - DataFrame.expanding - DataFrame.ewm .. _api.dataframe.stats: @@ -130,12 +106,10 @@ Computations / descriptive stats .. autosummary:: :toctree: api/ - DataFrame.abs DataFrame.all DataFrame.any DataFrame.clip DataFrame.corr - DataFrame.corrwith DataFrame.count DataFrame.cov DataFrame.cummax @@ -143,61 +117,38 @@ Computations / descriptive stats DataFrame.cumprod DataFrame.cumsum DataFrame.describe - DataFrame.diff - DataFrame.eval DataFrame.kurt DataFrame.kurtosis - DataFrame.mad DataFrame.max DataFrame.mean - DataFrame.median DataFrame.min DataFrame.mode - DataFrame.pct_change DataFrame.prod DataFrame.product DataFrame.quantile DataFrame.rank DataFrame.round - DataFrame.sem DataFrame.skew DataFrame.sum DataFrame.std DataFrame.var - DataFrame.nunique - DataFrame.value_counts Reindexing / selection / label manipulation ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - DataFrame.add_prefix - DataFrame.add_suffix - DataFrame.align - DataFrame.at_time - DataFrame.between_time DataFrame.drop DataFrame.drop_duplicates - DataFrame.duplicated DataFrame.equals - DataFrame.filter - DataFrame.first DataFrame.head - DataFrame.idxmax - DataFrame.idxmin - DataFrame.last DataFrame.reindex - DataFrame.reindex_like DataFrame.rename - DataFrame.rename_axis DataFrame.reset_index DataFrame.sample - DataFrame.set_axis DataFrame.set_index DataFrame.tail DataFrame.take - DataFrame.truncate .. _api.dataframe.missing: @@ -206,17 +157,12 @@ Missing data handling .. 
autosummary:: :toctree: api/ - DataFrame.backfill - DataFrame.bfill DataFrame.dropna - DataFrame.ffill DataFrame.fillna - DataFrame.interpolate DataFrame.isna DataFrame.isnull DataFrame.notna DataFrame.notnull - DataFrame.pad DataFrame.replace Reshaping, sorting, transposing @@ -224,22 +170,15 @@ Reshaping, sorting, transposing .. autosummary:: :toctree: api/ - DataFrame.droplevel DataFrame.pivot - DataFrame.pivot_table - DataFrame.reorder_levels DataFrame.sort_values DataFrame.sort_index DataFrame.nlargest DataFrame.nsmallest - DataFrame.swaplevel DataFrame.stack DataFrame.unstack - DataFrame.swapaxes DataFrame.melt DataFrame.explode - DataFrame.squeeze - DataFrame.to_xarray DataFrame.T DataFrame.transpose @@ -250,7 +189,6 @@ Combining / comparing / joining / merging DataFrame.append DataFrame.assign - DataFrame.compare DataFrame.join DataFrame.merge DataFrame.update @@ -260,130 +198,20 @@ Time Series-related .. autosummary:: :toctree: api/ - DataFrame.asfreq - DataFrame.asof DataFrame.shift - DataFrame.slice_shift - DataFrame.tshift - DataFrame.first_valid_index - DataFrame.last_valid_index - DataFrame.resample - DataFrame.to_period - DataFrame.to_timestamp - DataFrame.tz_convert - DataFrame.tz_localize - -.. _api.frame.flags: - -Flags -~~~~~ - -Flags refer to attributes of the pandas object. Properties of the dataset (like -the date is was recorded, the URL it was accessed from, etc.) should be stored -in :attr:`DataFrame.attrs`. - -.. autosummary:: - :toctree: api/ - - Flags - - -.. _api.frame.metadata: - -Metadata -~~~~~~~~ - -:attr:`DataFrame.attrs` is a dictionary for storing global metadata for this DataFrame. - -.. warning:: ``DataFrame.attrs`` is considered experimental and may change without warning. - -.. autosummary:: - :toctree: api/ - - DataFrame.attrs - - -.. _api.dataframe.plotting: - -Plotting -~~~~~~~~ -``DataFrame.plot`` is both a callable method and a namespace attribute for -specific plotting methods of the form ``DataFrame.plot.``. - -.. 
autosummary:: - :toctree: api/ - :template: autosummary/accessor_callable.rst - - DataFrame.plot - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - - DataFrame.plot.area - DataFrame.plot.bar - DataFrame.plot.barh - DataFrame.plot.box - DataFrame.plot.density - DataFrame.plot.hexbin - DataFrame.plot.hist - DataFrame.plot.kde - DataFrame.plot.line - DataFrame.plot.pie - DataFrame.plot.scatter - -.. autosummary:: - :toctree: api/ - - DataFrame.boxplot - DataFrame.hist - - -.. _api.frame.sparse: - -Sparse accessor -~~~~~~~~~~~~~~~ - -Sparse-dtype specific methods and attributes are provided under the -``DataFrame.sparse`` accessor. - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_attribute.rst - - DataFrame.sparse.density - -.. autosummary:: - :toctree: api/ - :template: autosummary/accessor_method.rst - - DataFrame.sparse.from_spmatrix - DataFrame.sparse.to_coo - DataFrame.sparse.to_dense - Serialization / IO / conversion ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - DataFrame.from_dict DataFrame.from_records DataFrame.to_parquet DataFrame.to_pickle DataFrame.to_csv DataFrame.to_hdf - DataFrame.to_sql DataFrame.to_dict - DataFrame.to_excel DataFrame.to_json - DataFrame.to_html DataFrame.to_feather - DataFrame.to_latex - DataFrame.to_stata - DataFrame.to_gbq DataFrame.to_records DataFrame.to_string - DataFrame.to_clipboard - DataFrame.to_markdown - DataFrame.style diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index 94d9132d4c9..8d4ab19c341 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -12,4 +12,5 @@ the left sidebar to see how various elements look on this theme. 
series dataframe + index_objects diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst new file mode 100644 index 00000000000..bb70a78b10c --- /dev/null +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -0,0 +1,445 @@ +============= +Index objects +============= + +Index +----- +.. currentmodule:: cudf + +**Many of these methods or variants thereof are available on the objects +that contain an index (Series/DataFrame) and those should most likely be +used before calling these methods directly.** + +.. autosummary:: + :toctree: api/ + + Index + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.values + Index.is_monotonic + Index.is_monotonic_increasing + Index.is_monotonic_decreasing + Index.is_unique + Index.inferred_type + Index.is_all_dates + Index.shape + Index.name + Index.names + Index.ndim + Index.size + Index.empty + Index.memory_usage + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.any + Index.copy + Index.drop_duplicates + Index.equals + Index.factorize + Index.min + Index.max + Index.rename + Index.repeat + Index.where + Index.take + Index.unique + +Compatibility with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.set_names + +Missing values +~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.fillna + Index.dropna + Index.isna + Index.notna + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.astype + Index.to_list + Index.to_series + Index.to_frame + +Sorting +~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.argsort + Index.searchsorted + Index.sort_values + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.shift + +Combining / joining / set operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.append + Index.join + Index.difference + +Selecting +~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Index.get_level_values + Index.get_slice_bound + Index.isin + +.. _api.numericindex: + +Numeric Index +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + RangeIndex + Int64Index + UInt64Index + Float64Index + +.. We need this autosummary so that the methods are generated. +.. Separate block, since they aren't classes. + +.. autosummary:: + :toctree: api/ + + RangeIndex.start + RangeIndex.stop + RangeIndex.step + +.. _api.categoricalindex: + +CategoricalIndex +---------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + CategoricalIndex + +Categorical components +~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.codes + CategoricalIndex.categories + CategoricalIndex.ordered + CategoricalIndex.rename_categories + CategoricalIndex.reorder_categories + CategoricalIndex.add_categories + CategoricalIndex.remove_categories + CategoricalIndex.remove_unused_categories + CategoricalIndex.set_categories + CategoricalIndex.as_ordered + CategoricalIndex.as_unordered + +Modifying and computations +~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + CategoricalIndex.map + CategoricalIndex.equals + +.. _api.intervalindex: + +IntervalIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + IntervalIndex + +IntervalIndex components +~~~~~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + IntervalIndex.from_arrays + IntervalIndex.from_tuples + IntervalIndex.from_breaks + IntervalIndex.left + IntervalIndex.right + IntervalIndex.mid + IntervalIndex.closed + IntervalIndex.length + IntervalIndex.values + IntervalIndex.is_empty + IntervalIndex.is_non_overlapping_monotonic + IntervalIndex.is_overlapping + IntervalIndex.get_loc + IntervalIndex.get_indexer + IntervalIndex.set_closed + IntervalIndex.contains + IntervalIndex.overlaps + IntervalIndex.to_tuples + +.. _api.multiindex: + +MultiIndex +---------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + MultiIndex + +.. autosummary:: + :toctree: api/ + + IndexSlice + +MultiIndex constructors +~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.from_arrays + MultiIndex.from_tuples + MultiIndex.from_product + MultiIndex.from_frame + +MultiIndex properties +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.names + MultiIndex.levels + MultiIndex.codes + MultiIndex.nlevels + MultiIndex.levshape + MultiIndex.dtypes + +MultiIndex components +~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.set_levels + MultiIndex.set_codes + MultiIndex.to_flat_index + MultiIndex.to_frame + MultiIndex.sortlevel + MultiIndex.droplevel + MultiIndex.swaplevel + MultiIndex.reorder_levels + MultiIndex.remove_unused_levels + +MultiIndex selecting +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + MultiIndex.get_loc + MultiIndex.get_locs + MultiIndex.get_loc_level + MultiIndex.get_indexer + MultiIndex.get_level_values + +.. _api.datetimeindex: + +DatetimeIndex +------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + DatetimeIndex + +Time/date components +~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + DatetimeIndex.year + DatetimeIndex.month + DatetimeIndex.day + DatetimeIndex.hour + DatetimeIndex.minute + DatetimeIndex.second + DatetimeIndex.microsecond + DatetimeIndex.nanosecond + DatetimeIndex.date + DatetimeIndex.time + DatetimeIndex.timetz + DatetimeIndex.dayofyear + DatetimeIndex.day_of_year + DatetimeIndex.weekofyear + DatetimeIndex.week + DatetimeIndex.dayofweek + DatetimeIndex.day_of_week + DatetimeIndex.weekday + DatetimeIndex.quarter + DatetimeIndex.tz + DatetimeIndex.freq + DatetimeIndex.freqstr + DatetimeIndex.is_month_start + DatetimeIndex.is_month_end + DatetimeIndex.is_quarter_start + DatetimeIndex.is_quarter_end + DatetimeIndex.is_year_start + DatetimeIndex.is_year_end + DatetimeIndex.is_leap_year + DatetimeIndex.inferred_freq + +Selecting +~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.indexer_at_time + DatetimeIndex.indexer_between_time + + +Time-specific operations +~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.normalize + DatetimeIndex.strftime + DatetimeIndex.snap + DatetimeIndex.tz_convert + DatetimeIndex.tz_localize + DatetimeIndex.round + DatetimeIndex.floor + DatetimeIndex.ceil + DatetimeIndex.month_name + DatetimeIndex.day_name + +Conversion +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.to_period + DatetimeIndex.to_perioddelta + DatetimeIndex.to_pydatetime + DatetimeIndex.to_series + DatetimeIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + DatetimeIndex.mean + +TimedeltaIndex +-------------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + TimedeltaIndex + +Components +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.days + TimedeltaIndex.seconds + TimedeltaIndex.microseconds + TimedeltaIndex.nanoseconds + TimedeltaIndex.components + TimedeltaIndex.inferred_freq + +Conversion +~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + TimedeltaIndex.to_pytimedelta + TimedeltaIndex.to_series + TimedeltaIndex.round + TimedeltaIndex.floor + TimedeltaIndex.ceil + TimedeltaIndex.to_frame + +Methods +~~~~~~~ +.. autosummary:: + :toctree: api/ + + TimedeltaIndex.mean + +.. currentmodule:: pandas + +PeriodIndex +----------- +.. autosummary:: + :toctree: api/ + :template: autosummary/class_without_autosummary.rst + + PeriodIndex + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + PeriodIndex.day + PeriodIndex.dayofweek + PeriodIndex.day_of_week + PeriodIndex.dayofyear + PeriodIndex.day_of_year + PeriodIndex.days_in_month + PeriodIndex.daysinmonth + PeriodIndex.end_time + PeriodIndex.freq + PeriodIndex.freqstr + PeriodIndex.hour + PeriodIndex.is_leap_year + PeriodIndex.minute + PeriodIndex.month + PeriodIndex.quarter + PeriodIndex.qyear + PeriodIndex.second + PeriodIndex.start_time + PeriodIndex.week + PeriodIndex.weekday + PeriodIndex.weekofyear + PeriodIndex.year + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + PeriodIndex.asfreq + PeriodIndex.strftime + PeriodIndex.to_timestamp \ No newline at end of file From e5448be872b60cf0f9f4fd518d6c4fdc33bb6cac Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 29 Jun 2021 16:53:11 -0700 Subject: [PATCH 05/49] update index docs --- docs/cudf/source/api_docs/index_objects.rst | 158 +------------------- 1 file changed, 2 insertions(+), 156 deletions(-) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index bb70a78b10c..5bd11a97b62 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -25,8 +25,6 @@ Properties Index.is_monotonic_increasing Index.is_monotonic_decreasing Index.is_unique - Index.inferred_type - Index.is_all_dates Index.shape Index.name Index.names @@ -59,6 +57,7 @@ Compatibility with MultiIndex :toctree: api/ Index.set_names + Index.droplevel Missing values ~~~~~~~~~~~~~~ @@ -111,6 +110,7 @@ Selecting :toctree: api/ Index.get_level_values + Index.get_loc Index.get_slice_bound Index.isin @@ -120,7 +120,6 @@ Numeric Index ------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst RangeIndex Int64Index @@ -154,22 +153,12 @@ Categorical components CategoricalIndex.codes CategoricalIndex.categories - CategoricalIndex.ordered - CategoricalIndex.rename_categories - CategoricalIndex.reorder_categories - CategoricalIndex.add_categories - CategoricalIndex.remove_categories - CategoricalIndex.remove_unused_categories - CategoricalIndex.set_categories - CategoricalIndex.as_ordered - CategoricalIndex.as_unordered Modifying and computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - CategoricalIndex.map CategoricalIndex.equals .. _api.intervalindex: @@ -178,7 +167,6 @@ IntervalIndex ------------- .. 
autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst IntervalIndex @@ -187,24 +175,9 @@ IntervalIndex components .. autosummary:: :toctree: api/ - IntervalIndex.from_arrays - IntervalIndex.from_tuples IntervalIndex.from_breaks - IntervalIndex.left - IntervalIndex.right - IntervalIndex.mid - IntervalIndex.closed - IntervalIndex.length IntervalIndex.values - IntervalIndex.is_empty - IntervalIndex.is_non_overlapping_monotonic - IntervalIndex.is_overlapping IntervalIndex.get_loc - IntervalIndex.get_indexer - IntervalIndex.set_closed - IntervalIndex.contains - IntervalIndex.overlaps - IntervalIndex.to_tuples .. _api.multiindex: @@ -212,21 +185,15 @@ MultiIndex ---------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst MultiIndex -.. autosummary:: - :toctree: api/ - - IndexSlice MultiIndex constructors ~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - MultiIndex.from_arrays MultiIndex.from_tuples MultiIndex.from_product MultiIndex.from_frame @@ -240,23 +207,14 @@ MultiIndex properties MultiIndex.levels MultiIndex.codes MultiIndex.nlevels - MultiIndex.levshape - MultiIndex.dtypes MultiIndex components ~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - MultiIndex.set_levels - MultiIndex.set_codes - MultiIndex.to_flat_index MultiIndex.to_frame - MultiIndex.sortlevel MultiIndex.droplevel - MultiIndex.swaplevel - MultiIndex.reorder_levels - MultiIndex.remove_unused_levels MultiIndex selecting ~~~~~~~~~~~~~~~~~~~~ @@ -264,9 +222,6 @@ MultiIndex selecting :toctree: api/ MultiIndex.get_loc - MultiIndex.get_locs - MultiIndex.get_loc_level - MultiIndex.get_indexer MultiIndex.get_level_values .. _api.datetimeindex: @@ -275,7 +230,6 @@ DatetimeIndex ------------- .. 
autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst DatetimeIndex @@ -290,79 +244,28 @@ Time/date components DatetimeIndex.hour DatetimeIndex.minute DatetimeIndex.second - DatetimeIndex.microsecond - DatetimeIndex.nanosecond - DatetimeIndex.date - DatetimeIndex.time - DatetimeIndex.timetz - DatetimeIndex.dayofyear - DatetimeIndex.day_of_year - DatetimeIndex.weekofyear - DatetimeIndex.week DatetimeIndex.dayofweek - DatetimeIndex.day_of_week DatetimeIndex.weekday - DatetimeIndex.quarter - DatetimeIndex.tz - DatetimeIndex.freq - DatetimeIndex.freqstr - DatetimeIndex.is_month_start - DatetimeIndex.is_month_end - DatetimeIndex.is_quarter_start - DatetimeIndex.is_quarter_end - DatetimeIndex.is_year_start - DatetimeIndex.is_year_end - DatetimeIndex.is_leap_year - DatetimeIndex.inferred_freq - -Selecting -~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - DatetimeIndex.indexer_at_time - DatetimeIndex.indexer_between_time - Time-specific operations ~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ - DatetimeIndex.normalize - DatetimeIndex.strftime - DatetimeIndex.snap - DatetimeIndex.tz_convert - DatetimeIndex.tz_localize DatetimeIndex.round - DatetimeIndex.floor - DatetimeIndex.ceil - DatetimeIndex.month_name - DatetimeIndex.day_name Conversion ~~~~~~~~~~ .. autosummary:: :toctree: api/ - DatetimeIndex.to_period - DatetimeIndex.to_perioddelta - DatetimeIndex.to_pydatetime DatetimeIndex.to_series DatetimeIndex.to_frame -Methods -~~~~~~~ -.. autosummary:: - :toctree: api/ - - DatetimeIndex.mean - TimedeltaIndex -------------- .. autosummary:: :toctree: api/ - :template: autosummary/class_without_autosummary.rst TimedeltaIndex @@ -383,63 +286,6 @@ Conversion .. autosummary:: :toctree: api/ - TimedeltaIndex.to_pytimedelta TimedeltaIndex.to_series TimedeltaIndex.round - TimedeltaIndex.floor - TimedeltaIndex.ceil TimedeltaIndex.to_frame - -Methods -~~~~~~~ -.. autosummary:: - :toctree: api/ - - TimedeltaIndex.mean - -.. 
currentmodule:: pandas - -PeriodIndex ------------ -.. autosummary:: - :toctree: api/ - :template: autosummary/class_without_autosummary.rst - - PeriodIndex - -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: api/ - - PeriodIndex.day - PeriodIndex.dayofweek - PeriodIndex.day_of_week - PeriodIndex.dayofyear - PeriodIndex.day_of_year - PeriodIndex.days_in_month - PeriodIndex.daysinmonth - PeriodIndex.end_time - PeriodIndex.freq - PeriodIndex.freqstr - PeriodIndex.hour - PeriodIndex.is_leap_year - PeriodIndex.minute - PeriodIndex.month - PeriodIndex.quarter - PeriodIndex.qyear - PeriodIndex.second - PeriodIndex.start_time - PeriodIndex.week - PeriodIndex.weekday - PeriodIndex.weekofyear - PeriodIndex.year - -Methods -~~~~~~~ -.. autosummary:: - :toctree: api/ - - PeriodIndex.asfreq - PeriodIndex.strftime - PeriodIndex.to_timestamp \ No newline at end of file From d7e8aa4b0bd79438920a20d3962854de3814fca0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 30 Jun 2021 14:35:03 -0700 Subject: [PATCH 06/49] add groupby related apis --- conda/environments/cudf_dev_cuda11.0.yml | 1 + conda/environments/cudf_dev_cuda11.2.yml | 1 + docs/cudf/source/api_docs/groupby.rst | 142 +++++++++++++++++++ docs/cudf/source/api_docs/index.rst | 1 + docs/cudf/source/{ => user_guide}/groupby.md | 6 +- docs/cudf/source/user_guide/index.rst | 1 + python/cudf/setup.py | 2 +- 7 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 docs/cudf/source/api_docs/groupby.rst rename docs/cudf/source/{ => user_guide}/groupby.md (99%) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 5561a573609..67592ac2797 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -59,6 +59,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - transformers + - pydata_sphinx_theme - pip: - git+https://github.com/dask/dask.git@main - git+https://github.com/dask/distributed.git@main diff --git 
a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 6c8ae4cb9b0..68705f1d28a 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -59,6 +59,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - transformers + - pydata_sphinx_theme - pip: - git+https://github.com/dask/dask.git@main - git+https://github.com/dask/distributed.git@main diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst new file mode 100644 index 00000000000..8f383b23d67 --- /dev/null +++ b/docs/cudf/source/api_docs/groupby.rst @@ -0,0 +1,142 @@ +.. _api.groupby: + +======= +GroupBy +======= +.. currentmodule:: cudf.core.groupby + +GroupBy objects are returned by groupby calls: :func:`cudf.DataFrame.groupby`, :func:`cudf.Series.groupby`, etc. + +Indexing, iteration +------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.__iter__ + GroupBy.groups + GroupBy.get_group + +.. currentmodule:: cudf + +.. autosummary:: + :toctree: api/ + + Grouper + +.. currentmodule:: cudf.core.groupby + +Function application +-------------------- +.. autosummary:: + :toctree: api/ + + GroupBy.apply + GroupBy.agg + SeriesGroupBy.aggregate + DataFrameGroupBy.aggregate + SeriesGroupBy.transform + DataFrameGroupBy.transform + GroupBy.pipe + +Computations / descriptive stats +-------------------------------- +.. 
autosummary:: + :toctree: api/ + + GroupBy.all + GroupBy.any + GroupBy.bfill + GroupBy.backfill + GroupBy.count + GroupBy.cumcount + GroupBy.cummax + GroupBy.cummin + GroupBy.cumprod + GroupBy.cumsum + GroupBy.ffill + GroupBy.first + GroupBy.head + GroupBy.last + GroupBy.max + GroupBy.mean + GroupBy.median + GroupBy.min + GroupBy.ngroup + GroupBy.nth + GroupBy.ohlc + GroupBy.pad + GroupBy.prod + GroupBy.rank + GroupBy.pct_change + GroupBy.size + GroupBy.sem + GroupBy.std + GroupBy.sum + GroupBy.var + GroupBy.tail + +The following methods are available in both ``SeriesGroupBy`` and +``DataFrameGroupBy`` objects, but may differ slightly, usually in that +the ``DataFrameGroupBy`` version permits the specification of an +axis argument, and often an argument indicating whether to restrict +application to columns of a specific data type. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.all + DataFrameGroupBy.any + DataFrameGroupBy.backfill + DataFrameGroupBy.bfill + DataFrameGroupBy.corr + DataFrameGroupBy.count + DataFrameGroupBy.cov + DataFrameGroupBy.cumcount + DataFrameGroupBy.cummax + DataFrameGroupBy.cummin + DataFrameGroupBy.cumprod + DataFrameGroupBy.cumsum + DataFrameGroupBy.describe + DataFrameGroupBy.diff + DataFrameGroupBy.ffill + DataFrameGroupBy.fillna + DataFrameGroupBy.filter + DataFrameGroupBy.hist + DataFrameGroupBy.idxmax + DataFrameGroupBy.idxmin + DataFrameGroupBy.mad + DataFrameGroupBy.nunique + DataFrameGroupBy.pad + DataFrameGroupBy.pct_change + DataFrameGroupBy.plot + DataFrameGroupBy.quantile + DataFrameGroupBy.rank + DataFrameGroupBy.resample + DataFrameGroupBy.sample + DataFrameGroupBy.shift + DataFrameGroupBy.size + DataFrameGroupBy.skew + DataFrameGroupBy.take + DataFrameGroupBy.tshift + +The following methods are available only for ``SeriesGroupBy`` objects. + +.. 
autosummary:: + :toctree: api/ + + SeriesGroupBy.hist + SeriesGroupBy.nlargest + SeriesGroupBy.nsmallest + SeriesGroupBy.nunique + SeriesGroupBy.unique + SeriesGroupBy.value_counts + SeriesGroupBy.is_monotonic_increasing + SeriesGroupBy.is_monotonic_decreasing + +The following methods are available only for ``DataFrameGroupBy`` objects. + +.. autosummary:: + :toctree: api/ + + DataFrameGroupBy.corrwith + DataFrameGroupBy.boxplot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index 8d4ab19c341..fedec8e9124 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -13,4 +13,5 @@ the left sidebar to see how various elements look on this theme. series dataframe index_objects + groupby diff --git a/docs/cudf/source/groupby.md b/docs/cudf/source/user_guide/groupby.md similarity index 99% rename from docs/cudf/source/groupby.md rename to docs/cudf/source/user_guide/groupby.md index 8a0e5dddba0..12d1c846329 100644 --- a/docs/cudf/source/groupby.md +++ b/docs/cudf/source/user_guide/groupby.md @@ -1,5 +1,5 @@ -GroupBy -======= +Using GroupBy +============= cuDF supports a small (but important) subset of Pandas' [groupby API](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html). @@ -18,7 +18,7 @@ Pandas' [groupby API](https://pandas.pydata.org/pandas-docs/stable/user_guide/gr See the section on [apply](#groupby-apply) for more details. 1. `GroupBy.pipe` similar to [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls). 
-## Grouping +## How to do Grouping A GroupBy object is created by grouping the values of a `Series` or `DataFrame` by one or more columns: diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst index 1061008eb3c..4cd5eeb00d8 100644 --- a/docs/cudf/source/user_guide/index.rst +++ b/docs/cudf/source/user_guide/index.rst @@ -10,3 +10,4 @@ User Guide 10min-cudf-cupy.ipynb guide-to-udfs.ipynb Working-with-missing-data.ipynb + groupby diff --git a/python/cudf/setup.py b/python/cudf/setup.py index 54921396b6f..f0c5cda2b47 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -218,7 +218,7 @@ def run(self): extensions, nthreads=nthreads, compiler_directives=dict( - profile=False, language_level=3, embedsignature=True + profile=False, language_level=3, embedsignature=True, binding=True ), ), packages=find_packages(include=["cudf", "cudf.*"]), From e719bff074c4c3cde9342db12465e7c4897fe769 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 14 Jul 2021 16:42:57 -0700 Subject: [PATCH 07/49] cleanUp --- .../source/api_docs/api/cudf.DataFrame.T.rst | 6 - .../api_docs/api/cudf.DataFrame.__iter__.rst | 6 - .../api_docs/api/cudf.DataFrame.add.rst | 6 - .../api_docs/api/cudf.DataFrame.agg.rst | 6 - .../api_docs/api/cudf.DataFrame.all.rst | 6 - .../api_docs/api/cudf.DataFrame.any.rst | 6 - .../api_docs/api/cudf.DataFrame.append.rst | 6 - .../api_docs/api/cudf.DataFrame.assign.rst | 6 - .../api_docs/api/cudf.DataFrame.astype.rst | 6 - .../source/api_docs/api/cudf.DataFrame.at.rst | 6 - .../api_docs/api/cudf.DataFrame.clip.rst | 6 - .../api_docs/api/cudf.DataFrame.columns.rst | 6 - .../api_docs/api/cudf.DataFrame.copy.rst | 6 - .../api_docs/api/cudf.DataFrame.corr.rst | 6 - .../api_docs/api/cudf.DataFrame.count.rst | 6 - .../api_docs/api/cudf.DataFrame.cov.rst | 6 - .../api_docs/api/cudf.DataFrame.cummax.rst | 6 - .../api_docs/api/cudf.DataFrame.cummin.rst | 6 - .../api_docs/api/cudf.DataFrame.cumprod.rst | 6 - 
.../api_docs/api/cudf.DataFrame.cumsum.rst | 6 - .../api_docs/api/cudf.DataFrame.describe.rst | 6 - .../api_docs/api/cudf.DataFrame.div.rst | 6 - .../api_docs/api/cudf.DataFrame.drop.rst | 6 - .../api/cudf.DataFrame.drop_duplicates.rst | 6 - .../api_docs/api/cudf.DataFrame.dropna.rst | 6 - .../api_docs/api/cudf.DataFrame.dtypes.rst | 6 - .../api_docs/api/cudf.DataFrame.empty.rst | 6 - .../api_docs/api/cudf.DataFrame.equals.rst | 6 - .../api_docs/api/cudf.DataFrame.explode.rst | 6 - .../api_docs/api/cudf.DataFrame.fillna.rst | 6 - .../api_docs/api/cudf.DataFrame.floordiv.rst | 6 - .../api/cudf.DataFrame.from_records.rst | 6 - .../api_docs/api/cudf.DataFrame.groupby.rst | 6 - .../api_docs/api/cudf.DataFrame.head.rst | 6 - .../api_docs/api/cudf.DataFrame.iat.rst | 6 - .../api_docs/api/cudf.DataFrame.iloc.rst | 6 - .../api_docs/api/cudf.DataFrame.index.rst | 6 - .../api_docs/api/cudf.DataFrame.info.rst | 6 - .../api_docs/api/cudf.DataFrame.insert.rst | 6 - .../api_docs/api/cudf.DataFrame.isin.rst | 6 - .../api_docs/api/cudf.DataFrame.isna.rst | 6 - .../api_docs/api/cudf.DataFrame.isnull.rst | 6 - .../api_docs/api/cudf.DataFrame.iteritems.rst | 6 - .../api_docs/api/cudf.DataFrame.iterrows.rst | 6 - .../api/cudf.DataFrame.itertuples.rst | 6 - .../api_docs/api/cudf.DataFrame.join.rst | 6 - .../api_docs/api/cudf.DataFrame.keys.rst | 6 - .../api_docs/api/cudf.DataFrame.kurt.rst | 6 - .../api_docs/api/cudf.DataFrame.kurtosis.rst | 6 - .../api_docs/api/cudf.DataFrame.loc.rst | 6 - .../api_docs/api/cudf.DataFrame.mask.rst | 6 - .../api_docs/api/cudf.DataFrame.max.rst | 6 - .../api_docs/api/cudf.DataFrame.mean.rst | 6 - .../api_docs/api/cudf.DataFrame.melt.rst | 6 - .../api/cudf.DataFrame.memory_usage.rst | 6 - .../api_docs/api/cudf.DataFrame.merge.rst | 6 - .../api_docs/api/cudf.DataFrame.min.rst | 6 - .../api_docs/api/cudf.DataFrame.mod.rst | 6 - .../api_docs/api/cudf.DataFrame.mode.rst | 6 - .../api_docs/api/cudf.DataFrame.mul.rst | 6 - 
.../api_docs/api/cudf.DataFrame.ndim.rst | 6 - .../api_docs/api/cudf.DataFrame.nlargest.rst | 6 - .../api_docs/api/cudf.DataFrame.notna.rst | 6 - .../api_docs/api/cudf.DataFrame.notnull.rst | 6 - .../api_docs/api/cudf.DataFrame.nsmallest.rst | 6 - .../api_docs/api/cudf.DataFrame.pipe.rst | 6 - .../api_docs/api/cudf.DataFrame.pivot.rst | 6 - .../api_docs/api/cudf.DataFrame.pop.rst | 6 - .../api_docs/api/cudf.DataFrame.pow.rst | 6 - .../api_docs/api/cudf.DataFrame.prod.rst | 6 - .../api_docs/api/cudf.DataFrame.product.rst | 6 - .../api_docs/api/cudf.DataFrame.quantile.rst | 6 - .../api_docs/api/cudf.DataFrame.query.rst | 6 - .../api_docs/api/cudf.DataFrame.radd.rst | 6 - .../api_docs/api/cudf.DataFrame.rank.rst | 6 - .../api_docs/api/cudf.DataFrame.rdiv.rst | 6 - .../api_docs/api/cudf.DataFrame.reindex.rst | 6 - .../api_docs/api/cudf.DataFrame.rename.rst | 6 - .../api_docs/api/cudf.DataFrame.replace.rst | 6 - .../api/cudf.DataFrame.reset_index.rst | 6 - .../api_docs/api/cudf.DataFrame.rfloordiv.rst | 6 - .../api_docs/api/cudf.DataFrame.rmod.rst | 6 - .../api_docs/api/cudf.DataFrame.rmul.rst | 6 - .../api_docs/api/cudf.DataFrame.rolling.rst | 6 - .../api_docs/api/cudf.DataFrame.round.rst | 6 - .../api_docs/api/cudf.DataFrame.rpow.rst | 6 - .../source/api_docs/api/cudf.DataFrame.rst | 181 -------------- .../api_docs/api/cudf.DataFrame.rsub.rst | 6 - .../api_docs/api/cudf.DataFrame.rtruediv.rst | 6 - .../api_docs/api/cudf.DataFrame.sample.rst | 6 - .../api/cudf.DataFrame.select_dtypes.rst | 6 - .../api_docs/api/cudf.DataFrame.set_index.rst | 6 - .../api_docs/api/cudf.DataFrame.shape.rst | 6 - .../api_docs/api/cudf.DataFrame.shift.rst | 6 - .../api_docs/api/cudf.DataFrame.size.rst | 6 - .../api_docs/api/cudf.DataFrame.skew.rst | 6 - .../api/cudf.DataFrame.sort_index.rst | 6 - .../api/cudf.DataFrame.sort_values.rst | 6 - .../api_docs/api/cudf.DataFrame.stack.rst | 6 - .../api_docs/api/cudf.DataFrame.std.rst | 6 - .../api_docs/api/cudf.DataFrame.sub.rst | 6 - 
.../api_docs/api/cudf.DataFrame.sum.rst | 6 - .../api_docs/api/cudf.DataFrame.tail.rst | 6 - .../api_docs/api/cudf.DataFrame.take.rst | 6 - .../api_docs/api/cudf.DataFrame.to_csv.rst | 6 - .../api_docs/api/cudf.DataFrame.to_dict.rst | 6 - .../api/cudf.DataFrame.to_feather.rst | 6 - .../api_docs/api/cudf.DataFrame.to_hdf.rst | 6 - .../api_docs/api/cudf.DataFrame.to_json.rst | 6 - .../api/cudf.DataFrame.to_parquet.rst | 6 - .../api/cudf.DataFrame.to_records.rst | 6 - .../api_docs/api/cudf.DataFrame.to_string.rst | 6 - .../api_docs/api/cudf.DataFrame.transpose.rst | 6 - .../api_docs/api/cudf.DataFrame.truediv.rst | 6 - .../api_docs/api/cudf.DataFrame.unstack.rst | 6 - .../api_docs/api/cudf.DataFrame.update.rst | 6 - .../api_docs/api/cudf.DataFrame.values.rst | 6 - .../api_docs/api/cudf.DataFrame.var.rst | 6 - .../api_docs/api/cudf.DataFrame.where.rst | 6 - .../api_docs/api/cudf.Series.__array__.rst | 6 - .../api_docs/api/cudf.Series.__iter__.rst | 6 - .../source/api_docs/api/cudf.Series.abs.rst | 6 - .../source/api_docs/api/cudf.Series.add.rst | 6 - .../source/api_docs/api/cudf.Series.all.rst | 6 - .../source/api_docs/api/cudf.Series.any.rst | 6 - .../api_docs/api/cudf.Series.append.rst | 6 - .../api_docs/api/cudf.Series.argsort.rst | 6 - .../api_docs/api/cudf.Series.astype.rst | 6 - .../source/api_docs/api/cudf.Series.cat.rst | 6 - .../source/api_docs/api/cudf.Series.clip.rst | 6 - .../source/api_docs/api/cudf.Series.copy.rst | 6 - .../source/api_docs/api/cudf.Series.corr.rst | 6 - .../source/api_docs/api/cudf.Series.count.rst | 6 - .../source/api_docs/api/cudf.Series.cov.rst | 6 - .../api_docs/api/cudf.Series.cummax.rst | 6 - .../api_docs/api/cudf.Series.cummin.rst | 6 - .../api_docs/api/cudf.Series.cumprod.rst | 6 - .../api_docs/api/cudf.Series.cumsum.rst | 6 - .../api_docs/api/cudf.Series.describe.rst | 6 - .../source/api_docs/api/cudf.Series.diff.rst | 6 - .../source/api_docs/api/cudf.Series.drop.rst | 6 - .../api/cudf.Series.drop_duplicates.rst | 6 - 
.../api_docs/api/cudf.Series.dropna.rst | 6 - .../source/api_docs/api/cudf.Series.dt.rst | 6 - .../source/api_docs/api/cudf.Series.dtype.rst | 6 - .../source/api_docs/api/cudf.Series.empty.rst | 6 - .../source/api_docs/api/cudf.Series.eq.rst | 6 - .../api_docs/api/cudf.Series.equals.rst | 6 - .../api_docs/api/cudf.Series.explode.rst | 6 - .../api_docs/api/cudf.Series.factorize.rst | 6 - .../api_docs/api/cudf.Series.fillna.rst | 6 - .../api_docs/api/cudf.Series.floordiv.rst | 6 - .../source/api_docs/api/cudf.Series.ge.rst | 6 - .../api_docs/api/cudf.Series.groupby.rst | 6 - .../source/api_docs/api/cudf.Series.gt.rst | 6 - .../source/api_docs/api/cudf.Series.head.rst | 6 - .../source/api_docs/api/cudf.Series.iloc.rst | 6 - .../source/api_docs/api/cudf.Series.index.rst | 6 - .../api_docs/api/cudf.Series.is_monotonic.rst | 6 - .../cudf.Series.is_monotonic_decreasing.rst | 6 - .../cudf.Series.is_monotonic_increasing.rst | 6 - .../api_docs/api/cudf.Series.is_unique.rst | 6 - .../source/api_docs/api/cudf.Series.isin.rst | 6 - .../source/api_docs/api/cudf.Series.isna.rst | 6 - .../api_docs/api/cudf.Series.isnull.rst | 6 - .../source/api_docs/api/cudf.Series.items.rst | 6 - .../api_docs/api/cudf.Series.iteritems.rst | 6 - .../source/api_docs/api/cudf.Series.keys.rst | 6 - .../source/api_docs/api/cudf.Series.kurt.rst | 6 - .../api_docs/api/cudf.Series.kurtosis.rst | 6 - .../source/api_docs/api/cudf.Series.le.rst | 6 - .../source/api_docs/api/cudf.Series.loc.rst | 6 - .../source/api_docs/api/cudf.Series.lt.rst | 6 - .../source/api_docs/api/cudf.Series.map.rst | 6 - .../source/api_docs/api/cudf.Series.mask.rst | 6 - .../source/api_docs/api/cudf.Series.max.rst | 6 - .../source/api_docs/api/cudf.Series.mean.rst | 6 - .../api_docs/api/cudf.Series.median.rst | 6 - .../api_docs/api/cudf.Series.memory_usage.rst | 6 - .../source/api_docs/api/cudf.Series.min.rst | 6 - .../source/api_docs/api/cudf.Series.mod.rst | 6 - .../source/api_docs/api/cudf.Series.mode.rst | 6 - 
.../source/api_docs/api/cudf.Series.mul.rst | 6 - .../source/api_docs/api/cudf.Series.name.rst | 6 - .../source/api_docs/api/cudf.Series.ndim.rst | 6 - .../source/api_docs/api/cudf.Series.ne.rst | 6 - .../api_docs/api/cudf.Series.nlargest.rst | 6 - .../source/api_docs/api/cudf.Series.notna.rst | 6 - .../api_docs/api/cudf.Series.notnull.rst | 6 - .../api_docs/api/cudf.Series.nsmallest.rst | 6 - .../api_docs/api/cudf.Series.nunique.rst | 6 - .../source/api_docs/api/cudf.Series.pipe.rst | 6 - .../source/api_docs/api/cudf.Series.pow.rst | 6 - .../source/api_docs/api/cudf.Series.prod.rst | 6 - .../api_docs/api/cudf.Series.product.rst | 6 - .../api_docs/api/cudf.Series.quantile.rst | 6 - .../source/api_docs/api/cudf.Series.radd.rst | 6 - .../source/api_docs/api/cudf.Series.rank.rst | 6 - .../api_docs/api/cudf.Series.reindex.rst | 6 - .../api_docs/api/cudf.Series.rename.rst | 6 - .../api_docs/api/cudf.Series.repeat.rst | 6 - .../api_docs/api/cudf.Series.replace.rst | 6 - .../api_docs/api/cudf.Series.reset_index.rst | 6 - .../api_docs/api/cudf.Series.rfloordiv.rst | 6 - .../source/api_docs/api/cudf.Series.rmod.rst | 6 - .../source/api_docs/api/cudf.Series.rmul.rst | 6 - .../api_docs/api/cudf.Series.rolling.rst | 6 - .../source/api_docs/api/cudf.Series.round.rst | 6 - .../source/api_docs/api/cudf.Series.rpow.rst | 6 - docs/cudf/source/api_docs/api/cudf.Series.rst | 205 --------------- .../source/api_docs/api/cudf.Series.rsub.rst | 6 - .../api_docs/api/cudf.Series.rtruediv.rst | 6 - .../api_docs/api/cudf.Series.sample.rst | 6 - .../api_docs/api/cudf.Series.searchsorted.rst | 6 - .../source/api_docs/api/cudf.Series.shape.rst | 6 - .../source/api_docs/api/cudf.Series.shift.rst | 6 - .../source/api_docs/api/cudf.Series.size.rst | 6 - .../source/api_docs/api/cudf.Series.skew.rst | 6 - .../api_docs/api/cudf.Series.sort_index.rst | 6 - .../api_docs/api/cudf.Series.sort_values.rst | 6 - .../source/api_docs/api/cudf.Series.std.rst | 6 - .../source/api_docs/api/cudf.Series.str.rst | 
6 - .../source/api_docs/api/cudf.Series.sub.rst | 6 - .../source/api_docs/api/cudf.Series.sum.rst | 6 - .../source/api_docs/api/cudf.Series.tail.rst | 6 - .../source/api_docs/api/cudf.Series.take.rst | 6 - .../api_docs/api/cudf.Series.to_dict.rst | 6 - .../api_docs/api/cudf.Series.to_frame.rst | 6 - .../api_docs/api/cudf.Series.to_hdf.rst | 6 - .../api_docs/api/cudf.Series.to_json.rst | 6 - .../api_docs/api/cudf.Series.to_list.rst | 6 - .../api_docs/api/cudf.Series.to_string.rst | 6 - .../api_docs/api/cudf.Series.truediv.rst | 6 - .../api_docs/api/cudf.Series.unique.rst | 6 - .../api_docs/api/cudf.Series.update.rst | 6 - .../api_docs/api/cudf.Series.value_counts.rst | 6 - .../api_docs/api/cudf.Series.values.rst | 6 - .../source/api_docs/api/cudf.Series.var.rst | 6 - .../source/api_docs/api/cudf.Series.where.rst | 6 - .../api_docs/api/pandas.DataFrame.T.rst | 6 - .../api/pandas.DataFrame.__iter__.rst | 6 - .../api_docs/api/pandas.DataFrame.abs.rst | 6 - .../api_docs/api/pandas.DataFrame.add.rst | 6 - .../api/pandas.DataFrame.add_prefix.rst | 6 - .../api/pandas.DataFrame.add_suffix.rst | 6 - .../api_docs/api/pandas.DataFrame.agg.rst | 6 - .../api/pandas.DataFrame.aggregate.rst | 6 - .../api_docs/api/pandas.DataFrame.align.rst | 6 - .../api_docs/api/pandas.DataFrame.all.rst | 6 - .../api_docs/api/pandas.DataFrame.any.rst | 6 - .../api_docs/api/pandas.DataFrame.append.rst | 6 - .../api_docs/api/pandas.DataFrame.apply.rst | 6 - .../api/pandas.DataFrame.applymap.rst | 6 - .../api_docs/api/pandas.DataFrame.asfreq.rst | 6 - .../api_docs/api/pandas.DataFrame.asof.rst | 6 - .../api_docs/api/pandas.DataFrame.assign.rst | 6 - .../api_docs/api/pandas.DataFrame.astype.rst | 6 - .../api_docs/api/pandas.DataFrame.at.rst | 6 - .../api_docs/api/pandas.DataFrame.at_time.rst | 6 - .../api_docs/api/pandas.DataFrame.attrs.rst | 6 - .../api_docs/api/pandas.DataFrame.axes.rst | 6 - .../api/pandas.DataFrame.backfill.rst | 6 - .../api/pandas.DataFrame.between_time.rst | 6 - 
.../api_docs/api/pandas.DataFrame.bfill.rst | 6 - .../api_docs/api/pandas.DataFrame.bool.rst | 6 - .../api_docs/api/pandas.DataFrame.boxplot.rst | 6 - .../api_docs/api/pandas.DataFrame.clip.rst | 6 - .../api_docs/api/pandas.DataFrame.columns.rst | 6 - .../api_docs/api/pandas.DataFrame.combine.rst | 6 - .../api/pandas.DataFrame.combine_first.rst | 6 - .../api_docs/api/pandas.DataFrame.compare.rst | 6 - .../api/pandas.DataFrame.convert_dtypes.rst | 6 - .../api_docs/api/pandas.DataFrame.copy.rst | 6 - .../api_docs/api/pandas.DataFrame.corr.rst | 6 - .../api/pandas.DataFrame.corrwith.rst | 6 - .../api_docs/api/pandas.DataFrame.count.rst | 6 - .../api_docs/api/pandas.DataFrame.cov.rst | 6 - .../api_docs/api/pandas.DataFrame.cummax.rst | 6 - .../api_docs/api/pandas.DataFrame.cummin.rst | 6 - .../api_docs/api/pandas.DataFrame.cumprod.rst | 6 - .../api_docs/api/pandas.DataFrame.cumsum.rst | 6 - .../api/pandas.DataFrame.describe.rst | 6 - .../api_docs/api/pandas.DataFrame.diff.rst | 6 - .../api_docs/api/pandas.DataFrame.div.rst | 6 - .../api_docs/api/pandas.DataFrame.dot.rst | 6 - .../api_docs/api/pandas.DataFrame.drop.rst | 6 - .../api/pandas.DataFrame.drop_duplicates.rst | 6 - .../api/pandas.DataFrame.droplevel.rst | 6 - .../api_docs/api/pandas.DataFrame.dropna.rst | 6 - .../api_docs/api/pandas.DataFrame.dtypes.rst | 6 - .../api/pandas.DataFrame.duplicated.rst | 6 - .../api_docs/api/pandas.DataFrame.empty.rst | 6 - .../api_docs/api/pandas.DataFrame.eq.rst | 6 - .../api_docs/api/pandas.DataFrame.equals.rst | 6 - .../api_docs/api/pandas.DataFrame.eval.rst | 6 - .../api_docs/api/pandas.DataFrame.ewm.rst | 6 - .../api/pandas.DataFrame.expanding.rst | 6 - .../api_docs/api/pandas.DataFrame.explode.rst | 6 - .../api_docs/api/pandas.DataFrame.ffill.rst | 6 - .../api_docs/api/pandas.DataFrame.fillna.rst | 6 - .../api_docs/api/pandas.DataFrame.filter.rst | 6 - .../api_docs/api/pandas.DataFrame.first.rst | 6 - .../pandas.DataFrame.first_valid_index.rst | 6 - 
.../api/pandas.DataFrame.floordiv.rst | 6 - .../api/pandas.DataFrame.from_dict.rst | 6 - .../api/pandas.DataFrame.from_records.rst | 6 - .../api_docs/api/pandas.DataFrame.ge.rst | 6 - .../api_docs/api/pandas.DataFrame.get.rst | 6 - .../api_docs/api/pandas.DataFrame.groupby.rst | 6 - .../api_docs/api/pandas.DataFrame.gt.rst | 6 - .../api_docs/api/pandas.DataFrame.head.rst | 6 - .../api_docs/api/pandas.DataFrame.hist.rst | 6 - .../api_docs/api/pandas.DataFrame.iat.rst | 6 - .../api_docs/api/pandas.DataFrame.idxmax.rst | 6 - .../api_docs/api/pandas.DataFrame.idxmin.rst | 6 - .../api_docs/api/pandas.DataFrame.iloc.rst | 6 - .../api_docs/api/pandas.DataFrame.index.rst | 6 - .../api/pandas.DataFrame.infer_objects.rst | 6 - .../api_docs/api/pandas.DataFrame.info.rst | 6 - .../api_docs/api/pandas.DataFrame.insert.rst | 6 - .../api/pandas.DataFrame.interpolate.rst | 6 - .../api_docs/api/pandas.DataFrame.isin.rst | 6 - .../api_docs/api/pandas.DataFrame.isna.rst | 6 - .../api_docs/api/pandas.DataFrame.isnull.rst | 6 - .../api_docs/api/pandas.DataFrame.items.rst | 6 - .../api/pandas.DataFrame.iteritems.rst | 6 - .../api/pandas.DataFrame.iterrows.rst | 6 - .../api/pandas.DataFrame.itertuples.rst | 6 - .../api_docs/api/pandas.DataFrame.join.rst | 6 - .../api_docs/api/pandas.DataFrame.keys.rst | 6 - .../api_docs/api/pandas.DataFrame.kurt.rst | 6 - .../api/pandas.DataFrame.kurtosis.rst | 6 - .../api_docs/api/pandas.DataFrame.last.rst | 6 - .../api/pandas.DataFrame.last_valid_index.rst | 6 - .../api_docs/api/pandas.DataFrame.le.rst | 6 - .../api_docs/api/pandas.DataFrame.loc.rst | 6 - .../api_docs/api/pandas.DataFrame.lookup.rst | 6 - .../api_docs/api/pandas.DataFrame.lt.rst | 6 - .../api_docs/api/pandas.DataFrame.mad.rst | 6 - .../api_docs/api/pandas.DataFrame.mask.rst | 6 - .../api_docs/api/pandas.DataFrame.max.rst | 6 - .../api_docs/api/pandas.DataFrame.mean.rst | 6 - .../api_docs/api/pandas.DataFrame.median.rst | 6 - .../api_docs/api/pandas.DataFrame.melt.rst | 6 - 
.../api/pandas.DataFrame.memory_usage.rst | 6 - .../api_docs/api/pandas.DataFrame.merge.rst | 6 - .../api_docs/api/pandas.DataFrame.min.rst | 6 - .../api_docs/api/pandas.DataFrame.mod.rst | 6 - .../api_docs/api/pandas.DataFrame.mode.rst | 6 - .../api_docs/api/pandas.DataFrame.mul.rst | 6 - .../api_docs/api/pandas.DataFrame.ndim.rst | 6 - .../api_docs/api/pandas.DataFrame.ne.rst | 6 - .../api/pandas.DataFrame.nlargest.rst | 6 - .../api_docs/api/pandas.DataFrame.notna.rst | 6 - .../api_docs/api/pandas.DataFrame.notnull.rst | 6 - .../api/pandas.DataFrame.nsmallest.rst | 6 - .../api_docs/api/pandas.DataFrame.nunique.rst | 6 - .../api_docs/api/pandas.DataFrame.pad.rst | 6 - .../api/pandas.DataFrame.pct_change.rst | 6 - .../api_docs/api/pandas.DataFrame.pipe.rst | 6 - .../api_docs/api/pandas.DataFrame.pivot.rst | 6 - .../api/pandas.DataFrame.pivot_table.rst | 6 - .../api/pandas.DataFrame.plot.area.rst | 6 - .../api/pandas.DataFrame.plot.bar.rst | 6 - .../api/pandas.DataFrame.plot.barh.rst | 6 - .../api/pandas.DataFrame.plot.box.rst | 6 - .../api/pandas.DataFrame.plot.density.rst | 6 - .../api/pandas.DataFrame.plot.hexbin.rst | 6 - .../api/pandas.DataFrame.plot.hist.rst | 6 - .../api/pandas.DataFrame.plot.kde.rst | 6 - .../api/pandas.DataFrame.plot.line.rst | 6 - .../api/pandas.DataFrame.plot.pie.rst | 6 - .../api_docs/api/pandas.DataFrame.plot.rst | 6 - .../api/pandas.DataFrame.plot.scatter.rst | 6 - .../api_docs/api/pandas.DataFrame.pop.rst | 6 - .../api_docs/api/pandas.DataFrame.pow.rst | 6 - .../api_docs/api/pandas.DataFrame.prod.rst | 6 - .../api_docs/api/pandas.DataFrame.product.rst | 6 - .../api/pandas.DataFrame.quantile.rst | 6 - .../api_docs/api/pandas.DataFrame.query.rst | 6 - .../api_docs/api/pandas.DataFrame.radd.rst | 6 - .../api_docs/api/pandas.DataFrame.rank.rst | 6 - .../api_docs/api/pandas.DataFrame.rdiv.rst | 6 - .../api_docs/api/pandas.DataFrame.reindex.rst | 6 - .../api/pandas.DataFrame.reindex_like.rst | 6 - 
.../api_docs/api/pandas.DataFrame.rename.rst | 6 - .../api/pandas.DataFrame.rename_axis.rst | 6 - .../api/pandas.DataFrame.reorder_levels.rst | 6 - .../api_docs/api/pandas.DataFrame.replace.rst | 6 - .../api/pandas.DataFrame.resample.rst | 6 - .../api/pandas.DataFrame.reset_index.rst | 6 - .../api/pandas.DataFrame.rfloordiv.rst | 6 - .../api_docs/api/pandas.DataFrame.rmod.rst | 6 - .../api_docs/api/pandas.DataFrame.rmul.rst | 6 - .../api_docs/api/pandas.DataFrame.rolling.rst | 6 - .../api_docs/api/pandas.DataFrame.round.rst | 6 - .../api_docs/api/pandas.DataFrame.rpow.rst | 6 - .../source/api_docs/api/pandas.DataFrame.rst | 236 ------------------ .../api_docs/api/pandas.DataFrame.rsub.rst | 6 - .../api/pandas.DataFrame.rtruediv.rst | 6 - .../api_docs/api/pandas.DataFrame.sample.rst | 6 - .../api/pandas.DataFrame.select_dtypes.rst | 6 - .../api_docs/api/pandas.DataFrame.sem.rst | 6 - .../api/pandas.DataFrame.set_axis.rst | 6 - .../api/pandas.DataFrame.set_flags.rst | 6 - .../api/pandas.DataFrame.set_index.rst | 6 - .../api_docs/api/pandas.DataFrame.shape.rst | 6 - .../api_docs/api/pandas.DataFrame.shift.rst | 6 - .../api_docs/api/pandas.DataFrame.size.rst | 6 - .../api_docs/api/pandas.DataFrame.skew.rst | 6 - .../api/pandas.DataFrame.slice_shift.rst | 6 - .../api/pandas.DataFrame.sort_index.rst | 6 - .../api/pandas.DataFrame.sort_values.rst | 6 - .../api/pandas.DataFrame.sparse.density.rst | 6 - .../pandas.DataFrame.sparse.from_spmatrix.rst | 6 - .../api/pandas.DataFrame.sparse.to_coo.rst | 6 - .../api/pandas.DataFrame.sparse.to_dense.rst | 6 - .../api_docs/api/pandas.DataFrame.squeeze.rst | 6 - .../api_docs/api/pandas.DataFrame.stack.rst | 6 - .../api_docs/api/pandas.DataFrame.std.rst | 6 - .../api_docs/api/pandas.DataFrame.style.rst | 6 - .../api_docs/api/pandas.DataFrame.sub.rst | 6 - .../api_docs/api/pandas.DataFrame.sum.rst | 6 - .../api/pandas.DataFrame.swapaxes.rst | 6 - .../api/pandas.DataFrame.swaplevel.rst | 6 - .../api_docs/api/pandas.DataFrame.tail.rst | 
6 - .../api_docs/api/pandas.DataFrame.take.rst | 6 - .../api/pandas.DataFrame.to_clipboard.rst | 6 - .../api_docs/api/pandas.DataFrame.to_csv.rst | 6 - .../api_docs/api/pandas.DataFrame.to_dict.rst | 6 - .../api/pandas.DataFrame.to_excel.rst | 6 - .../api/pandas.DataFrame.to_feather.rst | 6 - .../api_docs/api/pandas.DataFrame.to_gbq.rst | 6 - .../api_docs/api/pandas.DataFrame.to_hdf.rst | 6 - .../api_docs/api/pandas.DataFrame.to_html.rst | 6 - .../api_docs/api/pandas.DataFrame.to_json.rst | 6 - .../api/pandas.DataFrame.to_latex.rst | 6 - .../api/pandas.DataFrame.to_markdown.rst | 6 - .../api/pandas.DataFrame.to_parquet.rst | 6 - .../api/pandas.DataFrame.to_period.rst | 6 - .../api/pandas.DataFrame.to_pickle.rst | 6 - .../api/pandas.DataFrame.to_records.rst | 6 - .../api_docs/api/pandas.DataFrame.to_sql.rst | 6 - .../api/pandas.DataFrame.to_stata.rst | 6 - .../api/pandas.DataFrame.to_string.rst | 6 - .../api/pandas.DataFrame.to_timestamp.rst | 6 - .../api/pandas.DataFrame.to_xarray.rst | 6 - .../api/pandas.DataFrame.transform.rst | 6 - .../api/pandas.DataFrame.transpose.rst | 6 - .../api_docs/api/pandas.DataFrame.truediv.rst | 6 - .../api/pandas.DataFrame.truncate.rst | 6 - .../api_docs/api/pandas.DataFrame.tshift.rst | 6 - .../api/pandas.DataFrame.tz_convert.rst | 6 - .../api/pandas.DataFrame.tz_localize.rst | 6 - .../api_docs/api/pandas.DataFrame.unstack.rst | 6 - .../api_docs/api/pandas.DataFrame.update.rst | 6 - .../api/pandas.DataFrame.value_counts.rst | 6 - .../api_docs/api/pandas.DataFrame.values.rst | 6 - .../api_docs/api/pandas.DataFrame.var.rst | 6 - .../api_docs/api/pandas.DataFrame.where.rst | 6 - .../api_docs/api/pandas.DataFrame.xs.rst | 6 - .../cudf/source/api_docs/api/pandas.Flags.rst | 28 --- 462 files changed, 3398 deletions(-) delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.T.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.__iter__.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.add.rst 
delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.agg.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.all.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.any.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.append.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.assign.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.astype.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.at.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.clip.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.columns.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.copy.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.corr.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.count.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cov.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cummax.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cummin.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cumprod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.cumsum.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.describe.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.div.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.drop.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.drop_duplicates.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.dropna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.dtypes.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.empty.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.equals.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.explode.rst delete mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.fillna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.floordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.from_records.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.groupby.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.head.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iat.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iloc.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.info.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.insert.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isin.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.isnull.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iteritems.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.iterrows.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.itertuples.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.join.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.keys.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.kurt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.kurtosis.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.loc.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mask.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.max.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mean.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.melt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.memory_usage.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.merge.rst 
delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.min.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mode.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.mul.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.ndim.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.nlargest.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.notna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.notnull.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.nsmallest.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pipe.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pivot.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pop.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.pow.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.prod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.product.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.quantile.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.query.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.radd.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rank.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rdiv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.reindex.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rename.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.replace.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.reset_index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rfloordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rmod.rst delete mode 100644 
docs/cudf/source/api_docs/api/cudf.DataFrame.rmul.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rolling.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.round.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rpow.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rsub.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.rtruediv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sample.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.select_dtypes.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.set_index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.shape.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.shift.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.size.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.skew.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sort_index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sort_values.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.stack.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.std.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sub.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.sum.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.tail.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.take.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_csv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_dict.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_feather.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_hdf.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_json.rst 
delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_parquet.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_records.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.to_string.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.transpose.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.truediv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.unstack.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.update.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.values.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.var.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.DataFrame.where.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.__array__.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.__iter__.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.abs.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.add.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.all.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.any.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.append.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.argsort.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.astype.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cat.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.clip.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.copy.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.corr.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.count.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cov.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cummax.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cummin.rst delete mode 100644 
docs/cudf/source/api_docs/api/cudf.Series.cumprod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.cumsum.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.describe.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.diff.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.drop.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.drop_duplicates.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.dropna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.dt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.dtype.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.empty.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.eq.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.equals.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.explode.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.factorize.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.fillna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.floordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ge.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.groupby.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.gt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.head.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.iloc.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_decreasing.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_increasing.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.is_unique.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.isin.rst delete mode 100644 
docs/cudf/source/api_docs/api/cudf.Series.isna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.isnull.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.items.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.iteritems.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.keys.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.kurt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.kurtosis.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.le.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.loc.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.lt.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.map.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mask.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.max.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mean.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.median.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.memory_usage.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.min.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mode.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.mul.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.name.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ndim.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.ne.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nlargest.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.notna.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.notnull.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nsmallest.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.nunique.rst delete mode 100644 
docs/cudf/source/api_docs/api/cudf.Series.pipe.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.pow.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.prod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.product.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.quantile.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.radd.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rank.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.reindex.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rename.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.repeat.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.replace.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.reset_index.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rfloordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rmod.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rmul.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rolling.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.round.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rpow.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rsub.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.rtruediv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sample.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.searchsorted.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.shape.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.shift.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.size.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.skew.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sort_index.rst delete 
mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sort_values.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.std.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.str.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sub.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.sum.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.tail.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.take.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_dict.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_frame.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_hdf.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_json.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_list.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.to_string.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.truediv.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.unique.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.update.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.value_counts.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.values.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.var.rst delete mode 100644 docs/cudf/source/api_docs/api/cudf.Series.where.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.T.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.__iter__.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.abs.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add_prefix.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.add_suffix.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.agg.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.aggregate.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.align.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.all.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.any.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.append.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.apply.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.applymap.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.asfreq.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.asof.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.assign.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.astype.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.at.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.at_time.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.attrs.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.axes.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.backfill.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.between_time.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.bfill.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.bool.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.boxplot.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.clip.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.columns.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.combine.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.combine_first.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.compare.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.convert_dtypes.rst 
delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.copy.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.corr.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.corrwith.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.count.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cov.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cummax.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cummin.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cumprod.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.cumsum.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.describe.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.diff.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.div.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dot.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.drop.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.drop_duplicates.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.droplevel.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dropna.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.dtypes.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.duplicated.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.empty.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.eq.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.equals.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.eval.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ewm.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.expanding.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.explode.rst 
delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ffill.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.fillna.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.filter.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.first.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.first_valid_index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.floordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.from_dict.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.from_records.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ge.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.get.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.groupby.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.gt.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.head.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.hist.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iat.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.idxmax.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.idxmin.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iloc.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.infer_objects.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.info.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.insert.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.interpolate.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.isin.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.isna.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.isnull.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.items.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iteritems.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.iterrows.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.itertuples.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.join.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.keys.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.kurt.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.kurtosis.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.last.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.last_valid_index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.le.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.loc.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.lookup.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.lt.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mad.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mask.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.max.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mean.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.median.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.melt.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.memory_usage.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.merge.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.min.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mod.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.mode.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.mul.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ndim.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.ne.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.nlargest.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.notna.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.notnull.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.nsmallest.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.nunique.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pad.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pct_change.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pipe.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pivot.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pivot_table.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.area.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.bar.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.barh.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.box.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.density.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hexbin.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hist.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.kde.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.line.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.pie.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.plot.scatter.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.pop.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.pow.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.prod.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.product.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.quantile.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.query.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.radd.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rank.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rdiv.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reindex.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reindex_like.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rename.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rename_axis.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reorder_levels.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.replace.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.resample.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.reset_index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rfloordiv.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rmod.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rmul.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rolling.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.round.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rpow.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rsub.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.rtruediv.rst delete 
mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sample.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.select_dtypes.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sem.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_axis.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_flags.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.set_index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.shape.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.shift.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.size.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.skew.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.slice_shift.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sort_index.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sort_values.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.density.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.from_spmatrix.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_coo.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_dense.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.squeeze.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.stack.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.std.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.style.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sub.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.sum.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.swapaxes.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.swaplevel.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.tail.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.take.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_clipboard.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_csv.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_dict.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_excel.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_feather.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_gbq.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_hdf.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_html.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_json.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_latex.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_markdown.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_parquet.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_period.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_pickle.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_records.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_sql.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_stata.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_string.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_timestamp.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.to_xarray.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.transform.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.transpose.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.truediv.rst delete mode 100644 
docs/cudf/source/api_docs/api/pandas.DataFrame.truncate.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tshift.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tz_convert.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.tz_localize.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.unstack.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.update.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.value_counts.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.values.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.var.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.where.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.DataFrame.xs.rst delete mode 100644 docs/cudf/source/api_docs/api/pandas.Flags.rst diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.T.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.T.rst deleted file mode 100644 index 25124e8867b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.T.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.T -================ - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.T \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.__iter__.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.__iter__.rst deleted file mode 100644 index 6b677ac74ba..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.__iter__.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.\_\_iter\_\_ -=========================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.__iter__ \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.add.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.add.rst deleted file mode 100644 index b933d48bc0a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.add.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.add -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.add \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.agg.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.agg.rst deleted file mode 100644 index c7ac4d41007..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.agg.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.agg -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.agg \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.all.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.all.rst deleted file mode 100644 index 40542a63be1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.all.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.all -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.all \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.any.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.any.rst deleted file mode 100644 index 5ba21bd312f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.any.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.any -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.any \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.append.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.append.rst deleted file mode 100644 index b82eb6f84cf..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.append.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.append -===================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.append \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.assign.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.assign.rst deleted file mode 100644 index d0915e50f0c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.assign.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.assign -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.assign \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.astype.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.astype.rst deleted file mode 100644 index d0fb7487d97..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.astype.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.astype -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.astype \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.at.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.at.rst deleted file mode 100644 index f80e6454131..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.at.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.at -================= - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.at \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.clip.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.clip.rst deleted file mode 100644 index 4e31d13de01..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.clip.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.clip -=================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.clip \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.columns.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.columns.rst deleted file mode 100644 index de1f0258759..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.columns.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.columns -====================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.columns \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.copy.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.copy.rst deleted file mode 100644 index a31a5d2735a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.copy.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.copy -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.copy \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.corr.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.corr.rst deleted file mode 100644 index 401bb038146..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.corr.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.corr -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.corr \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.count.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.count.rst deleted file mode 100644 index fc9648dac50..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.count.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.count -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.count \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.cov.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.cov.rst deleted file mode 100644 index c04de8dc7fd..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.cov.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.cov -================== - -.. 
currentmodule:: cudf - -.. automethod:: DataFrame.cov \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.cummax.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.cummax.rst deleted file mode 100644 index f7f7528c6f4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.cummax.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.cummax -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.cummax \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.cummin.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.cummin.rst deleted file mode 100644 index 9ca2e6b1260..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.cummin.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.cummin -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.cummin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.cumprod.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.cumprod.rst deleted file mode 100644 index a8466880c01..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.cumprod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.cumprod -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.cumprod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.cumsum.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.cumsum.rst deleted file mode 100644 index 2d708bef623..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.cumsum.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.cumsum -===================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.cumsum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.describe.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.describe.rst deleted file mode 100644 index b5a873a900c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.describe.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.describe -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.describe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.div.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.div.rst deleted file mode 100644 index 09712d78e0c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.div.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.div -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.div \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.drop.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.drop.rst deleted file mode 100644 index 6d46566674d..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.drop -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.drop_duplicates.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.drop_duplicates.rst deleted file mode 100644 index da2ff3ac745..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.drop_duplicates.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.drop\_duplicates -=============================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.drop_duplicates \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.dropna.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.dropna.rst deleted file mode 100644 index ea12c21a746..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.dropna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.dropna -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.dropna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.dtypes.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.dtypes.rst deleted file mode 100644 index a1621218fda..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.dtypes.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.dtypes -===================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.dtypes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.empty.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.empty.rst deleted file mode 100644 index a76cd5f4c7f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.empty.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.empty -==================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.empty \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.equals.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.equals.rst deleted file mode 100644 index 1d13ff81a8d..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.equals.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.equals -===================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.equals \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.explode.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.explode.rst deleted file mode 100644 index c3c8a7c5517..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.explode.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.explode -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.explode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.fillna.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.fillna.rst deleted file mode 100644 index e4a9d17116c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.fillna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.fillna -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.fillna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.floordiv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.floordiv.rst deleted file mode 100644 index 596d6527f73..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.floordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.floordiv -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.floordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.from_records.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.from_records.rst deleted file mode 100644 index d846fb716d6..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.from_records.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.from\_records -============================ - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.from_records \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.groupby.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.groupby.rst deleted file mode 100644 index 1336f1799df..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.groupby -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.head.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.head.rst deleted file mode 100644 index 300ce94c5b1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.head.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.head -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.head \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.iat.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.iat.rst deleted file mode 100644 index 63ac068dbd1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.iat.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.iat -================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.iat \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.iloc.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.iloc.rst deleted file mode 100644 index b4901981e82..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.iloc.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.iloc -=================== - -.. currentmodule:: cudf - -.. 
autoproperty:: DataFrame.iloc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.index.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.index.rst deleted file mode 100644 index 45dd84fb2ff..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.index -==================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.info.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.info.rst deleted file mode 100644 index 155ab913cda..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.info.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.info -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.info \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.insert.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.insert.rst deleted file mode 100644 index 59147e0e1d2..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.insert.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.insert -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.insert \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.isin.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.isin.rst deleted file mode 100644 index 246adfd5839..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.isin.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.isin -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.isin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.isna.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.isna.rst deleted file mode 100644 index c357d1fc735..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.isna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.isna -=================== - -.. 
currentmodule:: cudf - -.. automethod:: DataFrame.isna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.isnull.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.isnull.rst deleted file mode 100644 index 6a53524bf77..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.isnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.isnull -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.isnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.iteritems.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.iteritems.rst deleted file mode 100644 index 53a7a765408..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.iteritems.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.iteritems -======================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.iteritems \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.iterrows.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.iterrows.rst deleted file mode 100644 index b8786b8c245..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.iterrows.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.iterrows -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.iterrows \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.itertuples.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.itertuples.rst deleted file mode 100644 index 39b423fae97..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.itertuples.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.itertuples -========================= - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.itertuples \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.join.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.join.rst deleted file mode 100644 index fbf3a654194..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.join.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.join -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.join \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.keys.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.keys.rst deleted file mode 100644 index 65fd44cb545..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.keys.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.keys -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.keys \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.kurt.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.kurt.rst deleted file mode 100644 index c0269b06880..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.kurt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.kurt -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.kurt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.kurtosis.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.kurtosis.rst deleted file mode 100644 index 51fd744610c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.kurtosis.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.kurtosis -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.kurtosis \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.loc.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.loc.rst deleted file mode 100644 index 3e3aa275803..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.loc.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.loc -================== - -.. 
currentmodule:: cudf - -.. autoproperty:: DataFrame.loc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.mask.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.mask.rst deleted file mode 100644 index 75ea8294e9a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.mask.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.mask -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.mask \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.max.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.max.rst deleted file mode 100644 index c79af1a6ad3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.max.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.max -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.max \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.mean.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.mean.rst deleted file mode 100644 index 0de3160ae59..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.mean.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.mean -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.mean \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.melt.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.melt.rst deleted file mode 100644 index 2fa561ce7ee..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.melt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.melt -=================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.melt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.memory_usage.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.memory_usage.rst deleted file mode 100644 index 28d77875b32..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.memory_usage.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.memory\_usage -============================ - -.. currentmodule:: cudf - -.. automethod:: DataFrame.memory_usage \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.merge.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.merge.rst deleted file mode 100644 index 507ac7bd257..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.merge.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.merge -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.merge \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.min.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.min.rst deleted file mode 100644 index 34fa97c3366..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.min.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.min -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.min \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.mod.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.mod.rst deleted file mode 100644 index 5f07f14315a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.mod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.mod -================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.mod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.mode.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.mode.rst deleted file mode 100644 index 23cb6a9c8d2..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.mode.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.mode -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.mode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.mul.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.mul.rst deleted file mode 100644 index 8a0a9897746..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.mul.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.mul -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.mul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.ndim.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.ndim.rst deleted file mode 100644 index d55a8eee601..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.ndim.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.ndim -=================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.ndim \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.nlargest.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.nlargest.rst deleted file mode 100644 index 1966e173b15..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.nlargest.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.nlargest -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.nlargest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.notna.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.notna.rst deleted file mode 100644 index 4103c1912c7..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.notna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.notna -==================== - -.. 
currentmodule:: cudf - -.. automethod:: DataFrame.notna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.notnull.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.notnull.rst deleted file mode 100644 index e4979c3dd9e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.notnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.notnull -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.notnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.nsmallest.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.nsmallest.rst deleted file mode 100644 index 4c07c32b5d5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.nsmallest.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.nsmallest -======================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.nsmallest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.pipe.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.pipe.rst deleted file mode 100644 index c72fe47e131..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.pipe.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.pipe -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.pipe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.pivot.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.pivot.rst deleted file mode 100644 index 89a6cb75afa..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.pivot.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.pivot -==================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.pivot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.pop.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.pop.rst deleted file mode 100644 index 63eb7aabfaa..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.pop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.pop -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.pop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.pow.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.pow.rst deleted file mode 100644 index 5357fa29104..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.pow.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.pow -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.pow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.prod.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.prod.rst deleted file mode 100644 index 5ebf3e02716..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.prod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.prod -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.prod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.product.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.product.rst deleted file mode 100644 index 13749e5b086..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.product.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.product -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.product \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.quantile.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.quantile.rst deleted file mode 100644 index 4ab302cc82f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.quantile.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.quantile -======================= - -.. 
currentmodule:: cudf - -.. automethod:: DataFrame.quantile \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.query.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.query.rst deleted file mode 100644 index f402361a8f2..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.query.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.query -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.query \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.radd.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.radd.rst deleted file mode 100644 index a27ae4e1649..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.radd.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.radd -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.radd \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rank.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rank.rst deleted file mode 100644 index 5543fed2a4b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rank.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rank -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rank \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rdiv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rdiv.rst deleted file mode 100644 index e71f5fdad8e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rdiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rdiv -=================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.rdiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.reindex.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.reindex.rst deleted file mode 100644 index 309abf95af3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.reindex.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.reindex -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.reindex \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rename.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rename.rst deleted file mode 100644 index b24f9b8243a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rename.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rename -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rename \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.replace.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.replace.rst deleted file mode 100644 index dd316bc6ec8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.replace.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.replace -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.replace \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.reset_index.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.reset_index.rst deleted file mode 100644 index 5b3add04c0e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.reset_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.reset\_index -=========================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.reset_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rfloordiv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rfloordiv.rst deleted file mode 100644 index 99e59b81fd8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rfloordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rfloordiv -======================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rfloordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rmod.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rmod.rst deleted file mode 100644 index 1cd39611612..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rmod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rmod -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rmod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rmul.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rmul.rst deleted file mode 100644 index 3c5b71a27e4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rmul.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rmul -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rmul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rolling.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rolling.rst deleted file mode 100644 index 21fbb9d0058..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rolling.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rolling -====================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.rolling \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.round.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.round.rst deleted file mode 100644 index 73edfd13546..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.round.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.round -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.round \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rpow.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rpow.rst deleted file mode 100644 index 0f39b7f40a5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rpow.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rpow -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rpow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rst deleted file mode 100644 index 6d7607c03d2..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rst +++ /dev/null @@ -1,181 +0,0 @@ -cudf.DataFrame -============== - -.. currentmodule:: cudf - -.. autoclass:: DataFrame - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~DataFrame.__init__ - ~DataFrame.acos - ~DataFrame.add - ~DataFrame.agg - ~DataFrame.all - ~DataFrame.any - ~DataFrame.append - ~DataFrame.apply_chunks - ~DataFrame.apply_rows - ~DataFrame.argsort - ~DataFrame.as_gpu_matrix - ~DataFrame.as_matrix - ~DataFrame.asin - ~DataFrame.assign - ~DataFrame.astype - ~DataFrame.atan - ~DataFrame.clip - ~DataFrame.copy - ~DataFrame.corr - ~DataFrame.cos - ~DataFrame.count - ~DataFrame.cov - ~DataFrame.cummax - ~DataFrame.cummin - ~DataFrame.cumprod - ~DataFrame.cumsum - ~DataFrame.describe - ~DataFrame.deserialize - ~DataFrame.device_deserialize - ~DataFrame.device_serialize - ~DataFrame.div - ~DataFrame.drop - ~DataFrame.drop_duplicates - ~DataFrame.dropna - ~DataFrame.equals - ~DataFrame.exp - ~DataFrame.explode - ~DataFrame.fillna - ~DataFrame.floordiv - ~DataFrame.from_arrow - ~DataFrame.from_pandas - ~DataFrame.from_records - ~DataFrame.groupby - ~DataFrame.hash_columns - ~DataFrame.head - ~DataFrame.host_deserialize - ~DataFrame.host_serialize - ~DataFrame.info - ~DataFrame.insert - ~DataFrame.interleave_columns - ~DataFrame.isin - ~DataFrame.isna - ~DataFrame.isnull - ~DataFrame.iteritems - ~DataFrame.iterrows - ~DataFrame.itertuples - ~DataFrame.join - ~DataFrame.keys - ~DataFrame.kurt - ~DataFrame.kurtosis - ~DataFrame.label_encoding - ~DataFrame.log - ~DataFrame.mask - ~DataFrame.max - ~DataFrame.mean - ~DataFrame.melt - ~DataFrame.memory_usage - ~DataFrame.merge - ~DataFrame.min - ~DataFrame.mod - ~DataFrame.mode - ~DataFrame.mul - ~DataFrame.nans_to_nulls - ~DataFrame.nlargest - ~DataFrame.notna - ~DataFrame.notnull - ~DataFrame.nsmallest - ~DataFrame.one_hot_encoding - ~DataFrame.partition_by_hash - ~DataFrame.pipe - ~DataFrame.pivot - ~DataFrame.pop - ~DataFrame.pow - ~DataFrame.prod - ~DataFrame.product - ~DataFrame.quantile - ~DataFrame.quantiles - ~DataFrame.query - ~DataFrame.radd - ~DataFrame.rank - ~DataFrame.rdiv - ~DataFrame.reindex - ~DataFrame.rename - ~DataFrame.repeat - 
~DataFrame.replace - ~DataFrame.reset_index - ~DataFrame.rfloordiv - ~DataFrame.rmod - ~DataFrame.rmul - ~DataFrame.rolling - ~DataFrame.round - ~DataFrame.rpow - ~DataFrame.rsub - ~DataFrame.rtruediv - ~DataFrame.sample - ~DataFrame.scatter_by_map - ~DataFrame.searchsorted - ~DataFrame.select_dtypes - ~DataFrame.serialize - ~DataFrame.set_index - ~DataFrame.shift - ~DataFrame.sin - ~DataFrame.skew - ~DataFrame.sort_index - ~DataFrame.sort_values - ~DataFrame.sqrt - ~DataFrame.stack - ~DataFrame.std - ~DataFrame.sub - ~DataFrame.sum - ~DataFrame.tail - ~DataFrame.take - ~DataFrame.tan - ~DataFrame.tile - ~DataFrame.to_arrow - ~DataFrame.to_csv - ~DataFrame.to_dict - ~DataFrame.to_dlpack - ~DataFrame.to_feather - ~DataFrame.to_hdf - ~DataFrame.to_json - ~DataFrame.to_orc - ~DataFrame.to_pandas - ~DataFrame.to_parquet - ~DataFrame.to_records - ~DataFrame.to_string - ~DataFrame.transpose - ~DataFrame.truediv - ~DataFrame.unstack - ~DataFrame.update - ~DataFrame.var - ~DataFrame.where - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DataFrame.T - ~DataFrame.at - ~DataFrame.columns - ~DataFrame.dtypes - ~DataFrame.empty - ~DataFrame.iat - ~DataFrame.iloc - ~DataFrame.index - ~DataFrame.loc - ~DataFrame.ndim - ~DataFrame.shape - ~DataFrame.size - ~DataFrame.values - - \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rsub.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rsub.rst deleted file mode 100644 index 678593a6cd1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rsub.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rsub -=================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.rsub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.rtruediv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.rtruediv.rst deleted file mode 100644 index c6f9d551408..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.rtruediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.rtruediv -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.rtruediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.sample.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.sample.rst deleted file mode 100644 index 403e7d2ddc3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.sample.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.sample -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.sample \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.select_dtypes.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.select_dtypes.rst deleted file mode 100644 index 3870b1bfc49..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.select_dtypes.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.select\_dtypes -============================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.select_dtypes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.set_index.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.set_index.rst deleted file mode 100644 index ccb8f37a93f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.set_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.set\_index -========================= - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.set_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.shape.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.shape.rst deleted file mode 100644 index 5d20c5c39fd..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.shape.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.shape -==================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.shape \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.shift.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.shift.rst deleted file mode 100644 index 52341677810..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.shift.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.shift -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.shift \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.size.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.size.rst deleted file mode 100644 index 6392d0bfc01..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.size.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.size -=================== - -.. currentmodule:: cudf - -.. autoproperty:: DataFrame.size \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.skew.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.skew.rst deleted file mode 100644 index 23b2923c35e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.skew.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.skew -=================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.skew \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_index.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_index.rst deleted file mode 100644 index 92a92185435..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.sort\_index -========================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.sort_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_values.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_values.rst deleted file mode 100644 index 118307482df..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.sort_values.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.sort\_values -=========================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.sort_values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.stack.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.stack.rst deleted file mode 100644 index 9d2706107f5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.stack.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.stack -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.stack \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.std.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.std.rst deleted file mode 100644 index c9854c22784..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.std.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.std -================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.std \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.sub.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.sub.rst deleted file mode 100644 index 3003228e933..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.sub.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.sub -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.sub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.sum.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.sum.rst deleted file mode 100644 index aaa037c8f39..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.sum.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.sum -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.sum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.tail.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.tail.rst deleted file mode 100644 index ceb00034370..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.tail.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.tail -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.tail \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.take.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.take.rst deleted file mode 100644 index 4c4ab320209..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.take.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.take -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.take \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_csv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_csv.rst deleted file mode 100644 index 16f8892eb46..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_csv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_csv -====================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.to_csv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_dict.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_dict.rst deleted file mode 100644 index 1872692d2cc..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_dict.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_dict -======================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_dict \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_feather.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_feather.rst deleted file mode 100644 index 544fd3037f0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_feather.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_feather -========================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_feather \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_hdf.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_hdf.rst deleted file mode 100644 index 0c73253e98f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_hdf.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_hdf -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_hdf \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_json.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_json.rst deleted file mode 100644 index 9d1bcbb67dd..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_json.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_json -======================= - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.to_json \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_parquet.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_parquet.rst deleted file mode 100644 index 038aaf8155c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_parquet.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_parquet -========================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_parquet \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_records.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_records.rst deleted file mode 100644 index 6d4c856a207..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_records.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_records -========================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_records \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_string.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.to_string.rst deleted file mode 100644 index 604e9f74200..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.to_string.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.to\_string -========================= - -.. currentmodule:: cudf - -.. automethod:: DataFrame.to_string \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.transpose.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.transpose.rst deleted file mode 100644 index 3b65e5696a3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.transpose.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.transpose -======================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.transpose \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.truediv.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.truediv.rst deleted file mode 100644 index b5e85b5f41c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.truediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.truediv -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.truediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.unstack.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.unstack.rst deleted file mode 100644 index 8cd26b58ae9..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.unstack.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.unstack -====================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.unstack \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.update.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.update.rst deleted file mode 100644 index 40c518ffa40..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.update.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.update -===================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.update \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.values.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.values.rst deleted file mode 100644 index c786c84f5f8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.values.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.values -===================== - -.. currentmodule:: cudf - -.. 
autoproperty:: DataFrame.values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.var.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.var.rst deleted file mode 100644 index 3313fd72b6e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.var.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.var -================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.var \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.DataFrame.where.rst b/docs/cudf/source/api_docs/api/cudf.DataFrame.where.rst deleted file mode 100644 index c2035bf11b0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.DataFrame.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.where -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.__array__.rst b/docs/cudf/source/api_docs/api/cudf.Series.__array__.rst deleted file mode 100644 index 0c4c8de3369..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.__array__.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.\_\_array\_\_ -========================= - -.. currentmodule:: cudf - -.. automethod:: Series.__array__ \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.__iter__.rst b/docs/cudf/source/api_docs/api/cudf.Series.__iter__.rst deleted file mode 100644 index d5a4d14f135..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.__iter__.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.\_\_iter\_\_ -======================== - -.. currentmodule:: cudf - -.. automethod:: Series.__iter__ \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.abs.rst b/docs/cudf/source/api_docs/api/cudf.Series.abs.rst deleted file mode 100644 index 73c5da6a8ec..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.abs.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.abs -=============== - -.. 
currentmodule:: cudf - -.. automethod:: Series.abs \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.add.rst b/docs/cudf/source/api_docs/api/cudf.Series.add.rst deleted file mode 100644 index 8d12133bc99..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.add.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.add -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.add \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.all.rst b/docs/cudf/source/api_docs/api/cudf.Series.all.rst deleted file mode 100644 index b2f10c4d6d9..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.all.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.all -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.all \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.any.rst b/docs/cudf/source/api_docs/api/cudf.Series.any.rst deleted file mode 100644 index aa14761853a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.any.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.any -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.any \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.append.rst b/docs/cudf/source/api_docs/api/cudf.Series.append.rst deleted file mode 100644 index 28001818b75..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.append.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.append -================== - -.. currentmodule:: cudf - -.. automethod:: Series.append \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.argsort.rst b/docs/cudf/source/api_docs/api/cudf.Series.argsort.rst deleted file mode 100644 index 49f2792f1b4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.argsort.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.argsort -=================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.argsort \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.astype.rst b/docs/cudf/source/api_docs/api/cudf.Series.astype.rst deleted file mode 100644 index 4e277445685..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.astype.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.astype -================== - -.. currentmodule:: cudf - -.. automethod:: Series.astype \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cat.rst b/docs/cudf/source/api_docs/api/cudf.Series.cat.rst deleted file mode 100644 index 7cf5613422f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cat.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cat -=============== - -.. currentmodule:: cudf - -.. autoproperty:: Series.cat \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.clip.rst b/docs/cudf/source/api_docs/api/cudf.Series.clip.rst deleted file mode 100644 index cf692d4748a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.clip.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.clip -================ - -.. currentmodule:: cudf - -.. automethod:: Series.clip \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.copy.rst b/docs/cudf/source/api_docs/api/cudf.Series.copy.rst deleted file mode 100644 index be01cb74123..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.copy.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.copy -================ - -.. currentmodule:: cudf - -.. automethod:: Series.copy \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.corr.rst b/docs/cudf/source/api_docs/api/cudf.Series.corr.rst deleted file mode 100644 index 58f2f48e6dc..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.corr.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.corr -================ - -.. currentmodule:: cudf - -.. 
automethod:: Series.corr \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.count.rst b/docs/cudf/source/api_docs/api/cudf.Series.count.rst deleted file mode 100644 index 97c97cd0a6b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.count.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.count -================= - -.. currentmodule:: cudf - -.. automethod:: Series.count \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cov.rst b/docs/cudf/source/api_docs/api/cudf.Series.cov.rst deleted file mode 100644 index 84d7aa345b0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cov.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cov -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.cov \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cummax.rst b/docs/cudf/source/api_docs/api/cudf.Series.cummax.rst deleted file mode 100644 index 4280843a488..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cummax.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cummax -================== - -.. currentmodule:: cudf - -.. automethod:: Series.cummax \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cummin.rst b/docs/cudf/source/api_docs/api/cudf.Series.cummin.rst deleted file mode 100644 index 5325162dc9e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cummin.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cummin -================== - -.. currentmodule:: cudf - -.. automethod:: Series.cummin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cumprod.rst b/docs/cudf/source/api_docs/api/cudf.Series.cumprod.rst deleted file mode 100644 index 89fdaeb3389..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cumprod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cumprod -=================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.cumprod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.cumsum.rst b/docs/cudf/source/api_docs/api/cudf.Series.cumsum.rst deleted file mode 100644 index 561d6ad40e8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.cumsum.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.cumsum -================== - -.. currentmodule:: cudf - -.. automethod:: Series.cumsum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.describe.rst b/docs/cudf/source/api_docs/api/cudf.Series.describe.rst deleted file mode 100644 index e130498cd12..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.describe.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.describe -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.describe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.diff.rst b/docs/cudf/source/api_docs/api/cudf.Series.diff.rst deleted file mode 100644 index 6e4f4fdc6fa..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.diff.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.diff -================ - -.. currentmodule:: cudf - -.. automethod:: Series.diff \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.drop.rst b/docs/cudf/source/api_docs/api/cudf.Series.drop.rst deleted file mode 100644 index bfa29fd5b96..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.drop -================ - -.. currentmodule:: cudf - -.. automethod:: Series.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.drop_duplicates.rst b/docs/cudf/source/api_docs/api/cudf.Series.drop_duplicates.rst deleted file mode 100644 index 92c8047cf60..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.drop_duplicates.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.drop\_duplicates -============================ - -.. 
currentmodule:: cudf - -.. automethod:: Series.drop_duplicates \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.dropna.rst b/docs/cudf/source/api_docs/api/cudf.Series.dropna.rst deleted file mode 100644 index 58b020cc58b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.dropna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.dropna -================== - -.. currentmodule:: cudf - -.. automethod:: Series.dropna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.dt.rst b/docs/cudf/source/api_docs/api/cudf.Series.dt.rst deleted file mode 100644 index dece7c123d2..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.dt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.dt -============== - -.. currentmodule:: cudf - -.. autoproperty:: Series.dt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.dtype.rst b/docs/cudf/source/api_docs/api/cudf.Series.dtype.rst deleted file mode 100644 index c2ca8be355a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.dtype.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.dtype -================= - -.. currentmodule:: cudf - -.. autoproperty:: Series.dtype \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.empty.rst b/docs/cudf/source/api_docs/api/cudf.Series.empty.rst deleted file mode 100644 index b01263df4d0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.empty.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.empty -================= - -.. currentmodule:: cudf - -.. autoproperty:: Series.empty \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.eq.rst b/docs/cudf/source/api_docs/api/cudf.Series.eq.rst deleted file mode 100644 index abdecc43672..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.eq.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.eq -============== - -.. currentmodule:: cudf - -.. 
automethod:: Series.eq \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.equals.rst b/docs/cudf/source/api_docs/api/cudf.Series.equals.rst deleted file mode 100644 index a8fd530dbec..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.equals.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.equals -================== - -.. currentmodule:: cudf - -.. automethod:: Series.equals \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.explode.rst b/docs/cudf/source/api_docs/api/cudf.Series.explode.rst deleted file mode 100644 index 77c76b5fb9d..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.explode.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.explode -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.explode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.factorize.rst b/docs/cudf/source/api_docs/api/cudf.Series.factorize.rst deleted file mode 100644 index 2a5a3e83a40..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.factorize.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.factorize -===================== - -.. currentmodule:: cudf - -.. automethod:: Series.factorize \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.fillna.rst b/docs/cudf/source/api_docs/api/cudf.Series.fillna.rst deleted file mode 100644 index 51c10af2bfe..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.fillna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.fillna -================== - -.. currentmodule:: cudf - -.. automethod:: Series.fillna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.floordiv.rst b/docs/cudf/source/api_docs/api/cudf.Series.floordiv.rst deleted file mode 100644 index d6dc083c722..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.floordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.floordiv -==================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.floordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.ge.rst b/docs/cudf/source/api_docs/api/cudf.Series.ge.rst deleted file mode 100644 index d4d4aac0b44..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.ge.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.ge -============== - -.. currentmodule:: cudf - -.. automethod:: Series.ge \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.groupby.rst b/docs/cudf/source/api_docs/api/cudf.Series.groupby.rst deleted file mode 100644 index d5d73a21e01..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.groupby -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.groupby \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.gt.rst b/docs/cudf/source/api_docs/api/cudf.Series.gt.rst deleted file mode 100644 index 820bb11892a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.gt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.gt -============== - -.. currentmodule:: cudf - -.. automethod:: Series.gt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.head.rst b/docs/cudf/source/api_docs/api/cudf.Series.head.rst deleted file mode 100644 index 34aa2c49ab0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.head.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.head -================ - -.. currentmodule:: cudf - -.. automethod:: Series.head \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.iloc.rst b/docs/cudf/source/api_docs/api/cudf.Series.iloc.rst deleted file mode 100644 index 6a00f47e5b5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.iloc.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.iloc -================ - -.. currentmodule:: cudf - -.. 
autoproperty:: Series.iloc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.index.rst b/docs/cudf/source/api_docs/api/cudf.Series.index.rst deleted file mode 100644 index 4247bb19ae7..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.index -================= - -.. currentmodule:: cudf - -.. autoproperty:: Series.index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic.rst b/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic.rst deleted file mode 100644 index 90da44d63b8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.is\_monotonic -========================= - -.. currentmodule:: cudf - -.. autoproperty:: Series.is_monotonic \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_decreasing.rst b/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_decreasing.rst deleted file mode 100644 index e13d84f9a7b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_decreasing.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.is\_monotonic\_decreasing -===================================== - -.. currentmodule:: cudf - -.. autoproperty:: Series.is_monotonic_decreasing \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_increasing.rst b/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_increasing.rst deleted file mode 100644 index 042a38d81cd..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.is_monotonic_increasing.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.is\_monotonic\_increasing -===================================== - -.. currentmodule:: cudf - -.. 
autoproperty:: Series.is_monotonic_increasing \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.is_unique.rst b/docs/cudf/source/api_docs/api/cudf.Series.is_unique.rst deleted file mode 100644 index b2e8ccdaf84..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.is_unique.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.is\_unique -====================== - -.. currentmodule:: cudf - -.. autoproperty:: Series.is_unique \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.isin.rst b/docs/cudf/source/api_docs/api/cudf.Series.isin.rst deleted file mode 100644 index 3b8a0d52f12..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.isin.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.isin -================ - -.. currentmodule:: cudf - -.. automethod:: Series.isin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.isna.rst b/docs/cudf/source/api_docs/api/cudf.Series.isna.rst deleted file mode 100644 index d68e294f088..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.isna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.isna -================ - -.. currentmodule:: cudf - -.. automethod:: Series.isna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.isnull.rst b/docs/cudf/source/api_docs/api/cudf.Series.isnull.rst deleted file mode 100644 index 36db975b80b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.isnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.isnull -================== - -.. currentmodule:: cudf - -.. automethod:: Series.isnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.items.rst b/docs/cudf/source/api_docs/api/cudf.Series.items.rst deleted file mode 100644 index d04cab76b96..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.items.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.items -================= - -.. currentmodule:: cudf - -.. 
automethod:: Series.items \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.iteritems.rst b/docs/cudf/source/api_docs/api/cudf.Series.iteritems.rst deleted file mode 100644 index a4cba62c0b5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.iteritems.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.iteritems -===================== - -.. currentmodule:: cudf - -.. automethod:: Series.iteritems \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.keys.rst b/docs/cudf/source/api_docs/api/cudf.Series.keys.rst deleted file mode 100644 index d40c33b47e1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.keys.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.keys -================ - -.. currentmodule:: cudf - -.. automethod:: Series.keys \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.kurt.rst b/docs/cudf/source/api_docs/api/cudf.Series.kurt.rst deleted file mode 100644 index 63f6a887ab4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.kurt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.kurt -================ - -.. currentmodule:: cudf - -.. automethod:: Series.kurt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.kurtosis.rst b/docs/cudf/source/api_docs/api/cudf.Series.kurtosis.rst deleted file mode 100644 index e6b479d232a..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.kurtosis.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.kurtosis -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.kurtosis \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.le.rst b/docs/cudf/source/api_docs/api/cudf.Series.le.rst deleted file mode 100644 index 1ed319207c0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.le.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.le -============== - -.. currentmodule:: cudf - -.. 
automethod:: Series.le \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.loc.rst b/docs/cudf/source/api_docs/api/cudf.Series.loc.rst deleted file mode 100644 index 73263375ac9..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.loc.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.loc -=============== - -.. currentmodule:: cudf - -.. autoproperty:: Series.loc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.lt.rst b/docs/cudf/source/api_docs/api/cudf.Series.lt.rst deleted file mode 100644 index e3476b31d58..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.lt.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.lt -============== - -.. currentmodule:: cudf - -.. automethod:: Series.lt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.map.rst b/docs/cudf/source/api_docs/api/cudf.Series.map.rst deleted file mode 100644 index f9df1375682..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.map.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.map -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.map \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.mask.rst b/docs/cudf/source/api_docs/api/cudf.Series.mask.rst deleted file mode 100644 index 30474b9115d..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.mask.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.mask -================ - -.. currentmodule:: cudf - -.. automethod:: Series.mask \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.max.rst b/docs/cudf/source/api_docs/api/cudf.Series.max.rst deleted file mode 100644 index 8bc038974af..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.max.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.max -=============== - -.. currentmodule:: cudf - -.. 
automethod:: Series.max \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.mean.rst b/docs/cudf/source/api_docs/api/cudf.Series.mean.rst deleted file mode 100644 index f38a630aefa..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.mean.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.mean -================ - -.. currentmodule:: cudf - -.. automethod:: Series.mean \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.median.rst b/docs/cudf/source/api_docs/api/cudf.Series.median.rst deleted file mode 100644 index 7ca126be8bf..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.median.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.median -================== - -.. currentmodule:: cudf - -.. automethod:: Series.median \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.memory_usage.rst b/docs/cudf/source/api_docs/api/cudf.Series.memory_usage.rst deleted file mode 100644 index 9bbc4a0e42f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.memory_usage.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.memory\_usage -========================= - -.. currentmodule:: cudf - -.. automethod:: Series.memory_usage \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.min.rst b/docs/cudf/source/api_docs/api/cudf.Series.min.rst deleted file mode 100644 index f67b7fd3ba4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.min.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.min -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.min \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.mod.rst b/docs/cudf/source/api_docs/api/cudf.Series.mod.rst deleted file mode 100644 index 20c8b5573a4..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.mod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.mod -=============== - -.. currentmodule:: cudf - -.. 
automethod:: Series.mod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.mode.rst b/docs/cudf/source/api_docs/api/cudf.Series.mode.rst deleted file mode 100644 index 614ed44aee8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.mode.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.mode -================ - -.. currentmodule:: cudf - -.. automethod:: Series.mode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.mul.rst b/docs/cudf/source/api_docs/api/cudf.Series.mul.rst deleted file mode 100644 index 16efb7891e1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.mul.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.mul -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.mul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.name.rst b/docs/cudf/source/api_docs/api/cudf.Series.name.rst deleted file mode 100644 index 5fc702aa1ed..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.name.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.name -================ - -.. currentmodule:: cudf - -.. autoproperty:: Series.name \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.ndim.rst b/docs/cudf/source/api_docs/api/cudf.Series.ndim.rst deleted file mode 100644 index 4938f0654c1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.ndim.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.ndim -================ - -.. currentmodule:: cudf - -.. autoproperty:: Series.ndim \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.ne.rst b/docs/cudf/source/api_docs/api/cudf.Series.ne.rst deleted file mode 100644 index 84f337eef6f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.ne.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.ne -============== - -.. currentmodule:: cudf - -.. 
automethod:: Series.ne \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.nlargest.rst b/docs/cudf/source/api_docs/api/cudf.Series.nlargest.rst deleted file mode 100644 index 50b49d6efc3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.nlargest.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.nlargest -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.nlargest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.notna.rst b/docs/cudf/source/api_docs/api/cudf.Series.notna.rst deleted file mode 100644 index ebe5a1a662e..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.notna.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.notna -================= - -.. currentmodule:: cudf - -.. automethod:: Series.notna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.notnull.rst b/docs/cudf/source/api_docs/api/cudf.Series.notnull.rst deleted file mode 100644 index 8f605dac836..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.notnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.notnull -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.notnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.nsmallest.rst b/docs/cudf/source/api_docs/api/cudf.Series.nsmallest.rst deleted file mode 100644 index 285b06d8198..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.nsmallest.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.nsmallest -===================== - -.. currentmodule:: cudf - -.. automethod:: Series.nsmallest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.nunique.rst b/docs/cudf/source/api_docs/api/cudf.Series.nunique.rst deleted file mode 100644 index a64bc64217c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.nunique.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.nunique -=================== - -.. 
currentmodule:: cudf - -.. automethod:: Series.nunique \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.pipe.rst b/docs/cudf/source/api_docs/api/cudf.Series.pipe.rst deleted file mode 100644 index 6717547b6af..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.pipe.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.pipe -================ - -.. currentmodule:: cudf - -.. automethod:: Series.pipe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.pow.rst b/docs/cudf/source/api_docs/api/cudf.Series.pow.rst deleted file mode 100644 index 34ed150fa82..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.pow.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.pow -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.pow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.prod.rst b/docs/cudf/source/api_docs/api/cudf.Series.prod.rst deleted file mode 100644 index 28b2ac3fb66..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.prod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.prod -================ - -.. currentmodule:: cudf - -.. automethod:: Series.prod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.product.rst b/docs/cudf/source/api_docs/api/cudf.Series.product.rst deleted file mode 100644 index 0baf88de478..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.product.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.product -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.product \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.quantile.rst b/docs/cudf/source/api_docs/api/cudf.Series.quantile.rst deleted file mode 100644 index 80789c444a5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.quantile.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.quantile -==================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.quantile \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.radd.rst b/docs/cudf/source/api_docs/api/cudf.Series.radd.rst deleted file mode 100644 index f385b30c142..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.radd.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.radd -================ - -.. currentmodule:: cudf - -.. automethod:: Series.radd \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rank.rst b/docs/cudf/source/api_docs/api/cudf.Series.rank.rst deleted file mode 100644 index a05f68f6b85..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rank.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rank -================ - -.. currentmodule:: cudf - -.. automethod:: Series.rank \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.reindex.rst b/docs/cudf/source/api_docs/api/cudf.Series.reindex.rst deleted file mode 100644 index 73b37437d88..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.reindex.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.reindex -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.reindex \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rename.rst b/docs/cudf/source/api_docs/api/cudf.Series.rename.rst deleted file mode 100644 index 3da4b45cdd5..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rename.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rename -================== - -.. currentmodule:: cudf - -.. automethod:: Series.rename \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.repeat.rst b/docs/cudf/source/api_docs/api/cudf.Series.repeat.rst deleted file mode 100644 index c40030f685c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.repeat.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.repeat -================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.repeat \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.replace.rst b/docs/cudf/source/api_docs/api/cudf.Series.replace.rst deleted file mode 100644 index e8f646d12ab..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.replace.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.replace -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.replace \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.reset_index.rst b/docs/cudf/source/api_docs/api/cudf.Series.reset_index.rst deleted file mode 100644 index 170eb499ad1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.reset_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.reset\_index -======================== - -.. currentmodule:: cudf - -.. automethod:: Series.reset_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rfloordiv.rst b/docs/cudf/source/api_docs/api/cudf.Series.rfloordiv.rst deleted file mode 100644 index 6e3cadbe616..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rfloordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rfloordiv -===================== - -.. currentmodule:: cudf - -.. automethod:: Series.rfloordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rmod.rst b/docs/cudf/source/api_docs/api/cudf.Series.rmod.rst deleted file mode 100644 index eec1bfb9211..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rmod.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rmod -================ - -.. currentmodule:: cudf - -.. automethod:: Series.rmod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rmul.rst b/docs/cudf/source/api_docs/api/cudf.Series.rmul.rst deleted file mode 100644 index bc96e5fb616..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rmul.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rmul -================ - -.. 
currentmodule:: cudf - -.. automethod:: Series.rmul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rolling.rst b/docs/cudf/source/api_docs/api/cudf.Series.rolling.rst deleted file mode 100644 index 2c796281881..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rolling.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rolling -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.rolling \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.round.rst b/docs/cudf/source/api_docs/api/cudf.Series.round.rst deleted file mode 100644 index 96a7bbc50d3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.round.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.round -================= - -.. currentmodule:: cudf - -.. automethod:: Series.round \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rpow.rst b/docs/cudf/source/api_docs/api/cudf.Series.rpow.rst deleted file mode 100644 index 13a6ea51009..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rpow.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rpow -================ - -.. currentmodule:: cudf - -.. automethod:: Series.rpow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rst b/docs/cudf/source/api_docs/api/cudf.Series.rst deleted file mode 100644 index 9c6679c1c64..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rst +++ /dev/null @@ -1,205 +0,0 @@ -cudf.Series -=========== - -.. currentmodule:: cudf - -.. autoclass:: Series - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~Series.__init__ - ~Series.abs - ~Series.acos - ~Series.add - ~Series.all - ~Series.any - ~Series.append - ~Series.applymap - ~Series.argsort - ~Series.as_index - ~Series.as_mask - ~Series.asin - ~Series.astype - ~Series.atan - ~Series.ceil - ~Series.clip - ~Series.copy - ~Series.corr - ~Series.cos - ~Series.count - ~Series.cov - ~Series.cummax - ~Series.cummin - ~Series.cumprod - ~Series.cumsum - ~Series.describe - ~Series.deserialize - ~Series.device_deserialize - ~Series.device_serialize - ~Series.diff - ~Series.digitize - ~Series.drop - ~Series.drop_duplicates - ~Series.dropna - ~Series.eq - ~Series.equals - ~Series.exp - ~Series.explode - ~Series.factorize - ~Series.fill - ~Series.fillna - ~Series.floor - ~Series.floordiv - ~Series.from_arrow - ~Series.from_categorical - ~Series.from_masked_array - ~Series.from_pandas - ~Series.ge - ~Series.groupby - ~Series.gt - ~Series.hash_encode - ~Series.hash_values - ~Series.head - ~Series.host_deserialize - ~Series.host_serialize - ~Series.interleave_columns - ~Series.isin - ~Series.isna - ~Series.isnull - ~Series.items - ~Series.iteritems - ~Series.keys - ~Series.kurt - ~Series.kurtosis - ~Series.label_encoding - ~Series.le - ~Series.log - ~Series.logical_and - ~Series.logical_not - ~Series.logical_or - ~Series.lt - ~Series.map - ~Series.mask - ~Series.max - ~Series.mean - ~Series.median - ~Series.memory_usage - ~Series.merge - ~Series.min - ~Series.mod - ~Series.mode - ~Series.mul - ~Series.multiply - ~Series.nans_to_nulls - ~Series.ne - ~Series.nlargest - ~Series.notna - ~Series.notnull - ~Series.nsmallest - ~Series.nunique - ~Series.one_hot_encoding - ~Series.pipe - ~Series.pow - ~Series.prod - ~Series.product - ~Series.quantile - ~Series.radd - ~Series.rank - ~Series.reindex - ~Series.remainder - ~Series.rename - ~Series.repeat - ~Series.replace - ~Series.reset_index - ~Series.reverse - ~Series.rfloordiv - ~Series.rmod - ~Series.rmul - ~Series.rolling - ~Series.round - ~Series.rpow - ~Series.rsub 
- ~Series.rtruediv - ~Series.sample - ~Series.scale - ~Series.scatter_by_map - ~Series.searchsorted - ~Series.serialize - ~Series.set_index - ~Series.set_mask - ~Series.shift - ~Series.sin - ~Series.skew - ~Series.sort_index - ~Series.sort_values - ~Series.sqrt - ~Series.std - ~Series.sub - ~Series.subtract - ~Series.sum - ~Series.sum_of_squares - ~Series.tail - ~Series.take - ~Series.tan - ~Series.tile - ~Series.to_array - ~Series.to_arrow - ~Series.to_dict - ~Series.to_dlpack - ~Series.to_frame - ~Series.to_gpu_array - ~Series.to_hdf - ~Series.to_json - ~Series.to_list - ~Series.to_pandas - ~Series.to_string - ~Series.tolist - ~Series.truediv - ~Series.unique - ~Series.update - ~Series.value_counts - ~Series.var - ~Series.where - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~Series.cat - ~Series.data - ~Series.dt - ~Series.dtype - ~Series.empty - ~Series.has_nulls - ~Series.iloc - ~Series.index - ~Series.is_monotonic - ~Series.is_monotonic_decreasing - ~Series.is_monotonic_increasing - ~Series.is_unique - ~Series.list - ~Series.loc - ~Series.name - ~Series.ndim - ~Series.null_count - ~Series.nullable - ~Series.nullmask - ~Series.shape - ~Series.size - ~Series.str - ~Series.struct - ~Series.valid_count - ~Series.values - ~Series.values_host - - \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rsub.rst b/docs/cudf/source/api_docs/api/cudf.Series.rsub.rst deleted file mode 100644 index 43c54ad1f72..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rsub.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rsub -================ - -.. currentmodule:: cudf - -.. 
automethod:: Series.rsub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.rtruediv.rst b/docs/cudf/source/api_docs/api/cudf.Series.rtruediv.rst deleted file mode 100644 index a5d45a08b31..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.rtruediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.rtruediv -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.rtruediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.sample.rst b/docs/cudf/source/api_docs/api/cudf.Series.sample.rst deleted file mode 100644 index bc9ed939c94..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.sample.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.sample -================== - -.. currentmodule:: cudf - -.. automethod:: Series.sample \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.searchsorted.rst b/docs/cudf/source/api_docs/api/cudf.Series.searchsorted.rst deleted file mode 100644 index 1673672a139..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.searchsorted.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.searchsorted -======================== - -.. currentmodule:: cudf - -.. automethod:: Series.searchsorted \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.shape.rst b/docs/cudf/source/api_docs/api/cudf.Series.shape.rst deleted file mode 100644 index fd9b9c78df8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.shape.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.shape -================= - -.. currentmodule:: cudf - -.. autoproperty:: Series.shape \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.shift.rst b/docs/cudf/source/api_docs/api/cudf.Series.shift.rst deleted file mode 100644 index 86cafcf8771..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.shift.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.shift -================= - -.. 
currentmodule:: cudf - -.. automethod:: Series.shift \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.size.rst b/docs/cudf/source/api_docs/api/cudf.Series.size.rst deleted file mode 100644 index a4863b2b6f3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.size.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.size -================ - -.. currentmodule:: cudf - -.. autoproperty:: Series.size \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.skew.rst b/docs/cudf/source/api_docs/api/cudf.Series.skew.rst deleted file mode 100644 index 30f7b7e1811..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.skew.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.skew -================ - -.. currentmodule:: cudf - -.. automethod:: Series.skew \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.sort_index.rst b/docs/cudf/source/api_docs/api/cudf.Series.sort_index.rst deleted file mode 100644 index 6dcc4b8b12f..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.sort_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.sort\_index -======================= - -.. currentmodule:: cudf - -.. automethod:: Series.sort_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.sort_values.rst b/docs/cudf/source/api_docs/api/cudf.Series.sort_values.rst deleted file mode 100644 index dbb4c8e926c..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.sort_values.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.sort\_values -======================== - -.. currentmodule:: cudf - -.. automethod:: Series.sort_values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.std.rst b/docs/cudf/source/api_docs/api/cudf.Series.std.rst deleted file mode 100644 index 556479b4a78..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.std.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.std -=============== - -.. 
currentmodule:: cudf - -.. automethod:: Series.std \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.str.rst b/docs/cudf/source/api_docs/api/cudf.Series.str.rst deleted file mode 100644 index 7f7d3ac65bf..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.str.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.str -=============== - -.. currentmodule:: cudf - -.. autoproperty:: Series.str \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.sub.rst b/docs/cudf/source/api_docs/api/cudf.Series.sub.rst deleted file mode 100644 index 940bc172ab0..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.sub.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.sub -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.sub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.sum.rst b/docs/cudf/source/api_docs/api/cudf.Series.sum.rst deleted file mode 100644 index 2a909e6002b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.sum.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.sum -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.sum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.tail.rst b/docs/cudf/source/api_docs/api/cudf.Series.tail.rst deleted file mode 100644 index cf2c7855691..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.tail.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.tail -================ - -.. currentmodule:: cudf - -.. automethod:: Series.tail \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.take.rst b/docs/cudf/source/api_docs/api/cudf.Series.take.rst deleted file mode 100644 index cf871af11d6..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.take.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.take -================ - -.. currentmodule:: cudf - -.. 
automethod:: Series.take \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_dict.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_dict.rst deleted file mode 100644 index 245dea46d58..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_dict.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_dict -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.to_dict \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_frame.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_frame.rst deleted file mode 100644 index 18abeaae928..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_frame.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_frame -===================== - -.. currentmodule:: cudf - -.. automethod:: Series.to_frame \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_hdf.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_hdf.rst deleted file mode 100644 index 5c8b5c3ae68..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_hdf.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_hdf -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.to_hdf \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_json.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_json.rst deleted file mode 100644 index f36b90e64a8..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_json.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_json -==================== - -.. currentmodule:: cudf - -.. automethod:: Series.to_json \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_list.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_list.rst deleted file mode 100644 index 0b7eda982cb..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_list.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_list -==================== - -.. 
currentmodule:: cudf - -.. automethod:: Series.to_list \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.to_string.rst b/docs/cudf/source/api_docs/api/cudf.Series.to_string.rst deleted file mode 100644 index a1d8e49c5d3..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.to_string.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.to\_string -====================== - -.. currentmodule:: cudf - -.. automethod:: Series.to_string \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.truediv.rst b/docs/cudf/source/api_docs/api/cudf.Series.truediv.rst deleted file mode 100644 index 07dd45c5a56..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.truediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.truediv -=================== - -.. currentmodule:: cudf - -.. automethod:: Series.truediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.unique.rst b/docs/cudf/source/api_docs/api/cudf.Series.unique.rst deleted file mode 100644 index a9873809e1b..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.unique.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.unique -================== - -.. currentmodule:: cudf - -.. automethod:: Series.unique \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.update.rst b/docs/cudf/source/api_docs/api/cudf.Series.update.rst deleted file mode 100644 index 1f7c7e10fb1..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.update.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.update -================== - -.. currentmodule:: cudf - -.. 
automethod:: Series.update \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.value_counts.rst b/docs/cudf/source/api_docs/api/cudf.Series.value_counts.rst deleted file mode 100644 index e29d499a297..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.value_counts.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.value\_counts -========================= - -.. currentmodule:: cudf - -.. automethod:: Series.value_counts \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.values.rst b/docs/cudf/source/api_docs/api/cudf.Series.values.rst deleted file mode 100644 index e91eaebdb47..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.values.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.values -================== - -.. currentmodule:: cudf - -.. autoproperty:: Series.values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.var.rst b/docs/cudf/source/api_docs/api/cudf.Series.var.rst deleted file mode 100644 index cfa470f6711..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.var.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.var -=============== - -.. currentmodule:: cudf - -.. automethod:: Series.var \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/cudf.Series.where.rst b/docs/cudf/source/api_docs/api/cudf.Series.where.rst deleted file mode 100644 index c6b53babf11..00000000000 --- a/docs/cudf/source/api_docs/api/cudf.Series.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.Series.where -================= - -.. currentmodule:: cudf - -.. automethod:: Series.where \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.T.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.T.rst deleted file mode 100644 index 03537765d1c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.T.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.T -================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.T \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.__iter__.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.__iter__.rst deleted file mode 100644 index b87f2f1cbf8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.__iter__.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.\_\_iter\_\_ -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.__iter__ \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.abs.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.abs.rst deleted file mode 100644 index 436ab756650..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.abs.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.abs -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.abs \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.add.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.add.rst deleted file mode 100644 index 3548062e08f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.add.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.add -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.add \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.add_prefix.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.add_prefix.rst deleted file mode 100644 index adfcb156943..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.add_prefix.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.add\_prefix -============================ - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.add_prefix \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.add_suffix.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.add_suffix.rst deleted file mode 100644 index 7e5d1dc754c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.add_suffix.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.add\_suffix -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.add_suffix \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.agg.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.agg.rst deleted file mode 100644 index d70669c611e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.agg.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.agg -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.agg \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.aggregate.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.aggregate.rst deleted file mode 100644 index 7ce8d54b9bb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.aggregate.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.aggregate -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.aggregate \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.align.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.align.rst deleted file mode 100644 index 51127fcca22..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.align.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.align -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.align \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.all.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.all.rst deleted file mode 100644 index 5f5e0dcba4b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.all.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.all -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.all \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.any.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.any.rst deleted file mode 100644 index a52b7b3c6d4..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.any.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.any -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.any \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.append.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.append.rst deleted file mode 100644 index da28b001828..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.append.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.append -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.append \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.apply.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.apply.rst deleted file mode 100644 index ba56bcdc088..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.apply.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.apply -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.apply \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.applymap.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.applymap.rst deleted file mode 100644 index bfeacbb8283..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.applymap.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.applymap -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.applymap \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.asfreq.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.asfreq.rst deleted file mode 100644 index 249c5e0e355..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.asfreq.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.asfreq -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.asfreq \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.asof.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.asof.rst deleted file mode 100644 index 34085e65158..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.asof.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.asof -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.asof \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.assign.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.assign.rst deleted file mode 100644 index 15fd1e4eb90..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.assign.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.assign -======================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.assign \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.astype.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.astype.rst deleted file mode 100644 index 50bf5e4bc57..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.astype.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.astype -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.astype \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.at.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.at.rst deleted file mode 100644 index 43a5f859ce3..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.at.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.at -=================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.at \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.at_time.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.at_time.rst deleted file mode 100644 index 79bd0721f36..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.at_time.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.at\_time -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.at_time \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.attrs.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.attrs.rst deleted file mode 100644 index 09b0c927efa..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.attrs.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.attrs -====================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.attrs \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.axes.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.axes.rst deleted file mode 100644 index c0b8cfe30fb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.axes.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.axes -===================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.axes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.backfill.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.backfill.rst deleted file mode 100644 index 4414f0dc17d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.backfill.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.backfill -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.backfill \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.between_time.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.between_time.rst deleted file mode 100644 index 9e44bb4e821..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.between_time.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.between\_time -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.between_time \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.bfill.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.bfill.rst deleted file mode 100644 index 92a7ab5904c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.bfill.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.bfill -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.bfill \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.bool.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.bool.rst deleted file mode 100644 index 48ec48da1ff..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.bool.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.bool -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.bool \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.boxplot.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.boxplot.rst deleted file mode 100644 index e272a254269..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.boxplot.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.boxplot -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.boxplot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.clip.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.clip.rst deleted file mode 100644 index 101a72aec5c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.clip.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.clip -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.clip \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.columns.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.columns.rst deleted file mode 100644 index eba6ca9e945..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.columns.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.columns -======================== - -.. currentmodule:: pandas - -.. 
autoattribute:: DataFrame.columns \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.combine.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.combine.rst deleted file mode 100644 index fa084fa9d85..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.combine.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.combine -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.combine \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.combine_first.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.combine_first.rst deleted file mode 100644 index ae749204c36..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.combine_first.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.combine\_first -=============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.combine_first \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.compare.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.compare.rst deleted file mode 100644 index 539a491c4b6..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.compare.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.compare -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.compare \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.convert_dtypes.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.convert_dtypes.rst deleted file mode 100644 index e740f012f15..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.convert_dtypes.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.convert\_dtypes -================================ - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.convert_dtypes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.copy.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.copy.rst deleted file mode 100644 index 19857b90b06..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.copy.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.copy -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.copy \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.corr.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.corr.rst deleted file mode 100644 index 9893057e033..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.corr.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.corr -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.corr \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.corrwith.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.corrwith.rst deleted file mode 100644 index 07b8b81c734..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.corrwith.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.corrwith -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.corrwith \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.count.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.count.rst deleted file mode 100644 index 77a3c7309e3..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.count.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.count -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.count \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.cov.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.cov.rst deleted file mode 100644 index 5501bec774d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.cov.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.cov -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.cov \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.cummax.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.cummax.rst deleted file mode 100644 index 84389d5516c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.cummax.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.cummax -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.cummax \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.cummin.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.cummin.rst deleted file mode 100644 index 7e4cc6cffcb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.cummin.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.cummin -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.cummin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.cumprod.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.cumprod.rst deleted file mode 100644 index efd1eaaaeab..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.cumprod.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.cumprod -======================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.cumprod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.cumsum.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.cumsum.rst deleted file mode 100644 index 54fd81bc4ba..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.cumsum.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.cumsum -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.cumsum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.describe.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.describe.rst deleted file mode 100644 index bf54abc99ce..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.describe.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.describe -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.describe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.diff.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.diff.rst deleted file mode 100644 index f5cfbea3f3a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.diff.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.diff -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.diff \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.div.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.div.rst deleted file mode 100644 index 49926f9f384..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.div.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.div -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.div \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.dot.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.dot.rst deleted file mode 100644 index 0f5ec373abb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.dot.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.dot -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.dot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.drop.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.drop.rst deleted file mode 100644 index 9ee305369db..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.drop -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.drop_duplicates.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.drop_duplicates.rst deleted file mode 100644 index 9a72f602cf5..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.drop_duplicates.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.drop\_duplicates -================================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.drop_duplicates \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.droplevel.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.droplevel.rst deleted file mode 100644 index fe3e4ed7dbd..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.droplevel.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.droplevel -========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.droplevel \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.dropna.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.dropna.rst deleted file mode 100644 index bf67d663c2a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.dropna.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.dropna -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.dropna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.dtypes.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.dtypes.rst deleted file mode 100644 index f7bc0086449..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.dtypes.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.dtypes -======================= - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.dtypes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.duplicated.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.duplicated.rst deleted file mode 100644 index dc0ce02d412..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.duplicated.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.duplicated -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.duplicated \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.empty.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.empty.rst deleted file mode 100644 index 1566f4b84cd..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.empty.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.empty -====================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.empty \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.eq.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.eq.rst deleted file mode 100644 index ab3e0e53934..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.eq.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.eq -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.eq \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.equals.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.equals.rst deleted file mode 100644 index 06e9ef4b620..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.equals.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.equals -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.equals \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.eval.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.eval.rst deleted file mode 100644 index 794afecf147..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.eval.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.eval -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.eval \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.ewm.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.ewm.rst deleted file mode 100644 index 095c7bffe3f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.ewm.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.ewm -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.ewm \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.expanding.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.expanding.rst deleted file mode 100644 index 09f9280c148..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.expanding.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.expanding -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.expanding \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.explode.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.explode.rst deleted file mode 100644 index b593a3df556..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.explode.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.explode -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.explode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.ffill.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.ffill.rst deleted file mode 100644 index e690dbf8f3a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.ffill.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.ffill -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.ffill \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.fillna.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.fillna.rst deleted file mode 100644 index 2aab63b418d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.fillna.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.fillna -======================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.fillna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.filter.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.filter.rst deleted file mode 100644 index fb7bdfc146d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.filter.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.filter -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.filter \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.first.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.first.rst deleted file mode 100644 index 86af8ed6384..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.first.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.first -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.first \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.first_valid_index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.first_valid_index.rst deleted file mode 100644 index aa02ea01184..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.first_valid_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.first\_valid\_index -==================================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.first_valid_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.floordiv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.floordiv.rst deleted file mode 100644 index 84593cbb661..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.floordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.floordiv -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.floordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.from_dict.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.from_dict.rst deleted file mode 100644 index 185c2101293..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.from_dict.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.from\_dict -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.from_dict \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.from_records.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.from_records.rst deleted file mode 100644 index d9e1b1b2f97..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.from_records.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.from\_records -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.from_records \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.ge.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.ge.rst deleted file mode 100644 index 07ca9a1ea38..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.ge.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.ge -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.ge \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.get.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.get.rst deleted file mode 100644 index 8666f9cf49a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.get.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.get -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.get \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.groupby.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.groupby.rst deleted file mode 100644 index f8872657308..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.groupby -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.gt.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.gt.rst deleted file mode 100644 index 769a5b529ba..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.gt.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.gt -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.gt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.head.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.head.rst deleted file mode 100644 index 91b80799a37..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.head.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.head -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.head \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.hist.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.hist.rst deleted file mode 100644 index d00f2ed8d21..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.hist.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.hist -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.hist \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.iat.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.iat.rst deleted file mode 100644 index f5e7d2e5bb8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.iat.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.iat -==================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.iat \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmax.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmax.rst deleted file mode 100644 index 7d04f17f603..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmax.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.idxmax -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.idxmax \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmin.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmin.rst deleted file mode 100644 index 20148233718..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.idxmin.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.idxmin -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.idxmin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.iloc.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.iloc.rst deleted file mode 100644 index a037a5a4438..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.iloc.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.iloc -===================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.iloc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.index.rst deleted file mode 100644 index f2b9ccb4ea0..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.index -====================== - -.. currentmodule:: pandas - -.. autoattribute:: DataFrame.index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.infer_objects.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.infer_objects.rst deleted file mode 100644 index f5334ce6241..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.infer_objects.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.infer\_objects -=============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.infer_objects \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.info.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.info.rst deleted file mode 100644 index ef02ad30511..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.info.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.info -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.info \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.insert.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.insert.rst deleted file mode 100644 index e13c5a3082a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.insert.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.insert -======================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.insert \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.interpolate.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.interpolate.rst deleted file mode 100644 index 82a9026f7a5..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.interpolate.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.interpolate -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.interpolate \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.isin.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.isin.rst deleted file mode 100644 index ecead879b6b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.isin.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.isin -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.isin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.isna.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.isna.rst deleted file mode 100644 index aaa1c1ba409..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.isna.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.isna -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.isna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.isnull.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.isnull.rst deleted file mode 100644 index ed218ac4e22..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.isnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.isnull -======================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.isnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.items.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.items.rst deleted file mode 100644 index b9bfe33b49f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.items.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.items -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.items \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.iteritems.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.iteritems.rst deleted file mode 100644 index b1a5ac0e545..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.iteritems.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.iteritems -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.iteritems \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.iterrows.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.iterrows.rst deleted file mode 100644 index 9d19d6b23b1..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.iterrows.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.iterrows -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.iterrows \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.itertuples.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.itertuples.rst deleted file mode 100644 index 50d8e68f52d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.itertuples.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.itertuples -=========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.itertuples \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.join.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.join.rst deleted file mode 100644 index 88574103604..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.join.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.join -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.join \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.keys.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.keys.rst deleted file mode 100644 index c601a2aa022..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.keys.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.keys -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.keys \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.kurt.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.kurt.rst deleted file mode 100644 index b2d2e2bfb0f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.kurt.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.kurt -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.kurt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.kurtosis.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.kurtosis.rst deleted file mode 100644 index a518bc3bbaf..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.kurtosis.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.kurtosis -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.kurtosis \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.last.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.last.rst deleted file mode 100644 index c80c1c244e1..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.last.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.last -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.last \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.last_valid_index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.last_valid_index.rst deleted file mode 100644 index d0da1d0ae4b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.last_valid_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.last\_valid\_index -=================================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.last_valid_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.le.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.le.rst deleted file mode 100644 index 4e547b7177d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.le.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.le -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.le \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.loc.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.loc.rst deleted file mode 100644 index eea417e8b56..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.loc.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.loc -==================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.loc \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.lookup.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.lookup.rst deleted file mode 100644 index 53da973a34d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.lookup.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.lookup -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.lookup \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.lt.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.lt.rst deleted file mode 100644 index 0c30b0baab2..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.lt.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.lt -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.lt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mad.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mad.rst deleted file mode 100644 index a3b565902cd..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mad.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mad -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.mad \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mask.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mask.rst deleted file mode 100644 index 72b4a198d3d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mask.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mask -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.mask \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.max.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.max.rst deleted file mode 100644 index ab5107fb32e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.max.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.max -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.max \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mean.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mean.rst deleted file mode 100644 index 77e6d62e561..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mean.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mean -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.mean \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.median.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.median.rst deleted file mode 100644 index dd54dccc800..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.median.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.median -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.median \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.melt.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.melt.rst deleted file mode 100644 index b077b2f7a41..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.melt.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.melt -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.melt \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.memory_usage.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.memory_usage.rst deleted file mode 100644 index 12990f20a9e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.memory_usage.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.memory\_usage -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.memory_usage \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.merge.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.merge.rst deleted file mode 100644 index 9fbf5eb23dc..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.merge.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.merge -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.merge \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.min.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.min.rst deleted file mode 100644 index 62731caab58..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.min.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.min -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.min \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mod.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mod.rst deleted file mode 100644 index b9039a225b2..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mod.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mod -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.mod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mode.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mode.rst deleted file mode 100644 index 55e17e0f16a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mode.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mode -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.mode \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.mul.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.mul.rst deleted file mode 100644 index 31851345095..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.mul.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.mul -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.mul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.ndim.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.ndim.rst deleted file mode 100644 index adf6339d924..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.ndim.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.ndim -===================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.ndim \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.ne.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.ne.rst deleted file mode 100644 index b557dfc2d3f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.ne.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.ne -=================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.ne \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.nlargest.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.nlargest.rst deleted file mode 100644 index 4e9b95daada..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.nlargest.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.nlargest -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.nlargest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.notna.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.notna.rst deleted file mode 100644 index 5941cd5255a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.notna.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.notna -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.notna \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.notnull.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.notnull.rst deleted file mode 100644 index 4408bbb8b7f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.notnull.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.notnull -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.notnull \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.nsmallest.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.nsmallest.rst deleted file mode 100644 index eb87234da0f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.nsmallest.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.nsmallest -========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.nsmallest \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.nunique.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.nunique.rst deleted file mode 100644 index 750bff9fce8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.nunique.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.nunique -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.nunique \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pad.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pad.rst deleted file mode 100644 index c7c4d9567ef..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pad.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pad -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.pad \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pct_change.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pct_change.rst deleted file mode 100644 index 06cc2da2528..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pct_change.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pct\_change -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.pct_change \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pipe.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pipe.rst deleted file mode 100644 index 12616a61023..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pipe.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pipe -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.pipe \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot.rst deleted file mode 100644 index 6c2e8026b50..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pivot -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.pivot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot_table.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot_table.rst deleted file mode 100644 index 53d57f5ae61..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pivot_table.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pivot\_table -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.pivot_table \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.area.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.area.rst deleted file mode 100644 index faef939bf62..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.area.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.area -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.area \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.bar.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.bar.rst deleted file mode 100644 index 7b57b919113..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.bar.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.bar -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.plot.bar \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.barh.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.barh.rst deleted file mode 100644 index 71756bd592a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.barh.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.barh -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.barh \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.box.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.box.rst deleted file mode 100644 index 795fd22a58d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.box.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.box -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.box \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.density.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.density.rst deleted file mode 100644 index 16cf2f91f15..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.density.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.density -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.density \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hexbin.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hexbin.rst deleted file mode 100644 index 230b2b2b20c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hexbin.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.hexbin -============================ - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.plot.hexbin \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hist.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hist.rst deleted file mode 100644 index b7ff6dc00b3..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.hist.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.hist -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.hist \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.kde.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.kde.rst deleted file mode 100644 index 73243f4ec15..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.kde.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.kde -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.kde \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.line.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.line.rst deleted file mode 100644 index 3e715257702..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.line.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.line -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.line \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.pie.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.pie.rst deleted file mode 100644 index 375d3bc5038..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.pie.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.pie -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.plot.pie \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.rst deleted file mode 100644 index b060aa155e2..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot -===================== - -.. currentmodule:: pandas - -.. autoclass:: DataFrame.plot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.scatter.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.scatter.rst deleted file mode 100644 index d2b659f882e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.plot.scatter.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.plot.scatter -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.plot.scatter \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pop.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pop.rst deleted file mode 100644 index bd5df617265..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pop.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pop -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.pop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.pow.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.pow.rst deleted file mode 100644 index 18dc68c9bb7..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.pow.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.pow -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.pow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.prod.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.prod.rst deleted file mode 100644 index 346340a68cd..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.prod.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.prod -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.prod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.product.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.product.rst deleted file mode 100644 index c48ccf21ae1..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.product.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.product -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.product \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.quantile.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.quantile.rst deleted file mode 100644 index 8f7e988fe3b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.quantile.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.quantile -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.quantile \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.query.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.query.rst deleted file mode 100644 index f8a331db7a8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.query.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.query -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.query \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.radd.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.radd.rst deleted file mode 100644 index 1a85dd2e3ed..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.radd.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.radd -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.radd \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rank.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rank.rst deleted file mode 100644 index a198a038f71..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rank.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rank -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rank \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rdiv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rdiv.rst deleted file mode 100644 index 671ddb32aca..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rdiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rdiv -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rdiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex.rst deleted file mode 100644 index ae15567120f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.reindex -======================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.reindex \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex_like.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex_like.rst deleted file mode 100644 index d9d6bb53be5..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.reindex_like.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.reindex\_like -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.reindex_like \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rename.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rename.rst deleted file mode 100644 index 0a65a29716d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rename.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rename -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rename \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rename_axis.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rename_axis.rst deleted file mode 100644 index 6b9233c81d0..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rename_axis.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rename\_axis -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rename_axis \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.reorder_levels.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.reorder_levels.rst deleted file mode 100644 index c155ad00fa7..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.reorder_levels.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.reorder\_levels -================================ - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.reorder_levels \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.replace.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.replace.rst deleted file mode 100644 index b12b5db8a7b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.replace.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.replace -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.replace \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.resample.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.resample.rst deleted file mode 100644 index f5b46207d40..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.resample.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.resample -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.resample \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.reset_index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.reset_index.rst deleted file mode 100644 index 188ed794c31..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.reset_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.reset\_index -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.reset_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rfloordiv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rfloordiv.rst deleted file mode 100644 index e222bbedde4..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rfloordiv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rfloordiv -========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.rfloordiv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rmod.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rmod.rst deleted file mode 100644 index 6862ed7f2af..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rmod.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rmod -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rmod \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rmul.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rmul.rst deleted file mode 100644 index 6082fe44a35..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rmul.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rmul -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rmul \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rolling.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rolling.rst deleted file mode 100644 index 697e8ff7193..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rolling.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rolling -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rolling \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.round.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.round.rst deleted file mode 100644 index 44a176d3566..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.round.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.round -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.round \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rpow.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rpow.rst deleted file mode 100644 index c0ea60d674e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rpow.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rpow -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rpow \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rst deleted file mode 100644 index d1f409dc9cb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rst +++ /dev/null @@ -1,236 +0,0 @@ -pandas.DataFrame -================ - -.. currentmodule:: pandas - -.. autoclass:: DataFrame - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DataFrame.__init__ - ~DataFrame.abs - ~DataFrame.add - ~DataFrame.add_prefix - ~DataFrame.add_suffix - ~DataFrame.agg - ~DataFrame.aggregate - ~DataFrame.align - ~DataFrame.all - ~DataFrame.any - ~DataFrame.append - ~DataFrame.apply - ~DataFrame.applymap - ~DataFrame.asfreq - ~DataFrame.asof - ~DataFrame.assign - ~DataFrame.astype - ~DataFrame.at_time - ~DataFrame.backfill - ~DataFrame.between_time - ~DataFrame.bfill - ~DataFrame.bool - ~DataFrame.boxplot - ~DataFrame.clip - ~DataFrame.combine - ~DataFrame.combine_first - ~DataFrame.compare - ~DataFrame.convert_dtypes - ~DataFrame.copy - ~DataFrame.corr - ~DataFrame.corrwith - ~DataFrame.count - ~DataFrame.cov - ~DataFrame.cummax - ~DataFrame.cummin - ~DataFrame.cumprod - ~DataFrame.cumsum - ~DataFrame.describe - ~DataFrame.diff - ~DataFrame.div - ~DataFrame.divide - ~DataFrame.dot - ~DataFrame.drop - ~DataFrame.drop_duplicates - ~DataFrame.droplevel - ~DataFrame.dropna - ~DataFrame.duplicated - ~DataFrame.eq - ~DataFrame.equals - ~DataFrame.eval - ~DataFrame.ewm - ~DataFrame.expanding - ~DataFrame.explode - ~DataFrame.ffill 
- ~DataFrame.fillna - ~DataFrame.filter - ~DataFrame.first - ~DataFrame.first_valid_index - ~DataFrame.floordiv - ~DataFrame.from_dict - ~DataFrame.from_records - ~DataFrame.ge - ~DataFrame.get - ~DataFrame.groupby - ~DataFrame.gt - ~DataFrame.head - ~DataFrame.hist - ~DataFrame.idxmax - ~DataFrame.idxmin - ~DataFrame.infer_objects - ~DataFrame.info - ~DataFrame.insert - ~DataFrame.interpolate - ~DataFrame.isin - ~DataFrame.isna - ~DataFrame.isnull - ~DataFrame.items - ~DataFrame.iteritems - ~DataFrame.iterrows - ~DataFrame.itertuples - ~DataFrame.join - ~DataFrame.keys - ~DataFrame.kurt - ~DataFrame.kurtosis - ~DataFrame.last - ~DataFrame.last_valid_index - ~DataFrame.le - ~DataFrame.lookup - ~DataFrame.lt - ~DataFrame.mad - ~DataFrame.mask - ~DataFrame.max - ~DataFrame.mean - ~DataFrame.median - ~DataFrame.melt - ~DataFrame.memory_usage - ~DataFrame.merge - ~DataFrame.min - ~DataFrame.mod - ~DataFrame.mode - ~DataFrame.mul - ~DataFrame.multiply - ~DataFrame.ne - ~DataFrame.nlargest - ~DataFrame.notna - ~DataFrame.notnull - ~DataFrame.nsmallest - ~DataFrame.nunique - ~DataFrame.pad - ~DataFrame.pct_change - ~DataFrame.pipe - ~DataFrame.pivot - ~DataFrame.pivot_table - ~DataFrame.pop - ~DataFrame.pow - ~DataFrame.prod - ~DataFrame.product - ~DataFrame.quantile - ~DataFrame.query - ~DataFrame.radd - ~DataFrame.rank - ~DataFrame.rdiv - ~DataFrame.reindex - ~DataFrame.reindex_like - ~DataFrame.rename - ~DataFrame.rename_axis - ~DataFrame.reorder_levels - ~DataFrame.replace - ~DataFrame.resample - ~DataFrame.reset_index - ~DataFrame.rfloordiv - ~DataFrame.rmod - ~DataFrame.rmul - ~DataFrame.rolling - ~DataFrame.round - ~DataFrame.rpow - ~DataFrame.rsub - ~DataFrame.rtruediv - ~DataFrame.sample - ~DataFrame.select_dtypes - ~DataFrame.sem - ~DataFrame.set_axis - ~DataFrame.set_flags - ~DataFrame.set_index - ~DataFrame.shift - ~DataFrame.skew - ~DataFrame.slice_shift - ~DataFrame.sort_index - ~DataFrame.sort_values - ~DataFrame.squeeze - ~DataFrame.stack - ~DataFrame.std 
- ~DataFrame.sub - ~DataFrame.subtract - ~DataFrame.sum - ~DataFrame.swapaxes - ~DataFrame.swaplevel - ~DataFrame.tail - ~DataFrame.take - ~DataFrame.to_clipboard - ~DataFrame.to_csv - ~DataFrame.to_dict - ~DataFrame.to_excel - ~DataFrame.to_feather - ~DataFrame.to_gbq - ~DataFrame.to_hdf - ~DataFrame.to_html - ~DataFrame.to_json - ~DataFrame.to_latex - ~DataFrame.to_markdown - ~DataFrame.to_numpy - ~DataFrame.to_parquet - ~DataFrame.to_period - ~DataFrame.to_pickle - ~DataFrame.to_records - ~DataFrame.to_sql - ~DataFrame.to_stata - ~DataFrame.to_string - ~DataFrame.to_timestamp - ~DataFrame.to_xarray - ~DataFrame.transform - ~DataFrame.transpose - ~DataFrame.truediv - ~DataFrame.truncate - ~DataFrame.tshift - ~DataFrame.tz_convert - ~DataFrame.tz_localize - ~DataFrame.unstack - ~DataFrame.update - ~DataFrame.value_counts - ~DataFrame.var - ~DataFrame.where - ~DataFrame.xs - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DataFrame.T - ~DataFrame.at - ~DataFrame.attrs - ~DataFrame.axes - ~DataFrame.columns - ~DataFrame.dtypes - ~DataFrame.empty - ~DataFrame.flags - ~DataFrame.iat - ~DataFrame.iloc - ~DataFrame.index - ~DataFrame.loc - ~DataFrame.ndim - ~DataFrame.shape - ~DataFrame.size - ~DataFrame.style - ~DataFrame.values - - \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rsub.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rsub.rst deleted file mode 100644 index 538784ad78a..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rsub.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rsub -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.rsub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.rtruediv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.rtruediv.rst deleted file mode 100644 index 7e6dc6bdedb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.rtruediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.rtruediv -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.rtruediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sample.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sample.rst deleted file mode 100644 index 104b6ae398e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sample.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sample -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sample \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.select_dtypes.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.select_dtypes.rst deleted file mode 100644 index 6ea142593eb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.select_dtypes.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.select\_dtypes -=============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.select_dtypes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sem.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sem.rst deleted file mode 100644 index b6ec27afa65..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sem.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sem -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.sem \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_axis.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.set_axis.rst deleted file mode 100644 index dabbcacdcaf..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_axis.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.set\_axis -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.set_axis \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_flags.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.set_flags.rst deleted file mode 100644 index 2e90e358eed..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_flags.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.set\_flags -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.set_flags \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.set_index.rst deleted file mode 100644 index 8dc12d1125b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.set_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.set\_index -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.set_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.shape.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.shape.rst deleted file mode 100644 index 22b2212eb5b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.shape.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.shape -====================== - -.. currentmodule:: pandas - -.. 
autoproperty:: DataFrame.shape \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.shift.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.shift.rst deleted file mode 100644 index 1d1ca622569..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.shift.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.shift -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.shift \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.size.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.size.rst deleted file mode 100644 index ee479c487b0..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.size.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.size -===================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.size \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.skew.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.skew.rst deleted file mode 100644 index 4d287ba5ba8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.skew.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.skew -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.skew \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.slice_shift.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.slice_shift.rst deleted file mode 100644 index 30b2be55fd3..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.slice_shift.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.slice\_shift -============================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.slice_shift \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_index.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_index.rst deleted file mode 100644 index 1a59868330d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_index.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sort\_index -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sort_index \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_values.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_values.rst deleted file mode 100644 index a95b0c64579..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sort_values.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sort\_values -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sort_values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.density.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.density.rst deleted file mode 100644 index 7daa26c004f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.density.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sparse.density -=============================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.sparse.density \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.from_spmatrix.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.from_spmatrix.rst deleted file mode 100644 index b4412b02515..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.from_spmatrix.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sparse.from\_spmatrix -====================================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.sparse.from_spmatrix \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_coo.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_coo.rst deleted file mode 100644 index 3cb20c11b67..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_coo.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sparse.to\_coo -=============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sparse.to_coo \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_dense.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_dense.rst deleted file mode 100644 index 01b838efee3..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sparse.to_dense.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sparse.to\_dense -================================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sparse.to_dense \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.squeeze.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.squeeze.rst deleted file mode 100644 index 97ec63aec21..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.squeeze.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.squeeze -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.squeeze \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.stack.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.stack.rst deleted file mode 100644 index cea7889a321..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.stack.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.stack -====================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.stack \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.std.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.std.rst deleted file mode 100644 index 0d9de90c84b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.std.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.std -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.std \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.style.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.style.rst deleted file mode 100644 index 4af3dc86463..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.style.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.style -====================== - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.style \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sub.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sub.rst deleted file mode 100644 index 4f32aeb329c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sub.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sub -==================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.sub \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.sum.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.sum.rst deleted file mode 100644 index f94fd634f42..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.sum.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.sum -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.sum \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.swapaxes.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.swapaxes.rst deleted file mode 100644 index b471f3e345d..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.swapaxes.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.swapaxes -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.swapaxes \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.swaplevel.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.swaplevel.rst deleted file mode 100644 index f0047b19093..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.swaplevel.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.swaplevel -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.swaplevel \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.tail.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.tail.rst deleted file mode 100644 index eee1eed4bad..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.tail.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.tail -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.tail \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.take.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.take.rst deleted file mode 100644 index b648ce8dfb8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.take.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.take -===================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.take \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_clipboard.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_clipboard.rst deleted file mode 100644 index 52339fccf95..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_clipboard.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_clipboard -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_clipboard \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_csv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_csv.rst deleted file mode 100644 index 69a5452fe2c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_csv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_csv -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_csv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_dict.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_dict.rst deleted file mode 100644 index 63aeda88e0c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_dict.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_dict -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_dict \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_excel.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_excel.rst deleted file mode 100644 index 1cc49c2bfba..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_excel.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_excel -========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.to_excel \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_feather.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_feather.rst deleted file mode 100644 index 808e0546e49..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_feather.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_feather -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_feather \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_gbq.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_gbq.rst deleted file mode 100644 index 5f4a84fe48c..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_gbq.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_gbq -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_gbq \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_hdf.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_hdf.rst deleted file mode 100644 index 87b471c175e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_hdf.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_hdf -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_hdf \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_html.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_html.rst deleted file mode 100644 index 361ea3cceac..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_html.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_html -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.to_html \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_json.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_json.rst deleted file mode 100644 index 08914be4613..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_json.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_json -========================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_json \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_latex.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_latex.rst deleted file mode 100644 index 119cd0891fa..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_latex.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_latex -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_latex \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_markdown.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_markdown.rst deleted file mode 100644 index a991c9278ed..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_markdown.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_markdown -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_markdown \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_parquet.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_parquet.rst deleted file mode 100644 index 2a48e018775..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_parquet.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_parquet -============================ - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.to_parquet \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_period.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_period.rst deleted file mode 100644 index f2607f0751f..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_period.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_period -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_period \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_pickle.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_pickle.rst deleted file mode 100644 index 58bcbdfec1b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_pickle.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_pickle -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_pickle \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_records.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_records.rst deleted file mode 100644 index 7219f0840cb..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_records.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_records -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_records \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_sql.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_sql.rst deleted file mode 100644 index 2d5f6ec7422..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_sql.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_sql -======================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.to_sql \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_stata.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_stata.rst deleted file mode 100644 index 5129b57990b..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_stata.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_stata -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_stata \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_string.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_string.rst deleted file mode 100644 index 04cc550ebde..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_string.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_string -=========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_string \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_timestamp.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_timestamp.rst deleted file mode 100644 index a778cd29fe8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_timestamp.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_timestamp -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.to_timestamp \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_xarray.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.to_xarray.rst deleted file mode 100644 index fb3796e7031..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.to_xarray.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.to\_xarray -=========================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.to_xarray \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.transform.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.transform.rst deleted file mode 100644 index 05970513c14..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.transform.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.transform -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.transform \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.transpose.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.transpose.rst deleted file mode 100644 index 6a04dd4d1b4..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.transpose.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.transpose -========================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.transpose \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.truediv.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.truediv.rst deleted file mode 100644 index 14b855fee38..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.truediv.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.truediv -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.truediv \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.truncate.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.truncate.rst deleted file mode 100644 index 9de5770da97..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.truncate.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.truncate -========================= - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.truncate \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.tshift.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.tshift.rst deleted file mode 100644 index 896ed9edbd4..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.tshift.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.tshift -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.tshift \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_convert.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_convert.rst deleted file mode 100644 index 0251fb97110..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_convert.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.tz\_convert -============================ - -.. currentmodule:: pandas - -.. automethod:: DataFrame.tz_convert \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_localize.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_localize.rst deleted file mode 100644 index 3d354311388..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.tz_localize.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.tz\_localize -============================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.tz_localize \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.unstack.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.unstack.rst deleted file mode 100644 index bffe7398b91..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.unstack.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.unstack -======================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.unstack \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.update.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.update.rst deleted file mode 100644 index 08c7963cd81..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.update.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.update -======================= - -.. currentmodule:: pandas - -.. automethod:: DataFrame.update \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.value_counts.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.value_counts.rst deleted file mode 100644 index 79f2f43a032..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.value_counts.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.value\_counts -============================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.value_counts \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.values.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.values.rst deleted file mode 100644 index d7e2e4f54d6..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.values.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.values -======================= - -.. currentmodule:: pandas - -.. autoproperty:: DataFrame.values \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.var.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.var.rst deleted file mode 100644 index 0b8e97e1c03..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.var.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.var -==================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.var \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.where.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.where.rst deleted file mode 100644 index 4135b4bb2fa..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.where -====================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.DataFrame.xs.rst b/docs/cudf/source/api_docs/api/pandas.DataFrame.xs.rst deleted file mode 100644 index 78007c290d8..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.DataFrame.xs.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.xs -=================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.xs \ No newline at end of file diff --git a/docs/cudf/source/api_docs/api/pandas.Flags.rst b/docs/cudf/source/api_docs/api/pandas.Flags.rst deleted file mode 100644 index a7d844cb73e..00000000000 --- a/docs/cudf/source/api_docs/api/pandas.Flags.rst +++ /dev/null @@ -1,28 +0,0 @@ -pandas.Flags -============ - -.. currentmodule:: pandas - -.. autoclass:: Flags - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~Flags.__init__ - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~Flags.allows_duplicate_labels - - \ No newline at end of file From f8139541ddc25d495e874abc59d6d4a60a1f36ce Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 14 Jul 2021 16:50:15 -0700 Subject: [PATCH 08/49] remove markdown --- docs/cudf/source/api_docs/generated/pandas.Series.array.rst | 6 ------ docs/cudf/source/conf.py | 3 +-- docs/cudf/source/pandas.DataFrame.drop.rst | 6 ------ docs/cudf/source/pandas.DataFrame.groupby.rst | 6 ------ 4 files changed, 1 insertion(+), 20 deletions(-) delete mode 100644 docs/cudf/source/api_docs/generated/pandas.Series.array.rst delete mode 100644 docs/cudf/source/pandas.DataFrame.drop.rst delete mode 100644 docs/cudf/source/pandas.DataFrame.groupby.rst diff --git a/docs/cudf/source/api_docs/generated/pandas.Series.array.rst b/docs/cudf/source/api_docs/generated/pandas.Series.array.rst deleted file mode 100644 index e0954c01d1a..00000000000 --- a/docs/cudf/source/api_docs/generated/pandas.Series.array.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.Series.array -=================== - -.. currentmodule:: pandas - -.. autoproperty:: Series.array \ No newline at end of file diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index efc500bc89e..724b5c32a78 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -42,7 +42,6 @@ "sphinx.ext.autosummary", "sphinx_copybutton", "numpydoc", - "sphinx_markdown_tables", "IPython.sphinxext.ipython_console_highlighting", "IPython.sphinxext.ipython_directive", "nbsphinx", @@ -60,7 +59,7 @@ # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = {".rst": "restructuredtext", ".md": "markdown"} +source_suffix = {".rst": "restructuredtext"} # The master toctree document. 
master_doc = "index" diff --git a/docs/cudf/source/pandas.DataFrame.drop.rst b/docs/cudf/source/pandas.DataFrame.drop.rst deleted file mode 100644 index 9ee305369db..00000000000 --- a/docs/cudf/source/pandas.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.drop -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/pandas.DataFrame.groupby.rst b/docs/cudf/source/pandas.DataFrame.groupby.rst deleted file mode 100644 index f8872657308..00000000000 --- a/docs/cudf/source/pandas.DataFrame.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.groupby -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.groupby \ No newline at end of file From 871782080af7b1f9e7a36efdd8b382d44cbd9d54 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 14 Jul 2021 18:54:29 -0700 Subject: [PATCH 09/49] add more pages --- .../source/api_docs/general_functions.rst | 10 ++++ .../source/api_docs/general_utilities.rst | 8 ++++ docs/cudf/source/api_docs/groupby.rst | 48 +------------------ docs/cudf/source/api_docs/index.rst | 2 + 4 files changed, 22 insertions(+), 46 deletions(-) create mode 100644 docs/cudf/source/api_docs/general_functions.rst create mode 100644 docs/cudf/source/api_docs/general_utilities.rst diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst new file mode 100644 index 00000000000..2cbc1e93df5 --- /dev/null +++ b/docs/cudf/source/api_docs/general_functions.rst @@ -0,0 +1,10 @@ +================= +General Functions +================= + +.. automodule:: cudf.core.reshape + :members: + +.. autofunction:: cudf.to_datetime + +.. 
autofunction:: cudf.to_numeric diff --git a/docs/cudf/source/api_docs/general_utilities.rst b/docs/cudf/source/api_docs/general_utilities.rst new file mode 100644 index 00000000000..6f50c70498b --- /dev/null +++ b/docs/cudf/source/api_docs/general_utilities.rst @@ -0,0 +1,8 @@ +================= +General Utilities +================= + +.. currentmodule:: cudf.testing + +.. automodule:: cudf.testing.testing + :members: diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index 8f383b23d67..3dcd7a9f014 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -14,7 +14,6 @@ Indexing, iteration GroupBy.__iter__ GroupBy.groups - GroupBy.get_group .. currentmodule:: cudf @@ -23,7 +22,7 @@ Indexing, iteration Grouper -.. currentmodule:: cudf.core.groupby +.. currentmodule:: cudf.core.groupby.groupby Function application -------------------- @@ -34,8 +33,6 @@ Function application GroupBy.agg SeriesGroupBy.aggregate DataFrameGroupBy.aggregate - SeriesGroupBy.transform - DataFrameGroupBy.transform GroupBy.pipe Computations / descriptive stats @@ -43,38 +40,26 @@ Computations / descriptive stats .. autosummary:: :toctree: api/ - GroupBy.all - GroupBy.any GroupBy.bfill GroupBy.backfill GroupBy.count GroupBy.cumcount GroupBy.cummax GroupBy.cummin - GroupBy.cumprod GroupBy.cumsum GroupBy.ffill - GroupBy.first - GroupBy.head - GroupBy.last GroupBy.max GroupBy.mean GroupBy.median GroupBy.min - GroupBy.ngroup GroupBy.nth - GroupBy.ohlc GroupBy.pad GroupBy.prod - GroupBy.rank - GroupBy.pct_change GroupBy.size - GroupBy.sem GroupBy.std GroupBy.sum GroupBy.var - GroupBy.tail - + The following methods are available in both ``SeriesGroupBy`` and ``DataFrameGroupBy`` objects, but may differ slightly, usually in that the ``DataFrameGroupBy`` version usually permits the specification of an @@ -84,59 +69,30 @@ application to columns of a specific data type. .. 
autosummary:: :toctree: api/ - DataFrameGroupBy.all - DataFrameGroupBy.any DataFrameGroupBy.backfill DataFrameGroupBy.bfill - DataFrameGroupBy.corr DataFrameGroupBy.count - DataFrameGroupBy.cov DataFrameGroupBy.cumcount DataFrameGroupBy.cummax DataFrameGroupBy.cummin - DataFrameGroupBy.cumprod DataFrameGroupBy.cumsum DataFrameGroupBy.describe - DataFrameGroupBy.diff DataFrameGroupBy.ffill DataFrameGroupBy.fillna DataFrameGroupBy.filter DataFrameGroupBy.hist DataFrameGroupBy.idxmax DataFrameGroupBy.idxmin - DataFrameGroupBy.mad DataFrameGroupBy.nunique DataFrameGroupBy.pad - DataFrameGroupBy.pct_change - DataFrameGroupBy.plot DataFrameGroupBy.quantile - DataFrameGroupBy.rank - DataFrameGroupBy.resample - DataFrameGroupBy.sample DataFrameGroupBy.shift DataFrameGroupBy.size - DataFrameGroupBy.skew - DataFrameGroupBy.take - DataFrameGroupBy.tshift The following methods are available only for ``SeriesGroupBy`` objects. .. autosummary:: :toctree: api/ - SeriesGroupBy.hist - SeriesGroupBy.nlargest - SeriesGroupBy.nsmallest SeriesGroupBy.nunique SeriesGroupBy.unique - SeriesGroupBy.value_counts - SeriesGroupBy.is_monotonic_increasing - SeriesGroupBy.is_monotonic_decreasing - -The following methods are available only for ``DataFrameGroupBy`` objects. - -.. autosummary:: - :toctree: api/ - - DataFrameGroupBy.corrwith - DataFrameGroupBy.boxplot \ No newline at end of file diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index fedec8e9124..41aa6288887 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -14,4 +14,6 @@ the left sidebar to see how various elements look on this theme. 
dataframe index_objects groupby + general_functions + general_utilities From ff5ef1d8e36cc2b1d889d6070449d32eb54c553c Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:02:16 -0700 Subject: [PATCH 10/49] remove strike through copy button --- docs/cudf/source/_static/copybutton_pydocs.js | 65 ------------------- docs/cudf/source/_static/params.css | 8 --- docs/cudf/source/conf.py | 1 - 3 files changed, 74 deletions(-) delete mode 100644 docs/cudf/source/_static/copybutton_pydocs.js diff --git a/docs/cudf/source/_static/copybutton_pydocs.js b/docs/cudf/source/_static/copybutton_pydocs.js deleted file mode 100644 index cec05777e6b..00000000000 --- a/docs/cudf/source/_static/copybutton_pydocs.js +++ /dev/null @@ -1,65 +0,0 @@ -$(document).ready(function() { - /* Add a [>>>] button on the top-right corner of code samples to hide - * the >>> and ... prompts and the output and thus make the code - * copyable. */ - var div = $('.highlight-python .highlight,' + - '.highlight-python3 .highlight,' + - '.highlight-pycon .highlight,' + - '.highlight-default .highlight'); - var pre = div.find('pre'); - - // get the styles from the current theme - pre.parent().parent().css('position', 'relative'); - var hide_text = 'Hide the prompts and output'; - var show_text = 'Show the prompts and output'; - var border_width = pre.css('border-top-width'); - var border_style = pre.css('border-top-style'); - var border_color = pre.css('border-top-color'); - var button_styles = { - 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0', - 'border-color': border_color, 'border-style': border_style, - 'border-width': border_width, 'text-size': '75%', - 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '1.5em', - 'border-radius': '0 3px 0 0', - 'transition': "0.5s" - } - - // create and add the button to all the code blocks that contain >>> - div.each(function(index) { - var jthis = $(this); - if (jthis.find('.gp').length > 0) { - var button = 
$('>>>'); - button.css(button_styles) - button.attr('title', hide_text); - button.data('hidden', 'false'); - jthis.prepend(button); - } - // tracebacks (.gt) contain bare text elements that need to be - // wrapped in a span to work with .nextUntil() (see later) - jthis.find('pre:has(.gt)').contents().filter(function() { - return ((this.nodeType == 3) && (this.data.trim().length > 0)); - }).wrap(''); - }); - - // define the behavior of the button when it's clicked - $('.copybutton').click(function(e){ - e.preventDefault(); - var button = $(this); - if (button.data('hidden') === 'false') { - // hide the code output - button.parent().find('.go, .gp, .gt').hide(); - button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden'); - button.css('text-decoration', 'line-through'); - button.attr('title', show_text); - button.data('hidden', 'true'); - } else { - // show the code output - button.parent().find('.go, .gp, .gt').show(); - button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible'); - button.css('text-decoration', 'none'); - button.attr('title', hide_text); - button.data('hidden', 'false'); - } - }); -}); - diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css index 50a0e8bea3b..fd41184618b 100644 --- a/docs/cudf/source/_static/params.css +++ b/docs/cudf/source/_static/params.css @@ -8,14 +8,6 @@ content: ":"; } -.highlight:hover span#strike_button { - color:#767676; -} - -span#strike_button { - color :#d0ced7; -} - /* Fix for text wrap in sphinx tables: * https://rackerlabs.github.io/docs-rackspace/tools/rtd-tables.html */ diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 724b5c32a78..43ba83d8c46 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -242,7 +242,6 @@ def ignore_internal_references(app, env, node, contnode): def setup(app): - app.add_js_file("copybutton_pydocs.js") app.add_css_file("params.css") 
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") app.connect("doctree-read", resolve_aliases) From ae7d8f0cdc8c1af49c9a282166fb2d9476dd74d6 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:08:46 -0700 Subject: [PATCH 11/49] cleanup --- docs/cudf/source/api_docs/cudf.DataFrame.drop.rst | 6 ------ docs/cudf/source/api_docs/cudf.DataFrame.where.rst | 6 ------ docs/cudf/source/api_docs/pandas.DataFrame.drop.rst | 6 ------ docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst | 6 ------ docs/cudf/source/cudf.DataFrame.drop.rst | 6 ------ docs/cudf/source/cudf.DataFrame.where.rst | 6 ------ docs/cudf/source/index.rst | 2 ++ 7 files changed, 2 insertions(+), 36 deletions(-) delete mode 100644 docs/cudf/source/api_docs/cudf.DataFrame.drop.rst delete mode 100644 docs/cudf/source/api_docs/cudf.DataFrame.where.rst delete mode 100644 docs/cudf/source/api_docs/pandas.DataFrame.drop.rst delete mode 100644 docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst delete mode 100644 docs/cudf/source/cudf.DataFrame.drop.rst delete mode 100644 docs/cudf/source/cudf.DataFrame.where.rst diff --git a/docs/cudf/source/api_docs/cudf.DataFrame.drop.rst b/docs/cudf/source/api_docs/cudf.DataFrame.drop.rst deleted file mode 100644 index 6d46566674d..00000000000 --- a/docs/cudf/source/api_docs/cudf.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.drop -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/cudf.DataFrame.where.rst b/docs/cudf/source/api_docs/cudf.DataFrame.where.rst deleted file mode 100644 index c2035bf11b0..00000000000 --- a/docs/cudf/source/api_docs/cudf.DataFrame.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.where -==================== - -.. currentmodule:: cudf - -.. 
automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst b/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst deleted file mode 100644 index 9ee305369db..00000000000 --- a/docs/cudf/source/api_docs/pandas.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.drop -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst b/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst deleted file mode 100644 index f8872657308..00000000000 --- a/docs/cudf/source/api_docs/pandas.DataFrame.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.groupby -======================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/cudf.DataFrame.drop.rst b/docs/cudf/source/cudf.DataFrame.drop.rst deleted file mode 100644 index 6d46566674d..00000000000 --- a/docs/cudf/source/cudf.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.drop -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/cudf.DataFrame.where.rst b/docs/cudf/source/cudf.DataFrame.where.rst deleted file mode 100644 index c2035bf11b0..00000000000 --- a/docs/cudf/source/cudf.DataFrame.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.where -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index 950694f69a7..1201e9e7ed3 100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -10,6 +10,8 @@ Welcome to cuDF's documentation! 
user_guide/index basics/index api_docs/index + dask-cudf.rst + internals.rst Indices and tables From 258d9bbc4be7902b20399d72c43c37f0d429d236 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:11:04 -0700 Subject: [PATCH 12/49] remove files --- .../api/cudf.core.dataframe.DataFrame.rst | 261 ------------------ .../api/cudf.core.dataframe.extract_col.rst | 6 - .../api/cudf.core.dataframe.from_pandas.rst | 6 - .../source/api/cudf.core.dataframe.merge.rst | 6 - 4 files changed, 279 deletions(-) delete mode 100644 docs/cudf/source/api/cudf.core.dataframe.DataFrame.rst delete mode 100644 docs/cudf/source/api/cudf.core.dataframe.extract_col.rst delete mode 100644 docs/cudf/source/api/cudf.core.dataframe.from_pandas.rst delete mode 100644 docs/cudf/source/api/cudf.core.dataframe.merge.rst diff --git a/docs/cudf/source/api/cudf.core.dataframe.DataFrame.rst b/docs/cudf/source/api/cudf.core.dataframe.DataFrame.rst deleted file mode 100644 index 15108d48ca1..00000000000 --- a/docs/cudf/source/api/cudf.core.dataframe.DataFrame.rst +++ /dev/null @@ -1,261 +0,0 @@ -DataFrame -========= - -.. currentmodule:: cudf.core.dataframe - -.. autoclass:: DataFrame - :show-inheritance: - - .. rubric:: Attributes Summary - - .. autosummary:: - - ~DataFrame.T - ~DataFrame.at - ~DataFrame.columns - ~DataFrame.dtypes - ~DataFrame.iat - ~DataFrame.iloc - ~DataFrame.index - ~DataFrame.loc - ~DataFrame.ndim - ~DataFrame.shape - ~DataFrame.values - - .. rubric:: Methods Summary - - .. 
autosummary:: - - ~DataFrame.add - ~DataFrame.agg - ~DataFrame.all - ~DataFrame.any - ~DataFrame.append - ~DataFrame.apply_chunks - ~DataFrame.apply_rows - ~DataFrame.argsort - ~DataFrame.as_gpu_matrix - ~DataFrame.as_matrix - ~DataFrame.assign - ~DataFrame.astype - ~DataFrame.corr - ~DataFrame.count - ~DataFrame.cov - ~DataFrame.cummax - ~DataFrame.cummin - ~DataFrame.cumprod - ~DataFrame.cumsum - ~DataFrame.describe - ~DataFrame.deserialize - ~DataFrame.div - ~DataFrame.drop - ~DataFrame.drop_duplicates - ~DataFrame.equals - ~DataFrame.explode - ~DataFrame.floordiv - ~DataFrame.from_arrow - ~DataFrame.from_pandas - ~DataFrame.from_records - ~DataFrame.groupby - ~DataFrame.hash_columns - ~DataFrame.head - ~DataFrame.info - ~DataFrame.insert - ~DataFrame.isin - ~DataFrame.iteritems - ~DataFrame.iterrows - ~DataFrame.itertuples - ~DataFrame.join - ~DataFrame.keys - ~DataFrame.kurt - ~DataFrame.kurtosis - ~DataFrame.label_encoding - ~DataFrame.max - ~DataFrame.mean - ~DataFrame.melt - ~DataFrame.memory_usage - ~DataFrame.merge - ~DataFrame.min - ~DataFrame.mod - ~DataFrame.mode - ~DataFrame.mul - ~DataFrame.nans_to_nulls - ~DataFrame.nlargest - ~DataFrame.nsmallest - ~DataFrame.one_hot_encoding - ~DataFrame.partition_by_hash - ~DataFrame.pivot - ~DataFrame.pop - ~DataFrame.pow - ~DataFrame.prod - ~DataFrame.product - ~DataFrame.quantile - ~DataFrame.quantiles - ~DataFrame.query - ~DataFrame.radd - ~DataFrame.rdiv - ~DataFrame.reindex - ~DataFrame.rename - ~DataFrame.replace - ~DataFrame.reset_index - ~DataFrame.rfloordiv - ~DataFrame.rmod - ~DataFrame.rmul - ~DataFrame.rolling - ~DataFrame.rpow - ~DataFrame.rsub - ~DataFrame.rtruediv - ~DataFrame.select_dtypes - ~DataFrame.serialize - ~DataFrame.set_index - ~DataFrame.skew - ~DataFrame.sort_index - ~DataFrame.sort_values - ~DataFrame.stack - ~DataFrame.std - ~DataFrame.sub - ~DataFrame.sum - ~DataFrame.tail - ~DataFrame.take - ~DataFrame.to_arrow - ~DataFrame.to_csv - ~DataFrame.to_dict - ~DataFrame.to_dlpack - 
~DataFrame.to_feather - ~DataFrame.to_hdf - ~DataFrame.to_json - ~DataFrame.to_orc - ~DataFrame.to_pandas - ~DataFrame.to_parquet - ~DataFrame.to_records - ~DataFrame.to_string - ~DataFrame.transpose - ~DataFrame.truediv - ~DataFrame.unstack - ~DataFrame.update - ~DataFrame.var - - .. rubric:: Attributes Documentation - - .. autoattribute:: T - .. autoattribute:: at - .. autoattribute:: columns - .. autoattribute:: dtypes - .. autoattribute:: iat - .. autoattribute:: iloc - .. autoattribute:: index - .. autoattribute:: loc - .. autoattribute:: ndim - .. autoattribute:: shape - .. autoattribute:: values - - .. rubric:: Methods Documentation - - .. automethod:: add - .. automethod:: agg - .. automethod:: all - .. automethod:: any - .. automethod:: append - .. automethod:: apply_chunks - .. automethod:: apply_rows - .. automethod:: argsort - .. automethod:: as_gpu_matrix - .. automethod:: as_matrix - .. automethod:: assign - .. automethod:: astype - .. automethod:: corr - .. automethod:: count - .. automethod:: cov - .. automethod:: cummax - .. automethod:: cummin - .. automethod:: cumprod - .. automethod:: cumsum - .. automethod:: describe - .. automethod:: deserialize - .. automethod:: div - .. automethod:: drop - .. automethod:: drop_duplicates - .. automethod:: equals - .. automethod:: explode - .. automethod:: floordiv - .. automethod:: from_arrow - .. automethod:: from_pandas - .. automethod:: from_records - .. automethod:: groupby - .. automethod:: hash_columns - .. automethod:: head - .. automethod:: info - .. automethod:: insert - .. automethod:: isin - .. automethod:: iteritems - .. automethod:: iterrows - .. automethod:: itertuples - .. automethod:: join - .. automethod:: keys - .. automethod:: kurt - .. automethod:: kurtosis - .. automethod:: label_encoding - .. automethod:: max - .. automethod:: mean - .. automethod:: melt - .. automethod:: memory_usage - .. automethod:: merge - .. automethod:: min - .. automethod:: mod - .. automethod:: mode - .. 
automethod:: mul - .. automethod:: nans_to_nulls - .. automethod:: nlargest - .. automethod:: nsmallest - .. automethod:: one_hot_encoding - .. automethod:: partition_by_hash - .. automethod:: pivot - .. automethod:: pop - .. automethod:: pow - .. automethod:: prod - .. automethod:: product - .. automethod:: quantile - .. automethod:: quantiles - .. automethod:: query - .. automethod:: radd - .. automethod:: rdiv - .. automethod:: reindex - .. automethod:: rename - .. automethod:: replace - .. automethod:: reset_index - .. automethod:: rfloordiv - .. automethod:: rmod - .. automethod:: rmul - .. automethod:: rolling - .. automethod:: rpow - .. automethod:: rsub - .. automethod:: rtruediv - .. automethod:: select_dtypes - .. automethod:: serialize - .. automethod:: set_index - .. automethod:: skew - .. automethod:: sort_index - .. automethod:: sort_values - .. automethod:: stack - .. automethod:: std - .. automethod:: sub - .. automethod:: sum - .. automethod:: tail - .. automethod:: take - .. automethod:: to_arrow - .. automethod:: to_csv - .. automethod:: to_dict - .. automethod:: to_dlpack - .. automethod:: to_feather - .. automethod:: to_hdf - .. automethod:: to_json - .. automethod:: to_orc - .. automethod:: to_pandas - .. automethod:: to_parquet - .. automethod:: to_records - .. automethod:: to_string - .. automethod:: transpose - .. automethod:: truediv - .. automethod:: unstack - .. automethod:: update - .. automethod:: var diff --git a/docs/cudf/source/api/cudf.core.dataframe.extract_col.rst b/docs/cudf/source/api/cudf.core.dataframe.extract_col.rst deleted file mode 100644 index 345172b7445..00000000000 --- a/docs/cudf/source/api/cudf.core.dataframe.extract_col.rst +++ /dev/null @@ -1,6 +0,0 @@ -extract_col -=========== - -.. currentmodule:: cudf.core.dataframe - -.. 
autofunction:: extract_col diff --git a/docs/cudf/source/api/cudf.core.dataframe.from_pandas.rst b/docs/cudf/source/api/cudf.core.dataframe.from_pandas.rst deleted file mode 100644 index 620427315d0..00000000000 --- a/docs/cudf/source/api/cudf.core.dataframe.from_pandas.rst +++ /dev/null @@ -1,6 +0,0 @@ -from_pandas -=========== - -.. currentmodule:: cudf.core.dataframe - -.. autofunction:: from_pandas diff --git a/docs/cudf/source/api/cudf.core.dataframe.merge.rst b/docs/cudf/source/api/cudf.core.dataframe.merge.rst deleted file mode 100644 index 8b4ad51b3e8..00000000000 --- a/docs/cudf/source/api/cudf.core.dataframe.merge.rst +++ /dev/null @@ -1,6 +0,0 @@ -merge -===== - -.. currentmodule:: cudf.core.dataframe - -.. autofunction:: merge From 53af52f383e8961cc5f8309997931e92c4e4306d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:11:43 -0700 Subject: [PATCH 13/49] cleanup --- docs/cudf/source/_static/params.css | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css index fd41184618b..4dc2d54ddb2 100644 --- a/docs/cudf/source/_static/params.css +++ b/docs/cudf/source/_static/params.css @@ -48,6 +48,4 @@ table.io-supported-types-table thead{ --pst-color-toc-link-hover: var(--pst-color-active-navigation); --pst-color-toc-link-active: var(--pst-color-active-navigation); - - -} \ No newline at end of file +} From 002e9d4fe6a9e1c501c649b14a058d9e8a51cfb9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:14:30 -0700 Subject: [PATCH 14/49] cleanup --- docs/cudf/source/generated/cudf.DataFrame.drop.rst | 6 ------ docs/cudf/source/generated/cudf.DataFrame.where.rst | 6 ------ docs/cudf/source/generated/pandas.DataFrame.drop.rst | 6 ------ docs/cudf/source/generated/pandas.DataFrame.groupby.rst | 6 ------ docs/cudf/source/generated/pandas.Series.array.rst | 6 ------ 5 files changed, 30 deletions(-) delete mode 100644 
docs/cudf/source/generated/cudf.DataFrame.drop.rst delete mode 100644 docs/cudf/source/generated/cudf.DataFrame.where.rst delete mode 100644 docs/cudf/source/generated/pandas.DataFrame.drop.rst delete mode 100644 docs/cudf/source/generated/pandas.DataFrame.groupby.rst delete mode 100644 docs/cudf/source/generated/pandas.Series.array.rst diff --git a/docs/cudf/source/generated/cudf.DataFrame.drop.rst b/docs/cudf/source/generated/cudf.DataFrame.drop.rst deleted file mode 100644 index 6d46566674d..00000000000 --- a/docs/cudf/source/generated/cudf.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.drop -=================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/generated/cudf.DataFrame.where.rst b/docs/cudf/source/generated/cudf.DataFrame.where.rst deleted file mode 100644 index c2035bf11b0..00000000000 --- a/docs/cudf/source/generated/cudf.DataFrame.where.rst +++ /dev/null @@ -1,6 +0,0 @@ -cudf.DataFrame.where -==================== - -.. currentmodule:: cudf - -.. automethod:: DataFrame.where \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.DataFrame.drop.rst b/docs/cudf/source/generated/pandas.DataFrame.drop.rst deleted file mode 100644 index 9ee305369db..00000000000 --- a/docs/cudf/source/generated/pandas.DataFrame.drop.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.drop -===================== - -.. currentmodule:: pandas - -.. automethod:: DataFrame.drop \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.DataFrame.groupby.rst b/docs/cudf/source/generated/pandas.DataFrame.groupby.rst deleted file mode 100644 index f8872657308..00000000000 --- a/docs/cudf/source/generated/pandas.DataFrame.groupby.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.DataFrame.groupby -======================== - -.. currentmodule:: pandas - -.. 
automethod:: DataFrame.groupby \ No newline at end of file diff --git a/docs/cudf/source/generated/pandas.Series.array.rst b/docs/cudf/source/generated/pandas.Series.array.rst deleted file mode 100644 index e0954c01d1a..00000000000 --- a/docs/cudf/source/generated/pandas.Series.array.rst +++ /dev/null @@ -1,6 +0,0 @@ -pandas.Series.array -=================== - -.. currentmodule:: pandas - -.. autoproperty:: Series.array \ No newline at end of file From 2eeade6f29ab3febc5454e6b3997bda4ed48a036 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:25:43 -0700 Subject: [PATCH 15/49] add lists --- docs/cudf/source/api_docs/series.rst | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 947235356d9..663570991ba 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -212,7 +212,7 @@ Data Type Accessor Datetime, Timedelta :ref:`dt ` String :ref:`str ` Categorical :ref:`cat ` -Sparse :ref:`sparse ` +List :ref:`list ` =========================== ================================= .. _api.series.dt: @@ -396,6 +396,27 @@ the ``Series.cat`` accessor. as_unordered +.. _api.series.list: + +List handling +~~~~~~~~~~~~~ + +``Series.list`` can be used to access the values of the series as +lists and apply list methods to it. These can be accessed like +``Series.list.``. + +.. currentmodule:: cudf.core.column.lists.ListMethods +.. 
autosummary:: + :toctree: api/ + + concat + contains + get + len + sort_values + take + unique + Serialization / IO / conversion ------------------------------- From 69a6f11e219ecc5e22680420be94c1ae7360459a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:27:56 -0700 Subject: [PATCH 16/49] update gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index b398cfc4f88..5372c539759 100644 --- a/.gitignore +++ b/.gitignore @@ -159,3 +159,7 @@ dask-worker-space/ # protobuf **/*_pb2.py + +# Sphinx docs & build artifacts +docs/cudf/source/api_docs/api +docs/cudf/source/api_docs/generated \ No newline at end of file From 1be68ab4607648cf88ea5335dc3507f8fe4b142e Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:47:19 -0700 Subject: [PATCH 17/49] remove stale file --- docs/cudf/source/api_docs/api.rst | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 docs/cudf/source/api_docs/api.rst diff --git a/docs/cudf/source/api_docs/api.rst b/docs/cudf/source/api_docs/api.rst deleted file mode 100644 index bc227364351..00000000000 --- a/docs/cudf/source/api_docs/api.rst +++ /dev/null @@ -1,22 +0,0 @@ -*************************************** -API documentation and generated content -*************************************** - -This page contains general code elements that are common -for package documentation. - -Autosummary table and API stub pages -==================================== - -.. autosummary:: - :toctree: - - pandas.DataFrame.drop - cudf.DataFrame.drop - pandas.DataFrame.groupby - cudf.DataFrame.where - -.. 
autosummary:: - :toctree: generated/ - - pandas.Series.array From 90255f3d68716f964b68e59a5415b700f8a6136f Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 11:48:15 -0700 Subject: [PATCH 18/49] update gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 5372c539759..fabec763545 100644 --- a/.gitignore +++ b/.gitignore @@ -161,5 +161,4 @@ dask-worker-space/ **/*_pb2.py # Sphinx docs & build artifacts -docs/cudf/source/api_docs/api docs/cudf/source/api_docs/generated \ No newline at end of file From d2da932949822c9f6a2beefeb1a967a09bdb88b0 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 12:27:21 -0700 Subject: [PATCH 19/49] move files --- conda/environments/cudf_dev_cuda11.0.yml | 1 - conda/environments/cudf_dev_cuda11.2.yml | 1 - docs/cudf/source/10min-cudf-cupy.ipynb | 1334 ---- docs/cudf/source/10min.ipynb | 6487 ----------------- .../source/Working-with-missing-data.ipynb | 3466 --------- docs/cudf/source/api.rst | 22 - docs/cudf/source/api_docs/window.rst | 108 + docs/cudf/source/basics.rst | 54 - docs/cudf/source/basics/dask-cudf.md | 78 - docs/cudf/source/{ => basics}/dask-cudf.rst | 0 docs/cudf/source/basics/index.rst | 7 +- docs/cudf/source/basics/internals.md | 194 - docs/cudf/source/{ => basics}/internals.rst | 0 .../{ => basics}/io-gds-integration.rst | 0 .../cudf/source/basics/io-supported-types.rst | 112 +- docs/cudf/source/basics/io.rst | 3 +- docs/cudf/source/conf.py | 20 +- docs/cudf/source/groupby.rst | 237 - docs/cudf/source/guide-to-udfs.ipynb | 1716 ----- docs/cudf/source/io-supported-types.rst | 66 - docs/cudf/source/io.rst | 12 - docs/cudf/source/user_guide/10min.ipynb | 2 +- docs/cudf/source/user_guide/groupby.md | 200 - docs/cudf/source/user_guide/index.rst | 2 +- .../cuda-11.0/dev_requirements.txt | 2 +- .../cuda-11.2/dev_requirements.txt | 2 +- 26 files changed, 185 insertions(+), 13941 deletions(-) delete mode 100644 
docs/cudf/source/10min-cudf-cupy.ipynb delete mode 100644 docs/cudf/source/10min.ipynb delete mode 100644 docs/cudf/source/Working-with-missing-data.ipynb delete mode 100644 docs/cudf/source/api.rst create mode 100644 docs/cudf/source/api_docs/window.rst delete mode 100644 docs/cudf/source/basics.rst delete mode 100644 docs/cudf/source/basics/dask-cudf.md rename docs/cudf/source/{ => basics}/dask-cudf.rst (100%) delete mode 100644 docs/cudf/source/basics/internals.md rename docs/cudf/source/{ => basics}/internals.rst (100%) rename docs/cudf/source/{ => basics}/io-gds-integration.rst (100%) delete mode 100644 docs/cudf/source/groupby.rst delete mode 100644 docs/cudf/source/guide-to-udfs.ipynb delete mode 100644 docs/cudf/source/io-supported-types.rst delete mode 100644 docs/cudf/source/io.rst delete mode 100644 docs/cudf/source/user_guide/groupby.md diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index 9a509f3d5c4..fbfc0fb8f9c 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -26,7 +26,6 @@ dependencies: - pytest-benchmark - pytest-xdist - sphinx - - sphinx_rtd_theme - sphinxcontrib-websupport - nbsphinx - numpydoc diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index 2635f778d34..c5dab7c9d52 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -26,7 +26,6 @@ dependencies: - pytest-benchmark - pytest-xdist - sphinx - - sphinx_rtd_theme - sphinxcontrib-websupport - nbsphinx - numpydoc diff --git a/docs/cudf/source/10min-cudf-cupy.ipynb b/docs/cudf/source/10min-cudf-cupy.ipynb deleted file mode 100644 index 0985291f3c2..00000000000 --- a/docs/cudf/source/10min-cudf-cupy.ipynb +++ /dev/null @@ -1,1334 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 10 Minutes to cuDF and CuPy\n", - "\n", - "This notebook provides introductory 
examples of how you can use cuDF and CuPy together to take advantage of CuPy array functionality (such as advanced linear algebra operations)." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import timeit\n", - "\n", - "import cupy as cp\n", - "import cudf" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Converting a cuDF DataFrame to a CuPy Array\n", - "\n", - "If we want to convert a cuDF DataFrame to a CuPy ndarray, There are multiple ways to do it:\n", - "\n", - "1. We can use the [dlpack](https://github.com/dmlc/dlpack) interface.\n", - "\n", - "2. We can also use `DataFrame.values`.\n", - "\n", - "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `as_gpu_matrix` and CuPy's `asarray` functionality." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "44.1 µs ± 689 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "209 µs ± 2.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n", - "208 µs ± 3.14 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" - ] - } - ], - "source": [ - "nelem = 10000\n", - "df = cudf.DataFrame({'a':range(nelem),\n", - " 'b':range(500, nelem + 500),\n", - " 'c':range(1000, nelem + 1000)}\n", - " )\n", - "\n", - "%timeit arr_cupy = cp.fromDlpack(df.to_dlpack())\n", - "%timeit arr_cupy = df.values\n", - "%timeit arr_cupy = cp.asarray(df.as_gpu_matrix())" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0, 500, 1000],\n", - " [ 1, 501, 1001],\n", - " [ 2, 502, 1002],\n", - " ...,\n", - " [ 9997, 10497, 10997],\n", - " [ 9998, 10498, 10998],\n", - " [ 9999, 10499, 10999]])" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arr_cupy = cp.fromDlpack(df.to_dlpack())\n", - "arr_cupy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Converting a cuDF Series to a CuPy Array" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "There are also multiple ways to convert a cuDF Series to a CuPy array:\n", - "\n", - "1. We can pass the Series to `cupy.asarray` as cuDF Series exposes [`__cuda_array_interface__`](https://docs-cupy.chainer.org/en/stable/reference/interoperability.html).\n", - "2. We can leverage the dlpack interface `to_dlpack()`. \n", - "3. We can also use `Series.values` \n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "22.1 µs ± 518 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "58.3 µs ± 647 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n", - "80.2 µs ± 647 ns per loop (mean ± std. dev. 
of 7 runs, 10000 loops each)\n" - ] - } - ], - "source": [ - "col = 'a'\n", - "\n", - "%timeit cola_cupy = cp.asarray(df[col])\n", - "%timeit cola_cupy = cp.fromDlpack(df[col].to_dlpack())\n", - "%timeit cola_cupy = df[col].values" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 1, 2, ..., 9997, 9998, 9999])" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cola_cupy = cp.asarray(df[col])\n", - "cola_cupy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From here, we can proceed with normal CuPy workflows, such as reshaping the array, getting the diagonal, or calculating the norm." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0, 1, 2, ..., 197, 198, 199],\n", - " [ 200, 201, 202, ..., 397, 398, 399],\n", - " [ 400, 401, 402, ..., 597, 598, 599],\n", - " ...,\n", - " [9400, 9401, 9402, ..., 9597, 9598, 9599],\n", - " [9600, 9601, 9602, ..., 9797, 9798, 9799],\n", - " [9800, 9801, 9802, ..., 9997, 9998, 9999]])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reshaped_arr = cola_cupy.reshape(50, 200)\n", - "reshaped_arr" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 201, 402, 603, 804, 1005, 1206, 1407, 1608, 1809, 2010,\n", - " 2211, 2412, 2613, 2814, 3015, 3216, 3417, 3618, 3819, 4020, 4221,\n", - " 4422, 4623, 4824, 5025, 5226, 5427, 5628, 5829, 6030, 6231, 6432,\n", - " 6633, 6834, 7035, 7236, 7437, 7638, 7839, 8040, 8241, 8442, 8643,\n", - " 8844, 9045, 9246, 9447, 9648, 9849])" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reshaped_arr.diagonal()" - ] - }, - { - 
"cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(577306.967739)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cp.linalg.norm(reshaped_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Converting a CuPy Array to a cuDF DataFrame\n", - "\n", - "We can also convert a CuPy ndarray to a cuDF DataFrame. Like before, there are multiple ways to do it:\n", - "\n", - "1. **Easiest;** We can directly use the `DataFrame` constructor.\n", - "\n", - "2. We can use CUDA array interface with the `DataFrame` constructor.\n", - "\n", - "3. We can also use the [dlpack](https://github.com/dmlc/dlpack) interface.\n", - "\n", - "For the latter two cases, we'll need to make sure that our CuPy array is Fortran contiguous in memory (if it's not already). We can either transpose the array or simply coerce it to be Fortran contiguous beforehand." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "13.1 ms ± 193 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" - ] - } - ], - "source": [ - "%timeit reshaped_df = cudf.DataFrame(reshaped_arr)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", - "

5 rows × 200 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", - "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", - "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", - "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", - "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", - "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", - "\n", - " 194 195 196 197 198 199 \n", - "0 194 195 196 197 198 199 \n", - "1 394 395 396 397 398 399 \n", - "2 594 595 596 597 598 599 \n", - "3 794 795 796 797 798 799 \n", - "4 994 995 996 997 998 999 \n", - "\n", - "[5 rows x 200 columns]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reshaped_df = cudf.DataFrame(reshaped_arr)\n", - "reshaped_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can check whether our array is Fortran contiguous by using cupy.isfortran or looking at the [flags](https://docs-cupy.chainer.org/en/stable/reference/generated/cupy.ndarray.html#cupy.ndarray.flags) of the array." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cp.isfortran(reshaped_arr)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this case, we'll need to convert it before going to a cuDF DataFrame. In the next two cells, we create the DataFrame by leveraging dlpack and the CUDA array interface, respectively." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4.9 ms ± 26.4 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "\n", - "fortran_arr = cp.asfortranarray(reshaped_arr)\n", - "reshaped_df = cudf.DataFrame(fortran_arr)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "5.1 ms ± 23.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" - ] - } - ], - "source": [ - "%%timeit\n", - "\n", - "fortran_arr = cp.asfortranarray(reshaped_arr)\n", - "reshaped_df = cudf.from_dlpack(fortran_arr.toDlpack())" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", - "

5 rows × 200 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", - "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", - "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", - "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", - "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", - "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", - "\n", - " 194 195 196 197 198 199 \n", - "0 194 195 196 197 198 199 \n", - "1 394 395 396 397 398 399 \n", - "2 594 595 596 597 598 599 \n", - "3 794 795 796 797 798 799 \n", - "4 994 995 996 997 998 999 \n", - "\n", - "[5 rows x 200 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fortran_arr = cp.asfortranarray(reshaped_arr)\n", - "reshaped_df = cudf.DataFrame(fortran_arr)\n", - "reshaped_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Converting a CuPy Array to a cuDF Series\n", - "\n", - "To convert an array to a Series, we can directly pass the array to the `Series` constructor." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 201\n", - "2 402\n", - "3 603\n", - "4 804\n", - "dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series(reshaped_arr.diagonal()).head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Interweaving CuDF and CuPy for Smooth PyData Workflows\n", - "\n", - "RAPIDS libraries and the entire GPU PyData ecosystem are developing quickly, but sometimes a one library may not have the functionality you need. One example of this might be taking the row-wise sum (or mean) of a Pandas DataFrame. 
cuDF's support for row-wise operations isn't mature, so you'd need to either transpose the DataFrame or write a UDF and explicitly calculate the sum across each row. Transposing could lead to hundreds of thousands of columns (which cuDF wouldn't perform well with) depending on your data's shape, and writing a UDF can be time intensive.\n", - "\n", - "By leveraging the interoperability of the GPU PyData ecosystem, this operation becomes very easy. Let's take the row-wise sum of our previously reshaped cuDF DataFrame." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...190191192193194195196197198199
00123456789...190191192193194195196197198199
1200201202203204205206207208209...390391392393394395396397398399
2400401402403404405406407408409...590591592593594595596597598599
3600601602603604605606607608609...790791792793794795796797798799
4800801802803804805806807808809...990991992993994995996997998999
\n", - "

5 rows × 200 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \\\n", - "0 0 1 2 3 4 5 6 7 8 9 ... 190 191 192 193 \n", - "1 200 201 202 203 204 205 206 207 208 209 ... 390 391 392 393 \n", - "2 400 401 402 403 404 405 406 407 408 409 ... 590 591 592 593 \n", - "3 600 601 602 603 604 605 606 607 608 609 ... 790 791 792 793 \n", - "4 800 801 802 803 804 805 806 807 808 809 ... 990 991 992 993 \n", - "\n", - " 194 195 196 197 198 199 \n", - "0 194 195 196 197 198 199 \n", - "1 394 395 396 397 398 399 \n", - "2 594 595 596 597 598 599 \n", - "3 794 795 796 797 798 799 \n", - "4 994 995 996 997 998 999 \n", - "\n", - "[5 rows x 200 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "reshaped_df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can just transform it into a CuPy array and use the `axis` argument of `sum`." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 19900, 59900, 99900, 139900, 179900, 219900, 259900,\n", - " 299900, 339900, 379900, 419900, 459900, 499900, 539900,\n", - " 579900, 619900, 659900, 699900, 739900, 779900, 819900,\n", - " 859900, 899900, 939900, 979900, 1019900, 1059900, 1099900,\n", - " 1139900, 1179900, 1219900, 1259900, 1299900, 1339900, 1379900,\n", - " 1419900, 1459900, 1499900, 1539900, 1579900, 1619900, 1659900,\n", - " 1699900, 1739900, 1779900, 1819900, 1859900, 1899900, 1939900,\n", - " 1979900])" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_arr = cp.fromDlpack(reshaped_df.to_dlpack())\n", - "new_arr.sum(axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With just that single line, we're able to seamlessly move between data structures in this ecosystem, giving us enormous flexibility without sacrificing speed." 
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Converting a cuDF DataFrame to a CuPy Sparse Matrix\n", - "\n", - "We can also convert a DataFrame or Series to a CuPy sparse matrix. We might want to do this if downstream processes expect CuPy sparse matrices as an input.\n", - "\n", - "The sparse matrix data structure is defined by three dense arrays. We'll define a small helper function for cleanliness." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "def cudf_to_cupy_sparse_matrix(data, sparseformat='column'):\n", - " \"\"\"Converts a cuDF object to a CuPy Sparse Column matrix.\n", - " \"\"\"\n", - " if sparseformat not in ('row', 'column',):\n", - " raise ValueError(\"Let's focus on column and row formats for now.\")\n", - " \n", - " _sparse_constructor = cp.sparse.csc_matrix\n", - " if sparseformat == 'row':\n", - " _sparse_constructor = cp.sparse.csr_matrix\n", - "\n", - " return _sparse_constructor(cp.fromDlpack(data.to_dlpack()))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can define a sparsely populated DataFrame to illustrate this conversion to either sparse matrix format." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "df = cudf.DataFrame()\n", - "nelem = 10000\n", - "nonzero = 1000\n", - "for i in range(20):\n", - " arr = cp.random.normal(5, 5, nelem)\n", - " arr[cp.random.choice(arr.shape[0], nelem-nonzero, replace=False)] = 0\n", - " df['a' + str(i)] = arr" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
a0a1a2a3a4a5a6a7a8a9a10a11a12a13a14a15a16a17a18a19
00.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.016.8229590.00.0000000.00.00.00.000000
10.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.00.0000000.00.0000000.00.00.00.000000
20.00.06.6189720.00.00.00.00.00.00.00.0000002.256780.00.0000000.00.0000000.00.00.00.000000
30.00.00.0000000.00.00.00.00.00.00.00.0000000.000000.00.0000000.02.7158020.00.00.00.000000
40.00.00.0000000.00.00.00.00.00.00.04.2965680.000000.00.0000000.00.0000000.00.00.04.865495
\n", - "
" - ], - "text/plain": [ - " a0 a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11 \\\n", - "0 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", - "1 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", - "2 0.0 0.0 6.618972 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 2.25678 \n", - "3 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000000 0.00000 \n", - "4 0.0 0.0 0.000000 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.296568 0.00000 \n", - "\n", - " a12 a13 a14 a15 a16 a17 a18 a19 \n", - "0 0.0 16.822959 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "1 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "2 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 0.000000 \n", - "3 0.0 0.000000 0.0 2.715802 0.0 0.0 0.0 0.000000 \n", - "4 0.0 0.000000 0.0 0.000000 0.0 0.0 0.0 4.865495 " - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sparse_data = cudf_to_cupy_sparse_matrix(df)\n", - "sparse_data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "From here, we could continue our workflow with a CuPy sparse matrix.\n", - "\n", - "For a full list of the functionality built into these libraries, we encourage you to check out the API docs for [cuDF](https://docs.rapids.ai/api/cudf/nightly/) and [CuPy](https://docs-cupy.chainer.org/en/stable/index.html)." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.6" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/cudf/source/10min.ipynb b/docs/cudf/source/10min.ipynb deleted file mode 100644 index a7e959a05a7..00000000000 --- a/docs/cudf/source/10min.ipynb +++ /dev/null @@ -1,6487 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "10 Minutes to cuDF and Dask-cuDF\n", - "=======================\n", - "\n", - "Modeled after 10 Minutes to Pandas, this is a short introduction to cuDF and Dask-cuDF, geared mainly for new users.\n", - "\n", - "### What are these Libraries?\n", - "\n", - "[cuDF](https://github.com/rapidsai/cudf) is a Python GPU DataFrame library (built on the Apache Arrow columnar memory format) for loading, joining, aggregating, filtering, and otherwise manipulating tabular data using a DataFrame style API.\n", - "\n", - "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n", - "\n", - "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. 
For instance, when you call dask_cudf.read_csv(...), your cluster’s GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n", - "\n", - "\n", - "### When to use cuDF and Dask-cuDF\n", - "\n", - "If your workflow is fast enough on a single GPU or your data comfortably fits in memory on a single GPU, you would want to use cuDF. If you want to distribute your workflow across multiple GPUs, have more data than you can fit in memory on a single GPU, or want to analyze data spread across many files at once, you would want to use Dask-cuDF." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import cupy as cp\n", - "import pandas as pd\n", - "import cudf\n", - "import dask_cudf\n", - "\n", - "cp.random.seed(12)\n", - "\n", - "#### Portions of this were borrowed and adapted from the\n", - "#### cuDF cheatsheet, existing cuDF documentation,\n", - "#### and 10 Minutes to Pandas." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Object Creation\n", - "---------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creating a `cudf.Series` and `dask_cudf.Series`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 4\n", - "dtype: int64" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s = cudf.Series([1,2,3,None,4])\n", - "s" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 4\n", - "dtype: int64" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = dask_cudf.from_cudf(s, npartitions=2) \n", - "ds.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creating a `cudf.DataFrame` and a `dask_cudf.DataFrame` by specifying values for each column." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
22172
33163
44154
55145
66136
77127
88118
99109
1010910
1111811
1212712
1313613
1414514
1515415
1616316
1717217
1818118
1919019
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1\n", - "2 2 17 2\n", - "3 3 16 3\n", - "4 4 15 4\n", - "5 5 14 5\n", - "6 6 13 6\n", - "7 7 12 7\n", - "8 8 11 8\n", - "9 9 10 9\n", - "10 10 9 10\n", - "11 11 8 11\n", - "12 12 7 12\n", - "13 13 6 13\n", - "14 14 5 14\n", - "15 15 4 15\n", - "16 16 3 16\n", - "17 17 2 17\n", - "18 18 1 18\n", - "19 19 0 19" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = cudf.DataFrame({'a': list(range(20)),\n", - " 'b': list(reversed(range(20))),\n", - " 'c': list(range(20))\n", - " })\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
22172
33163
44154
55145
66136
77127
88118
99109
1010910
1111811
1212712
1313613
1414514
1515415
1616316
1717217
1818118
1919019
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1\n", - "2 2 17 2\n", - "3 3 16 3\n", - "4 4 15 4\n", - "5 5 14 5\n", - "6 6 13 6\n", - "7 7 12 7\n", - "8 8 11 8\n", - "9 9 10 9\n", - "10 10 9 10\n", - "11 11 8 11\n", - "12 12 7 12\n", - "13 13 6 13\n", - "14 14 5 14\n", - "15 15 4 15\n", - "16 16 3 16\n", - "17 17 2 17\n", - "18 18 1 18\n", - "19 19 0 19" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf = dask_cudf.from_cudf(df, npartitions=2) \n", - "ddf.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Creating a `cudf.DataFrame` from a pandas `Dataframe` and a `dask_cudf.Dataframe` from a `cudf.Dataframe`.\n", - "\n", - "*Note that best practice for using Dask-cuDF is to read data directly into a `dask_cudf.DataFrame` with something like `read_csv` (discussed below).*" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
000.1
110.2
22null
330.3
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 0 0.1\n", - "1 1 0.2\n", - "2 2 null\n", - "3 3 0.3" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]})\n", - "gdf = cudf.DataFrame.from_pandas(pdf)\n", - "gdf" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
000.1
110.2
22null
330.3
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 0 0.1\n", - "1 1 0.2\n", - "2 2 null\n", - "3 3 0.3" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dask_gdf = dask_cudf.from_cudf(gdf, npartitions=2)\n", - "dask_gdf.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Viewing Data\n", - "-------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Viewing the top rows of a GPU dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.head(2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Sorting by values." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1919019
1818118
1717217
1616316
1515415
1414514
1313613
1212712
1111811
1010910
99109
88118
77127
66136
55145
44154
33163
22172
11181
00190
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "19 19 0 19\n", - "18 18 1 18\n", - "17 17 2 17\n", - "16 16 3 16\n", - "15 15 4 15\n", - "14 14 5 14\n", - "13 13 6 13\n", - "12 12 7 12\n", - "11 11 8 11\n", - "10 10 9 10\n", - "9 9 10 9\n", - "8 8 11 8\n", - "7 7 12 7\n", - "6 6 13 6\n", - "5 5 14 5\n", - "4 4 15 4\n", - "3 3 16 3\n", - "2 2 17 2\n", - "1 1 18 1\n", - "0 0 19 0" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.sort_values(by='b')" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1919019
1818118
1717217
1616316
1515415
1414514
1313613
1212712
1111811
1010910
99109
88118
77127
66136
55145
44154
33163
22172
11181
00190
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "19 19 0 19\n", - "18 18 1 18\n", - "17 17 2 17\n", - "16 16 3 16\n", - "15 15 4 15\n", - "14 14 5 14\n", - "13 13 6 13\n", - "12 12 7 12\n", - "11 11 8 11\n", - "10 10 9 10\n", - "9 9 10 9\n", - "8 8 11 8\n", - "7 7 12 7\n", - "6 6 13 6\n", - "5 5 14 5\n", - "4 4 15 4\n", - "3 3 16 3\n", - "2 2 17 2\n", - "1 1 18 1\n", - "0 0 19 0" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.sort_values(by='b').compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selection\n", - "------------\n", - "\n", - "## Getting" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selecting a single column, which initially yields a `cudf.Series` or `dask_cudf.Series`. Calling `compute` results in a `cudf.Series` (equivalent to `df.a`)." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 1\n", - "2 2\n", - "3 3\n", - "4 4\n", - "5 5\n", - "6 6\n", - "7 7\n", - "8 8\n", - "9 9\n", - "10 10\n", - "11 11\n", - "12 12\n", - "13 13\n", - "14 14\n", - "15 15\n", - "16 16\n", - "17 17\n", - "18 18\n", - "19 19\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['a']" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 1\n", - "2 2\n", - "3 3\n", - "4 4\n", - "5 5\n", - "6 6\n", - "7 7\n", - "8 8\n", - "9 9\n", - "10 10\n", - "11 11\n", - "12 12\n", - "13 13\n", - "14 14\n", - "15 15\n", - "16 16\n", - "17 17\n", - "18 18\n", - "19 19\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf['a'].compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": 
[ - "## Selection by Label" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selecting rows from index 2 to index 5 from columns 'a' and 'b'." - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
2217
3316
4415
5514
\n", - "
" - ], - "text/plain": [ - " a b\n", - "2 2 17\n", - "3 3 16\n", - "4 4 15\n", - "5 5 14" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.loc[2:5, ['a', 'b']]" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
2217
3316
4415
5514
\n", - "
" - ], - "text/plain": [ - " a b\n", - "2 2 17\n", - "3 3 16\n", - "4 4 15\n", - "5 5 14" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.loc[2:5, ['a', 'b']].compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Selection by Position" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selecting via integers and integer slices, like numpy/pandas. Note that this functionality is not available for Dask-cuDF DataFrames." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "a 0\n", - "b 19\n", - "c 0\n", - "Name: 0, dtype: int64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
0019
1118
2217
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 0 19\n", - "1 1 18\n", - "2 2 17" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.iloc[0:3, 0:2]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also select elements of a `DataFrame` or `Series` with direct index access." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
33163
44154
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "3 3 16 3\n", - "4 4 15 4" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[3:5]" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3 null\n", - "4 4\n", - "dtype: int64" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s[3:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Boolean Indexing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selecting rows in a `DataFrame` or `Series` by direct Boolean indexing." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
22172
33163
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1\n", - "2 2 17 2\n", - "3 3 16 3" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.b > 15]" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
11181
22172
33163
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "1 1 18 1\n", - "2 2 17 2\n", - "3 3 16 3" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf[ddf.b > 15].compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Selecting values from a `DataFrame` where a Boolean condition is met, via the `query` API." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1616316
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "16 16 3 16" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.query(\"b == 3\")" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1616316
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "16 16 3 16" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.query(\"b == 3\").compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also pass local variables to Dask-cuDF queries, via the `local_dict` keyword. With standard cuDF, you may either use the `local_dict` keyword or directly pass the variable via the `@` keyword. Supported logical operators include `>`, `<`, `>=`, `<=`, `==`, and `!=`." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1616316
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "16 16 3 16" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf_comparator = 3\n", - "df.query(\"b == @cudf_comparator\")" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
1616316
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "16 16 3 16" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dask_cudf_comparator = 3\n", - "ddf.query(\"b == @val\", local_dict={'val':dask_cudf_comparator}).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Using the `isin` method for filtering." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00190
55145
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 19 0\n", - "5 5 14 5" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.a.isin([0, 5])]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## MultiIndex" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "cuDF supports hierarchical indexing of DataFrames using MultiIndex. Grouping hierarchically (see `Grouping` below) automatically produces a DataFrame with a MultiIndex." - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MultiIndex(levels=[0 a\n", - "1 b\n", - "dtype: object, 0 1\n", - "1 2\n", - "2 3\n", - "3 4\n", - "dtype: int64],\n", - "codes= 0 1\n", - "0 0 0\n", - "1 0 1\n", - "2 1 2\n", - "3 1 3)" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "arrays = [['a', 'a', 'b', 'b'], [1, 2, 3, 4]]\n", - "tuples = list(zip(*arrays))\n", - "idx = cudf.MultiIndex.from_tuples(tuples)\n", - "idx" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This index can back either axis of a DataFrame." - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
firstsecond
a10.0826540.967955
20.3994170.441425
b30.7842970.793582
40.0703030.271711
\n", - "
" - ], - "text/plain": [ - " first second\n", - "a 1 0.082654 0.967955\n", - " 2 0.399417 0.441425\n", - "b 3 0.784297 0.793582\n", - " 4 0.070303 0.271711" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf1 = cudf.DataFrame({'first': cp.random.rand(4), 'second': cp.random.rand(4)})\n", - "gdf1.index = idx\n", - "gdf1" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
1234
first0.3433820.0037000.200430.581614
second0.9078120.1015120.241790.224180
\n", - "
" - ], - "text/plain": [ - " a b \n", - " 1 2 3 4\n", - "first 0.343382 0.003700 0.20043 0.581614\n", - "second 0.907812 0.101512 0.24179 0.224180" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf2 = cudf.DataFrame({'first': cp.random.rand(4), 'second': cp.random.rand(4)}).T\n", - "gdf2.columns = idx\n", - "gdf2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Accessing values of a DataFrame with a MultiIndex. Note that slicing is not yet supported." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
firstsecond
01
b30.7842970.793582
\n", - "
" - ], - "text/plain": [ - " first second\n", - "0 1 \n", - "b 3 0.784297 0.793582" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf1.loc[('b', 3)]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Missing Data\n", - "------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Missing data can be replaced by using the `fillna` method." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 999\n", - "4 4\n", - "dtype: int64" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.fillna(999)" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 999\n", - "4 4\n", - "dtype: int64" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.fillna(999).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Operations\n", - "------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stats" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Calculating descriptive statistics for a `Series`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(2.5, 1.666666666666666)" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.mean(), s.var()" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(2.5, 1.6666666666666667)" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds.mean().compute(), ds.var().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Applymap" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Applying functions to a `Series`. Note that applying user defined functions directly with Dask-cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.map_partitions.html) to apply a function to each partition of the distributed dataframe." 
- ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 10\n", - "1 11\n", - "2 12\n", - "3 13\n", - "4 14\n", - "5 15\n", - "6 16\n", - "7 17\n", - "8 18\n", - "9 19\n", - "10 20\n", - "11 21\n", - "12 22\n", - "13 23\n", - "14 24\n", - "15 25\n", - "16 26\n", - "17 27\n", - "18 28\n", - "19 29\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def add_ten(num):\n", - " return num + 10\n", - "\n", - "df['a'].applymap(add_ten)" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 10\n", - "1 11\n", - "2 12\n", - "3 13\n", - "4 14\n", - "5 15\n", - "6 16\n", - "7 17\n", - "8 18\n", - "9 19\n", - "10 20\n", - "11 21\n", - "12 22\n", - "13 23\n", - "14 24\n", - "15 25\n", - "16 26\n", - "17 27\n", - "18 28\n", - "19 29\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf['a'].map_partitions(add_ten).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Histogramming" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Counting the number of occurrences of each unique value of variable." 
- ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 1\n", - "2 1\n", - "3 1\n", - "4 1\n", - "5 1\n", - "6 1\n", - "7 1\n", - "8 1\n", - "9 1\n", - "10 1\n", - "11 1\n", - "12 1\n", - "13 1\n", - "14 1\n", - "15 1\n", - "16 1\n", - "17 1\n", - "18 1\n", - "19 1\n", - "Name: a, dtype: int32" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.a.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 1\n", - "2 1\n", - "3 1\n", - "4 1\n", - "5 1\n", - "6 1\n", - "7 1\n", - "8 1\n", - "9 1\n", - "10 1\n", - "11 1\n", - "12 1\n", - "13 1\n", - "14 1\n", - "15 1\n", - "16 1\n", - "17 1\n", - "18 1\n", - "19 1\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.a.value_counts().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## String Methods" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like pandas, cuDF provides string processing methods in the `str` attribute of `Series`. Full documentation of string methods is a work in progress. Please see the cuDF API documentation for more information." 
- ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 a\n", - "1 b\n", - "2 c\n", - "3 aaba\n", - "4 baca\n", - "5 None\n", - "6 caba\n", - "7 dog\n", - "8 cat\n", - "dtype: object" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s = cudf.Series(['A', 'B', 'C', 'Aaba', 'Baca', None, 'CABA', 'dog', 'cat'])\n", - "s.str.lower()" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 a\n", - "1 b\n", - "2 c\n", - "3 aaba\n", - "4 baca\n", - "5 None\n", - "6 caba\n", - "7 dog\n", - "8 cat\n", - "dtype: object" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds = dask_cudf.from_cudf(s, npartitions=2)\n", - "ds.str.lower().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Concat" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Concatenating `Series` and `DataFrames` row-wise." 
- ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "dtype: int64" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s = cudf.Series([1, 2, 3, None, 5])\n", - "cudf.concat([s, s])" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "dtype: int64" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds2 = dask_cudf.from_cudf(s, npartitions=2)\n", - "dask_cudf.concat([ds2, ds2]).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Join" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Performing SQL style merges. Note that the dataframe order is not maintained, but may be restored post-merge by sorting by the index." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keyvals_avals_b
0a10.0100.0
1c12.0101.0
2e14.0102.0
3b11.0null
4d13.0null
\n", - "
" - ], - "text/plain": [ - " key vals_a vals_b\n", - "0 a 10.0 100.0\n", - "1 c 12.0 101.0\n", - "2 e 14.0 102.0\n", - "3 b 11.0 null\n", - "4 d 13.0 null" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_a = cudf.DataFrame()\n", - "df_a['key'] = ['a', 'b', 'c', 'd', 'e']\n", - "df_a['vals_a'] = [float(i + 10) for i in range(5)]\n", - "\n", - "df_b = cudf.DataFrame()\n", - "df_b['key'] = ['a', 'c', 'e']\n", - "df_b['vals_b'] = [float(i+100) for i in range(3)]\n", - "\n", - "merged = df_a.merge(df_b, on=['key'], how='left')\n", - "merged" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
keyvals_avals_b
0a10.0100.0
1c12.0101.0
2b11.0null
0e14.0102.0
1d13.0null
\n", - "
" - ], - "text/plain": [ - " key vals_a vals_b\n", - "0 a 10.0 100.0\n", - "1 c 12.0 101.0\n", - "2 b 11.0 null\n", - "0 e 14.0 102.0\n", - "1 d 13.0 null" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf_a = dask_cudf.from_cudf(df_a, npartitions=2)\n", - "ddf_b = dask_cudf.from_cudf(df_b, npartitions=2)\n", - "\n", - "merged = ddf_a.merge(ddf_b, on=['key'], how='left').compute()\n", - "merged" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Append" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Appending values from another `Series` or array-like object." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "dtype: int64" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s.append(s)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "0 1\n", - "1 2\n", - "2 3\n", - "3 null\n", - "4 5\n", - "dtype: int64" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ds2.append(ds2).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Grouping" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like pandas, cuDF and Dask-cuDF support the Split-Apply-Combine groupby paradigm." 
- ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "df['agg_col1'] = [1 if x % 2 == 0 else 0 for x in range(len(df))]\n", - "df['agg_col2'] = [1 if x % 3 == 0 else 0 for x in range(len(df))]\n", - "\n", - "ddf = dask_cudf.from_cudf(df, npartitions=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Grouping and then applying the `sum` function to the grouped data." - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col2
agg_col1
0100901003
190100904
\n", - "
" - ], - "text/plain": [ - " a b c agg_col2\n", - "agg_col1 \n", - "0 100 90 100 3\n", - "1 90 100 90 4" - ] - }, - "execution_count": 48, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.groupby('agg_col1').sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col2
agg_col1
0100901003
190100904
\n", - "
" - ], - "text/plain": [ - " a b c agg_col2\n", - "agg_col1 \n", - "0 100 90 100 3\n", - "1 90 100 90 4" - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.groupby('agg_col1').sum().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Grouping hierarchically then applying the `sum` function to grouped data." - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
agg_col1agg_col2
00736073
1273027
10546054
1364036
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "agg_col1 agg_col2 \n", - "0 0 73 60 73\n", - " 1 27 30 27\n", - "1 0 54 60 54\n", - " 1 36 40 36" - ] - }, - "execution_count": 50, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.groupby(['agg_col1', 'agg_col2']).sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
agg_col1agg_col2
11364036
00736073
10546054
01273027
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "agg_col1 agg_col2 \n", - "1 1 36 40 36\n", - "0 0 73 60 73\n", - "1 0 54 60 54\n", - "0 1 27 30 27" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.groupby(['agg_col1', 'agg_col2']).sum().compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Grouping and applying statistical functions to specific columns, using `agg`." - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
agg_col1
0199.0100
11810.090
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "agg_col1 \n", - "0 19 9.0 100\n", - "1 18 10.0 90" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.groupby('agg_col1').agg({'a':'max', 'b':'mean', 'c':'sum'})" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
agg_col1
0199.0100
11810.090
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "agg_col1 \n", - "0 19 9.0 100\n", - "1 18 10.0 90" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.groupby('agg_col1').agg({'a':'max', 'b':'mean', 'c':'sum'}).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Transpose" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Transposing a dataframe, using either the `transpose` method or `T` property. Currently, all columns must have the same type. Transposing is not currently implemented in Dask-cuDF." - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
014
125
236
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 4\n", - "1 2 5\n", - "2 3 6" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sample = cudf.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})\n", - "sample" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
012
a123
b456
\n", - "
" - ], - "text/plain": [ - " 0 1 2\n", - "a 1 2 3\n", - "b 4 5 6" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sample.transpose()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Time Series\n", - "------------\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`DataFrames` supports `datetime` typed columns, which allow users to interact with and filter data based on specific timestamps." - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datevalue
02018-11-200.986051
12018-11-210.232034
22018-11-220.397617
32018-11-230.103839
\n", - "
" - ], - "text/plain": [ - " date value\n", - "0 2018-11-20 0.986051\n", - "1 2018-11-21 0.232034\n", - "2 2018-11-22 0.397617\n", - "3 2018-11-23 0.103839" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import datetime as dt\n", - "\n", - "date_df = cudf.DataFrame()\n", - "date_df['date'] = pd.date_range('11/20/2018', periods=72, freq='D')\n", - "date_df['value'] = cp.random.sample(len(date_df))\n", - "\n", - "search_date = dt.datetime.strptime('2018-11-23', '%Y-%m-%d')\n", - "date_df.query('date <= @search_date')" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
datevalue
02018-11-200.986051
12018-11-210.232034
22018-11-220.397617
32018-11-230.103839
\n", - "
" - ], - "text/plain": [ - " date value\n", - "0 2018-11-20 0.986051\n", - "1 2018-11-21 0.232034\n", - "2 2018-11-22 0.397617\n", - "3 2018-11-23 0.103839" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "date_ddf = dask_cudf.from_cudf(date_df, npartitions=2)\n", - "date_ddf.query('date <= @search_date', local_dict={'search_date':search_date}).compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Categoricals\n", - "------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`DataFrames` support categorical columns." - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgrade
01a
12b
23b
34a
45a
56e
\n", - "
" - ], - "text/plain": [ - " id grade\n", - "0 1 a\n", - "1 2 b\n", - "2 3 b\n", - "3 4 a\n", - "4 5 a\n", - "5 6 e" - ] - }, - "execution_count": 58, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf = cudf.DataFrame({\"id\": [1, 2, 3, 4, 5, 6], \"grade\":['a', 'b', 'b', 'a', 'a', 'e']})\n", - "gdf['grade'] = gdf['grade'].astype('category')\n", - "gdf" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
idgrade
01a
12b
23b
34a
45a
56e
\n", - "
" - ], - "text/plain": [ - " id grade\n", - "0 1 a\n", - "1 2 b\n", - "2 3 b\n", - "3 4 a\n", - "4 5 a\n", - "5 6 e" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dgdf = dask_cudf.from_cudf(gdf, npartitions=2)\n", - "dgdf.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Accessing the categories of a column. Note that this is currently not supported in Dask-cuDF." - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "StringIndex(['a' 'b' 'e'], dtype='object')" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.grade.cat.categories" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Accessing the underlying code values of each categorical observation." - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 1\n", - "2 1\n", - "3 0\n", - "4 0\n", - "5 2\n", - "dtype: int8" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.grade.cat.codes" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0\n", - "1 1\n", - "2 1\n", - "3 0\n", - "4 0\n", - "5 2\n", - "dtype: int8" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dgdf.grade.cat.codes.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Converting Data Representation\n", - "--------------------------------" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Pandas" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Converting a cuDF and Dask-cuDF `DataFrame` to a pandas 
`DataFrame`." - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head().to_pandas()" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.compute().head().to_pandas()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Numpy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Converting a cuDF or Dask-cuDF `DataFrame` to a numpy `ndarray`." - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0, 19, 0, 1, 1],\n", - " [ 1, 18, 1, 0, 0],\n", - " [ 2, 17, 2, 1, 0],\n", - " [ 3, 16, 3, 0, 1],\n", - " [ 4, 15, 4, 1, 0],\n", - " [ 5, 14, 5, 0, 0],\n", - " [ 6, 13, 6, 1, 1],\n", - " [ 7, 12, 7, 0, 0],\n", - " [ 8, 11, 8, 1, 0],\n", - " [ 9, 10, 9, 0, 1],\n", - " [10, 9, 10, 1, 0],\n", - " [11, 8, 11, 0, 0],\n", - " [12, 7, 12, 1, 1],\n", - " [13, 6, 13, 0, 0],\n", - " [14, 5, 14, 1, 0],\n", - " [15, 4, 15, 0, 1],\n", - " [16, 3, 16, 1, 0],\n", - " [17, 2, 17, 0, 0],\n", - " [18, 1, 18, 1, 1],\n", - " [19, 0, 19, 0, 0]])" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.as_matrix()" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[ 0, 19, 0, 1, 1],\n", - " [ 1, 18, 1, 0, 0],\n", - " [ 2, 17, 2, 1, 0],\n", - " [ 3, 16, 3, 0, 1],\n", - " [ 4, 15, 4, 1, 0],\n", - " [ 5, 14, 5, 0, 0],\n", - " [ 6, 13, 6, 1, 1],\n", - " [ 7, 12, 7, 0, 0],\n", - " [ 8, 11, 8, 1, 0],\n", - " [ 9, 10, 9, 0, 1],\n", - " [10, 9, 10, 1, 0],\n", - " [11, 8, 11, 0, 0],\n", - " [12, 7, 12, 1, 1],\n", - " [13, 6, 13, 0, 0],\n", - " [14, 5, 14, 1, 0],\n", - " [15, 4, 15, 0, 1],\n", - " [16, 3, 16, 1, 0],\n", - " [17, 2, 17, 0, 0],\n", - " [18, 1, 18, 1, 1],\n", - " [19, 0, 19, 0, 0]])" - 
] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.compute().as_matrix()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Converting a cuDF or Dask-cuDF `Series` to a numpy `ndarray`." - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " 17, 18, 19])" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['a'].to_array()" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n", - " 17, 18, 19])" - ] - }, - "execution_count": 68, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf['a'].compute().to_array()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Arrow" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Converting a cuDF or Dask-cuDF `DataFrame` to a PyArrow `Table`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "pyarrow.Table\n", - "a: int64\n", - "b: int64\n", - "c: int64\n", - "agg_col1: int64\n", - "agg_col2: int64\n", - "metadata\n", - "--------\n", - "{b'pandas': b'{\"index_columns\": [{\"kind\": \"range\", \"name\": null, \"start\": 0, \"'\n", - " b'stop\": 20, \"step\": 1}], \"column_indexes\": [{\"name\": null, \"field'\n", - " b'_name\": null, \"pandas_type\": \"unicode\", \"numpy_type\": \"object\", '\n", - " b'\"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\": [{\"name\": \"a\", \"'\n", - " b'field_name\": \"a\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\",'\n", - " b' \"metadata\": null}, {\"name\": \"b\", \"field_name\": \"b\", \"pandas_typ'\n", - " b'e\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": '\n", - " b'\"c\", \"field_name\": \"c\", \"pandas_type\": \"int64\", \"numpy_type\": \"i'\n", - " b'nt64\", \"metadata\": null}, {\"name\": \"agg_col1\", \"field_name\": \"ag'\n", - " b'g_col1\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadat'\n", - " b'a\": null}, {\"name\": \"agg_col2\", \"field_name\": \"agg_col2\", \"panda'\n", - " b's_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}], \"cr'\n", - " b'eator\": {\"library\": \"pyarrow\", \"version\": \"0.15.0\"}, \"pandas_ver'\n", - " b'sion\": \"0.25.3\"}'}" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.to_arrow()" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "pyarrow.Table\n", - "a: int64\n", - "b: int64\n", - "c: int64\n", - "agg_col1: int64\n", - "agg_col2: int64\n", - "__index_level_0__: int64\n", - "metadata\n", - "--------\n", - "{b'pandas': b'{\"index_columns\": [\"__index_level_0__\"], \"column_indexes\": [{\"na'\n", - " b'me\": null, 
\"field_name\": null, \"pandas_type\": \"unicode\", \"numpy_'\n", - " b'type\": \"object\", \"metadata\": {\"encoding\": \"UTF-8\"}}], \"columns\":'\n", - " b' [{\"name\": \"a\", \"field_name\": \"a\", \"pandas_type\": \"int64\", \"nump'\n", - " b'y_type\": \"int64\", \"metadata\": null}, {\"name\": \"b\", \"field_name\":'\n", - " b' \"b\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\":'\n", - " b' null}, {\"name\": \"c\", \"field_name\": \"c\", \"pandas_type\": \"int64\",'\n", - " b' \"numpy_type\": \"int64\", \"metadata\": null}, {\"name\": \"agg_col1\", '\n", - " b'\"field_name\": \"agg_col1\", \"pandas_type\": \"int64\", \"numpy_type\": '\n", - " b'\"int64\", \"metadata\": null}, {\"name\": \"agg_col2\", \"field_name\": \"'\n", - " b'agg_col2\", \"pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metad'\n", - " b'ata\": null}, {\"name\": null, \"field_name\": \"__index_level_0__\", \"'\n", - " b'pandas_type\": \"int64\", \"numpy_type\": \"int64\", \"metadata\": null}]'\n", - " b', \"creator\": {\"library\": \"pyarrow\", \"version\": \"0.15.0\"}, \"panda'\n", - " b's_version\": \"0.25.3\"}'}" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf.compute().to_arrow()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Getting Data In/Out\n", - "------------------------\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## CSV" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Writing to a CSV file." 
- ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [], - "source": [ - "if not os.path.exists('example_output'):\n", - " os.mkdir('example_output')\n", - " \n", - "df.to_csv('example_output/foo.csv', index=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [], - "source": [ - "ddf.compute().to_csv('example_output/foo_dask.csv', index=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reading from a csv file." - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0\n", - "5 5 14 5 0 0\n", - "6 6 13 6 1 1\n", - "7 7 12 7 0 0\n", - "8 8 11 8 1 0\n", - "9 9 10 9 0 1\n", - "10 10 9 10 1 0\n", - "11 11 8 11 0 0\n", - "12 12 7 12 1 1\n", - "13 13 6 13 0 0\n", - "14 14 5 14 1 0\n", - "15 15 4 15 0 1\n", - "16 16 3 16 1 0\n", - "17 17 2 17 0 0\n", - "18 18 1 18 1 1\n", - "19 19 0 19 0 0" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = cudf.read_csv('example_output/foo.csv')\n", - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0\n", - "5 5 14 5 0 0\n", - "6 6 13 6 1 1\n", - "7 7 12 7 0 0\n", - "8 8 11 8 1 0\n", - "9 9 10 9 0 1\n", - "10 10 9 10 1 0\n", - "11 11 8 11 0 0\n", - "12 12 7 12 1 1\n", - "13 13 6 13 0 0\n", - "14 14 5 14 1 0\n", - "15 15 4 15 0 1\n", - "16 16 3 16 1 0\n", - "17 17 2 17 0 0\n", - "18 18 1 18 1 1\n", - "19 19 0 19 0 0" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf = dask_cudf.read_csv('example_output/foo_dask.csv')\n", - "ddf.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reading all CSV files in a directory into a single `dask_cudf.DataFrame`, using the star wildcard." - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0\n", - "5 5 14 5 0 0\n", - "6 6 13 6 1 1\n", - "7 7 12 7 0 0\n", - "8 8 11 8 1 0\n", - "9 9 10 9 0 1\n", - "10 10 9 10 1 0\n", - "11 11 8 11 0 0\n", - "12 12 7 12 1 1\n", - "13 13 6 13 0 0\n", - "14 14 5 14 1 0\n", - "15 15 4 15 0 1\n", - "16 16 3 16 1 0\n", - "17 17 2 17 0 0\n", - "18 18 1 18 1 1\n", - "19 19 0 19 0 0\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0\n", - "5 5 14 5 0 0\n", - "6 6 13 6 1 1\n", - "7 7 12 7 0 0\n", - "8 8 11 8 1 0\n", - "9 9 10 9 0 1\n", - "10 10 9 10 1 0\n", - "11 11 8 11 0 0\n", - "12 12 7 12 1 1\n", - "13 13 6 13 0 0\n", - "14 14 5 14 1 0\n", - "15 15 4 15 0 1\n", - "16 16 3 16 1 0\n", - "17 17 2 17 0 0\n", - "18 18 1 18 1 1\n", - "19 19 0 19 0 0" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf = dask_cudf.read_csv('example_output/*.csv')\n", - "ddf.compute()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Parquet" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Writing to parquet files, using the CPU via PyArrow." - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [], - "source": [ - "df.to_parquet('example_output/temp_parquet')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reading parquet files with a GPU-accelerated parquet reader." - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcagg_col1agg_col2
0019011
1118100
2217210
3316301
4415410
5514500
6613611
7712700
8811810
9910901
101091010
111181100
121271211
131361300
141451410
151541501
161631610
171721700
181811811
191901900
\n", - "
" - ], - "text/plain": [ - " a b c agg_col1 agg_col2\n", - "0 0 19 0 1 1\n", - "1 1 18 1 0 0\n", - "2 2 17 2 1 0\n", - "3 3 16 3 0 1\n", - "4 4 15 4 1 0\n", - "5 5 14 5 0 0\n", - "6 6 13 6 1 1\n", - "7 7 12 7 0 0\n", - "8 8 11 8 1 0\n", - "9 9 10 9 0 1\n", - "10 10 9 10 1 0\n", - "11 11 8 11 0 0\n", - "12 12 7 12 1 1\n", - "13 13 6 13 0 0\n", - "14 14 5 14 1 0\n", - "15 15 4 15 0 1\n", - "16 16 3 16 1 0\n", - "17 17 2 17 0 0\n", - "18 18 1 18 1 1\n", - "19 19 0 19 0 0" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = cudf.read_parquet('example_output/temp_parquet')\n", - "df" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Writing to parquet files from a `dask_cudf.DataFrame` using PyArrow under the hood." - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [], - "source": [ - "ddf.to_parquet('example_files') " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ORC" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Reading ORC files." - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
boolean1byte1short1int1long1float1double1bytes1string1middle.list.int1middle.list.string1list.int1list.string1mapmap.int1map.string1
0False110246553692233720368547758071.0-15.0\u0000\u0001\u0002\u0003\u0004hi3bye4chani5chani
1True10020486553692233720368547758072.0-5.0bye0bye0mauddib1mauddib
\n", - "
" - ], - "text/plain": [ - " boolean1 byte1 short1 int1 long1 float1 double1 \\\n", - "0 False 1 1024 65536 9223372036854775807 1.0 -15.0 \n", - "1 True 100 2048 65536 9223372036854775807 2.0 -5.0 \n", - "\n", - " bytes1 string1 middle.list.int1 middle.list.string1 list.int1 \\\n", - "0 \u0000\u0001\u0002\u0003\u0004 hi 3 bye 4 \n", - "1 bye 0 bye 0 \n", - "\n", - " list.string1 map map.int1 map.string1 \n", - "0 chani 5 chani \n", - "1 mauddib 1 mauddib " - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = cudf.read_orc('/rapids/cudf/python/cudf/cudf/tests/data/orc/TestOrcFile.test1.orc')\n", - "df2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Dask Performance Tips\n", - "--------------------------------\n", - "\n", - "Like Apache Spark, Dask operations are [lazy](https://en.wikipedia.org/wiki/Lazy_evaluation). Instead of being executed at that moment, most operations are added to a task graph and the actual evaluation is delayed until the result is needed.\n", - "\n", - "Sometimes, though, we want to force the execution of operations. Calling `persist` on a Dask collection fully computes it (or actively computes it in the background), persisting the result into memory. When we're using distributed systems, we may want to wait until `persist` is finished before beginning any downstream operations. We can enforce this contract by using `wait`. Wrapping an operation with `wait` will ensure it doesn't begin executing until all necessary upstream operations have finished.\n", - "\n", - "The snippets below provide basic examples, using `LocalCUDACluster` to create one dask-worker per GPU on the local machine. For more detailed information about `persist` and `wait`, please see the Dask documentation for [persist](https://docs.dask.org/en/latest/api.html#dask.persist) and [wait](https://docs.dask.org/en/latest/futures.html#distributed.wait). 
Wait relies on the concept of Futures, which is beyond the scope of this tutorial. For more information on Futures, see the Dask [Futures](https://docs.dask.org/en/latest/futures.html) documentation. For more information about multi-GPU clusters, please see the [dask-cuda](https://github.com/rapidsai/dask-cuda) library (documentation is in progress)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, we set up a GPU cluster. With our `client` set up, Dask-cuDF computation will be distributed across the GPUs in the cluster." - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "

Client

\n", - "\n", - "
\n", - "

Cluster

\n", - "
    \n", - "
  • Workers: 4
  • \n", - "
  • Cores: 4
  • \n", - "
  • Memory: 404.32 GB
  • \n", - "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import time\n", - "\n", - "from dask.distributed import Client, wait\n", - "from dask_cuda import LocalCUDACluster\n", - "\n", - "cluster = LocalCUDACluster()\n", - "client = Client(cluster)\n", - "client" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Persisting Data\n", - "Next, we create our Dask-cuDF DataFrame and apply a transformation, storing the result as a new column." - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Dask DataFrame Structure:
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
npartitions=5
0int64int64int64
2000000.........
............
8000000.........
9999999.........
\n", - "
\n", - "
Dask Name: assign, 20 tasks
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "nrows = 10000000\n", - "\n", - "df2 = cudf.DataFrame({'a': cp.arange(nrows), 'b': cp.arange(nrows)})\n", - "ddf2 = dask_cudf.from_cudf(df2, npartitions=5)\n", - "ddf2['c'] = ddf2['a'] + 5\n", - "ddf2" - ] - }, - { - "cell_type": "code", - "execution_count": 82, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Thu Jun 4 05:36:08 2020 \n", - "+-----------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 440.64.00 Driver Version: 440.64.00 CUDA Version: 10.2 |\n", - "|-------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", - "|===============================+======================+======================|\n", - "| 0 Tesla T4 On | 00000000:3B:00.0 Off | 0 |\n", - "| N/A 38C P0 27W / 70W | 743MiB / 15109MiB | 2% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 1 Tesla T4 On | 00000000:5E:00.0 Off | 0 |\n", - "| N/A 41C P0 26W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 2 Tesla T4 On | 00000000:AF:00.0 Off | 0 |\n", - "| N/A 35C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 3 Tesla T4 On | 00000000:D8:00.0 Off | 0 |\n", - "| N/A 34C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - " \n", - "+-----------------------------------------------------------------------------+\n", - "| Processes: GPU Memory |\n", - "| GPU PID Type Process name Usage 
|\n", - "|=============================================================================|\n", - "| 0 57229 C ...sets/pgali/envs/new_cudf_env/bin/python 633MiB |\n", - "| 0 57341 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 1 57337 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 2 57336 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 3 57335 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "+-----------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Because Dask is lazy, the computation has not yet occurred. We can see that there are twenty tasks in the task graph and we've used about 800 MB of memory. We can force computation by using `persist`. By forcing execution, the result is now explicitly in memory and our task graph only contains one task per partition (the baseline)." - ] - }, - { - "cell_type": "code", - "execution_count": 83, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Dask DataFrame Structure:
\n", - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
npartitions=5
0int64int64int64
2000000.........
............
8000000.........
9999999.........
\n", - "
\n", - "
Dask Name: assign, 5 tasks
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ddf2 = ddf2.persist()\n", - "ddf2" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Thu Jun 4 05:36:09 2020 \n", - "+-----------------------------------------------------------------------------+\n", - "| NVIDIA-SMI 440.64.00 Driver Version: 440.64.00 CUDA Version: 10.2 |\n", - "|-------------------------------+----------------------+----------------------+\n", - "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", - "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", - "|===============================+======================+======================|\n", - "| 0 Tesla T4 On | 00000000:3B:00.0 Off | 0 |\n", - "| N/A 38C P0 27W / 70W | 743MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 1 Tesla T4 On | 00000000:5E:00.0 Off | 0 |\n", - "| N/A 42C P0 26W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 2 Tesla T4 On | 00000000:AF:00.0 Off | 0 |\n", - "| N/A 35C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - "| 3 Tesla T4 On | 00000000:D8:00.0 Off | 0 |\n", - "| N/A 34C P0 27W / 70W | 110MiB / 15109MiB | 0% Default |\n", - "+-------------------------------+----------------------+----------------------+\n", - " \n", - "+-----------------------------------------------------------------------------+\n", - "| Processes: GPU Memory |\n", - "| GPU PID Type Process name Usage |\n", - "|=============================================================================|\n", - "| 0 57229 C ...sets/pgali/envs/new_cudf_env/bin/python 633MiB |\n", - 
"| 0 57341 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 1 57337 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 2 57336 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "| 3 57335 C ...sets/pgali/envs/new_cudf_env/bin/python 99MiB |\n", - "+-----------------------------------------------------------------------------+\n" - ] - } - ], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Because we forced computation, we now have a larger object in distributed GPU memory." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Wait\n", - "Depending on our workflow or distributed computing setup, we may want to `wait` until all upstream tasks have finished before proceeding with a specific function. This section shows an example of this behavior, adapted from the Dask documentation.\n", - "\n", - "First, we create a new Dask DataFrame and define a function that we'll map to every partition in the dataframe." - ] - }, - { - "cell_type": "code", - "execution_count": 85, - "metadata": {}, - "outputs": [], - "source": [ - "nrows = 10000000\n", - "\n", - "df1 = cudf.DataFrame({'a': cp.arange(nrows), 'b': cp.arange(nrows)})\n", - "ddf1 = dask_cudf.from_cudf(df1, npartitions=100)\n", - "\n", - "def func(df):\n", - " time.sleep(cp.random.randint(1, 60))\n", - " return (df + 5) * 3 - 11" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function will do a basic transformation of every column in the dataframe, but the time spent in the function will vary due to the `time.sleep` statement randomly adding 1-60 seconds of time. We'll run this on every partition of our dataframe using `map_partitions`, which adds the task to our task-graph, and store the result. We can then call `persist` to force execution." 
- ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [], - "source": [ - "results_ddf = ddf2.map_partitions(func)\n", - "results_ddf = results_ddf.persist()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "However, some partitions will be done **much** sooner than others. If we had downstream processes that should wait for all partitions to be completed, we can enforce that behavior using `wait`." - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DoneAndNotDoneFutures(done={, , , , }, not_done=set())" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "wait(results_ddf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "With `wait`, we can safely proceed on in our workflow." - ] - } - ], - "metadata": { - "anaconda-cloud": {}, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.6" - }, - "toc": { - "base_numbering": 1, - "nav_menu": {}, - "number_sections": true, - "sideBar": true, - "skip_h1_title": false, - "title_cell": "Table of Contents", - "title_sidebar": "Contents", - "toc_cell": false, - "toc_position": {}, - "toc_section_display": true, - "toc_window_display": false - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/cudf/source/Working-with-missing-data.ipynb b/docs/cudf/source/Working-with-missing-data.ipynb deleted file mode 100644 index 54fe774060e..00000000000 --- a/docs/cudf/source/Working-with-missing-data.ipynb +++ /dev/null @@ -1,3466 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - 
"source": [ - "# Working with missing data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In this section, we will discuss missing (also referred to as `NA`) values in cudf. cudf supports having missing values in all dtypes. These missing values are represented by ``. These values are also referenced as \"null values\"." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. [How to Detect missing values](#How-to-Detect-missing-values)\n", - "2. [Float dtypes and missing data](#Float-dtypes-and-missing-data)\n", - "3. [Datetimes](#Datetimes)\n", - "4. [Calculations with missing data](#Calculations-with-missing-data)\n", - "5. [Sum/product of Null/nans](#Sum/product-of-Null/nans)\n", - "6. [NA values in GroupBy](#NA-values-in-GroupBy)\n", - "7. [Inserting missing data](#Inserting-missing-data)\n", - "8. [Filling missing values: fillna](#Filling-missing-values:-fillna)\n", - "9. [Filling with cudf Object](#Filling-with-cudf-Object)\n", - "10. [Dropping axis labels with missing data: dropna](#Dropping-axis-labels-with-missing-data:-dropna)\n", - "11. [Replacing generic values](#Replacing-generic-values)\n", - "12. [String/regular expression replacement](#String/regular-expression-replacement)\n", - "13. [Numeric replacement](#Numeric-replacement)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## How to Detect missing values" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To detect missing values, you can use `isna()` and `notna()` functions." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import cudf\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "df = cudf.DataFrame({'a': [1, 2, None, 4], 'b':[0.1, None, 2.3, 17.17]})" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
010.1
12<NA>
2<NA>2.3
3417.17
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 0.1\n", - "1 2 \n", - "2 2.3\n", - "3 4 17.17" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
0FalseFalse
1FalseTrue
2TrueFalse
3FalseFalse
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 False False\n", - "1 False True\n", - "2 True False\n", - "3 False False" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.isna()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 True\n", - "1 True\n", - "2 False\n", - "3 True\n", - "Name: a, dtype: bool" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['a'].notna()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One has to be mindful that in Python (and NumPy), the nan's don’t compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "None == None" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "np.nan == np.nan" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 False\n", - "2 False\n", - "3 False\n", - "Name: b, dtype: bool" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['b'] == np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": 
[], - "source": [ - "s = cudf.Series([None, 1, 2])" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 \n", - "1 1\n", - "2 2\n", - "dtype: int64" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 False\n", - "2 False\n", - "dtype: bool" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s == None" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [], - "source": [ - "s = cudf.Series([1, 2, np.nan], nan_as_null=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1.0\n", - "1 2.0\n", - "2 NaN\n", - "dtype: float64" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 False\n", - "1 False\n", - "2 False\n", - "dtype: bool" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "s == np.nan" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Float dtypes and missing data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Because ``NaN`` is a float, a column of integers with even one missing values is cast to floating-point dtype. However this doesn't happen by default.\n", - "\n", - "By default if a ``NaN`` value is passed to `Series` constructor, it is treated as `` value. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 \n", - "dtype: int64" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([1, 2, np.nan])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Hence to consider a ``NaN`` as ``NaN`` you will have to pass `nan_as_null=False` parameter into `Series` constructor." - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1.0\n", - "1 2.0\n", - "2 NaN\n", - "dtype: float64" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([1, 2, np.nan], nan_as_null=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Datetimes" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For `datetime64` types, cudf doesn't support having `NaT` values. Instead these values which are specific to numpy and pandas are considered as null values(``) in cudf. 
The actual underlying value of `NaT` is `min(int64)` and cudf retains the underlying value when converting a cudf object to pandas object.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2012-01-01 00:00:00.000000\n", - "1 \n", - "2 2012-01-01 00:00:00.000000\n", - "dtype: datetime64[us]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "datetime_series = cudf.Series([pd.Timestamp(\"20120101\"), pd.NaT, pd.Timestamp(\"20120101\")])\n", - "datetime_series" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2012-01-01\n", - "1 NaT\n", - "2 2012-01-01\n", - "dtype: datetime64[ns]" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datetime_series.to_pandas()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "any operations on rows having `` values in `datetime` column will result in `` value at the same location in resulting column:" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0 days 00:00:00\n", - "1 \n", - "2 0 days 00:00:00\n", - "dtype: timedelta64[us]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "datetime_series - datetime_series" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Calculations with missing data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Null values propagate naturally through arithmetic operations between pandas objects." 
- ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "df1 = cudf.DataFrame({'a':[1, None, 2, 3, None], 'b':cudf.Series([np.nan, 2, 3.2, 0.1, 1], nan_as_null=False)})" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "df2 = cudf.DataFrame({'a':[1, 11, 2, 34, 10], 'b':cudf.Series([0.23, 22, 3.2, None, 1])})" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 NaN\n", - "1 2.0\n", - "2 2 3.2\n", - "3 3 0.1\n", - "4 1.0" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
010.23
11122.0
223.2
334<NA>
4101.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 0.23\n", - "1 11 22.0\n", - "2 2 3.2\n", - "3 34 \n", - "4 10 1.0" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
02NaN
1<NA>24.0
246.4
337<NA>
4<NA>2.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 2 NaN\n", - "1 24.0\n", - "2 4 6.4\n", - "3 37 \n", - "4 2.0" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1 + df2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "While summing the data along a series, `NA` values will be treated as `0`." - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 \n", - "2 2\n", - "3 3\n", - "4 \n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a']" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "6" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Since `NA` values are skipped by default, the mean is computed over the non-null values only, which gives 2 in this case: `(1 + 2 + 3)/3 = 2`" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2.0" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To preserve `NA` values in the above calculations, `sum` & `mean` support the `skipna` parameter.\n", - "By default its value is\n", - "set to `True`; we can change it to `False` to preserve `NA` values." 
- ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].sum(skipna=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].mean(skipna=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Cumulative methods like `cumsum` and `cumprod` ignore `NA` values by default." - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 \n", - "2 3\n", - "3 6\n", - "4 \n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].cumsum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To preserve `NA` values in cumulative methods, provide `skipna=False`." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 \n", - "2 \n", - "3 \n", - "4 \n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].cumsum(skipna=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Sum/product of Null/nans" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The sum of an empty or all-NA Series of a DataFrame is 0." 
- ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([np.nan], nan_as_null=False).sum()" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([np.nan], nan_as_null=False).sum(skipna=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([], dtype='float64').sum()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The product of an empty or all-NA Series of a DataFrame is 1." 
- ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([np.nan], nan_as_null=False).prod()" - ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "nan" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([np.nan], nan_as_null=False).prod(skipna=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1.0" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cudf.Series([], dtype='float64').prod()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## NA values in GroupBy" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`NA` groups in GroupBy are automatically excluded. For example:" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 NaN\n", - "1 2.0\n", - "2 2 3.2\n", - "3 3 0.1\n", - "4 1.0" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
b
a
23.2
1NaN
30.1
\n", - "
" - ], - "text/plain": [ - " b\n", - "a \n", - "2 3.2\n", - "1 NaN\n", - "3 0.1" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.groupby('a').mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is also possible to include `NA` in groups by passing `dropna=False`" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
b
a
23.2
1NaN
30.1
<NA>1.5
\n", - "
" - ], - "text/plain": [ - " b\n", - "a \n", - "2 3.2\n", - "1 NaN\n", - "3 0.1\n", - " 1.5" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.groupby('a', dropna=False).mean()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Inserting missing data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "All dtypes support insertion of missing values by assignment. Any specific location in a series can be made null by assigning it to `None`." - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [], - "source": [ - "series = cudf.Series([1, 2, 3, 4])" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 3\n", - "3 4\n", - "dtype: int64" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "series[2] = None" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "1 2\n", - "2 \n", - "3 4\n", - "dtype: int64" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Filling missing values: fillna" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`fillna()` can fill in `NA` & `NaN` values with non-NA data." - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 NaN\n", - "1 2.0\n", - "2 2 3.2\n", - "3 3 0.1\n", - "4 1.0" - ] - }, - "execution_count": 45, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 10.0\n", - "1 2.0\n", - "2 3.2\n", - "3 0.1\n", - "4 1.0\n", - "Name: b, dtype: float64" - ] - }, - "execution_count": 46, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['b'].fillna(10)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Filling with cudf Object" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column." - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "import cupy as cp\n", - "dff = cudf.DataFrame(cp.random.randn(10, 3), columns=list('ABC'))" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "metadata": {}, - "outputs": [], - "source": [ - "dff.iloc[3:5, 0] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "metadata": {}, - "outputs": [], - "source": [ - "dff.iloc[4:6, 1] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "metadata": {}, - "outputs": [], - "source": [ - "dff.iloc[5:8, 2] = np.nan" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3NaN-0.610798-0.272895
4NaNNaN1.396784
5-0.439343NaNNaN
61.093102-0.764758NaN
70.003098-0.722648NaN
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 0.771245 0.051024 1.199239\n", - "1 -1.168041 0.702664 -0.270806\n", - "2 -1.467009 -0.143080 -0.806151\n", - "3 NaN -0.610798 -0.272895\n", - "4 NaN NaN 1.396784\n", - "5 -0.439343 NaN NaN\n", - "6 1.093102 -0.764758 NaN\n", - "7 0.003098 -0.722648 NaN\n", - "8 -0.095899 -1.285156 -0.300566\n", - "9 0.109465 2.497843 -1.199856" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dff" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3-0.149173-0.610798-0.272895
4-0.149173-0.0343641.396784
5-0.439343-0.034364-0.036322
61.093102-0.764758-0.036322
70.003098-0.722648-0.036322
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 0.771245 0.051024 1.199239\n", - "1 -1.168041 0.702664 -0.270806\n", - "2 -1.467009 -0.143080 -0.806151\n", - "3 -0.149173 -0.610798 -0.272895\n", - "4 -0.149173 -0.034364 1.396784\n", - "5 -0.439343 -0.034364 -0.036322\n", - "6 1.093102 -0.764758 -0.036322\n", - "7 0.003098 -0.722648 -0.036322\n", - "8 -0.095899 -1.285156 -0.300566\n", - "9 0.109465 2.497843 -1.199856" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dff.fillna(dff.mean())" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ABC
00.7712450.0510241.199239
1-1.1680410.702664-0.270806
2-1.467009-0.143080-0.806151
3NaN-0.610798-0.272895
4NaN-0.0343641.396784
5-0.439343-0.034364-0.036322
61.093102-0.764758-0.036322
70.003098-0.722648-0.036322
8-0.095899-1.285156-0.300566
90.1094652.497843-1.199856
\n", - "
" - ], - "text/plain": [ - " A B C\n", - "0 0.771245 0.051024 1.199239\n", - "1 -1.168041 0.702664 -0.270806\n", - "2 -1.467009 -0.143080 -0.806151\n", - "3 NaN -0.610798 -0.272895\n", - "4 NaN -0.034364 1.396784\n", - "5 -0.439343 -0.034364 -0.036322\n", - "6 1.093102 -0.764758 -0.036322\n", - "7 0.003098 -0.722648 -0.036322\n", - "8 -0.095899 -1.285156 -0.300566\n", - "9 0.109465 2.497843 -1.199856" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dff.fillna(dff.mean()[1:3])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Dropping axis labels with missing data: dropna" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Missing data can be excluded using `dropna()`:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 54, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
01NaN
1<NA>2.0
223.2
330.1
4<NA>1.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 1 NaN\n", - "1 2.0\n", - "2 2 3.2\n", - "3 3 0.1\n", - "4 1.0" - ] - }, - "execution_count": 54, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
223.2
330.1
\n", - "
" - ], - "text/plain": [ - " a b\n", - "2 2 3.2\n", - "3 3 0.1" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.dropna(axis=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0
1
2
3
4
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: [0, 1, 2, 3, 4]" - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1.dropna(axis=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "An equivalent `dropna()` is available for Series. " - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 1\n", - "2 2\n", - "3 3\n", - "Name: a, dtype: int64" - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df1['a'].dropna()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Replacing generic values" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Often times we want to replace arbitrary values with other values.\n", - "\n", - "`replace()` in Series and `replace()` in DataFrame provides an efficient yet flexible way to perform such replacements." 
- ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [], - "source": [ - "series = cudf.Series([0.0, 1.0, 2.0, 3.0, 4.0])" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 0.0\n", - "1 1.0\n", - "2 2.0\n", - "3 3.0\n", - "4 4.0\n", - "dtype: float64" - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 5.0\n", - "1 1.0\n", - "2 2.0\n", - "3 3.0\n", - "4 4.0\n", - "dtype: float64" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series.replace(0, 5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also replace any value with a `<NA>` value." - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 \n", - "1 1.0\n", - "2 2.0\n", - "3 3.0\n", - "4 4.0\n", - "dtype: float64" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series.replace(0, None)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can replace a list of values by a list of other values:" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 4.0\n", - "1 3.0\n", - "2 2.0\n", - "3 1.0\n", - "4 0.0\n", - "dtype: float64" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also specify a mapping dict:" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - 
"metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 10.0\n", - "1 100.0\n", - "2 2.0\n", - "3 3.0\n", - "4 4.0\n", - "dtype: float64" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "series.replace({0: 10, 1: 100})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For a DataFrame, you can specify individual values by column:" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "df = cudf.DataFrame({\"a\": [0, 1, 2, 3, 4], \"b\": [5, 6, 7, 8, 9]})" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
005
116
227
338
449
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 0 5\n", - "1 1 6\n", - "2 2 7\n", - "3 3 8\n", - "4 4 9" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
0100100
116
227
338
449
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 100 100\n", - "1 1 6\n", - "2 2 7\n", - "3 3 8\n", - "4 4 9" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace({\"a\": 0, \"b\": 5}, 100)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## String/regular expression replacement" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "cudf supports replacing string values using `replace` API:" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": {}, - "outputs": [], - "source": [ - "d = {\"a\": list(range(4)), \"b\": list(\"ab..\"), \"c\": [\"a\", \"b\", None, \"d\"]}" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "metadata": {}, - "outputs": [], - "source": [ - "df = cudf.DataFrame(d)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00aa
11bb
22.<NA>
33.d
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 a a\n", - "1 1 b b\n", - "2 2 . \n", - "3 3 . d" - ] - }, - "execution_count": 69, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00aa
11bb
22A Dot<NA>
33A Dotd
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 a a\n", - "1 1 b b\n", - "2 2 A Dot \n", - "3 3 A Dot d" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace(\".\", \"A Dot\")" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00aa
11<NA><NA>
22A Dot<NA>
33A Dotd
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 a a\n", - "1 1 \n", - "2 2 A Dot \n", - "3 3 A Dot d" - ] - }, - "execution_count": 71, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace([\".\", \"b\"], [\"A Dot\", None])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Replace a few different values (list -> list):\n" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00bb
11bb
22--<NA>
33--d
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 b b\n", - "1 1 b b\n", - "2 2 -- \n", - "3 3 -- d" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace([\"a\", \".\"], [\"b\", \"--\"])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Only search in column 'b' (dict -> dict):" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
00aa
11bb
22replacement value<NA>
33replacement valued
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 0 a a\n", - "1 1 b b\n", - "2 2 replacement value \n", - "3 3 replacement value d" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace({\"b\": \".\"}, {\"b\": \"replacement value\"})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Numeric replacement" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "`replace()` can also be used similar to `fillna()`." - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "df = cudf.DataFrame(cp.random.randn(10, 2))" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "metadata": {}, - "outputs": [], - "source": [ - "df[np.random.rand(df.shape[0]) > 0.5] = 1.5" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01
0<NA><NA>
1<NA><NA>
20.1231607461.09464783
3<NA><NA>
4<NA><NA>
50.68137677-0.357346253
6<NA><NA>
7<NA><NA>
81.173285961-0.968616065
90.147922362-0.154880098
\n", - "
" - ], - "text/plain": [ - " 0 1\n", - "0 \n", - "1 \n", - "2 0.123160746 1.09464783\n", - "3 \n", - "4 \n", - "5 0.68137677 -0.357346253\n", - "6 \n", - "7 \n", - "8 1.173285961 -0.968616065\n", - "9 0.147922362 -0.154880098" - ] - }, - "execution_count": 76, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace(1.5, None)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Replacing more than one value is possible by passing a list.\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "metadata": {}, - "outputs": [], - "source": [ - "df00 = df.iloc[0, 0]" - ] - }, - { - "cell_type": "code", - "execution_count": 78, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01
05.0000005.000000
15.0000005.000000
20.1231611.094648
35.0000005.000000
45.0000005.000000
50.681377-0.357346
65.0000005.000000
75.0000005.000000
81.173286-0.968616
90.147922-0.154880
\n", - "
" - ], - "text/plain": [ - " 0 1\n", - "0 5.000000 5.000000\n", - "1 5.000000 5.000000\n", - "2 0.123161 1.094648\n", - "3 5.000000 5.000000\n", - "4 5.000000 5.000000\n", - "5 0.681377 -0.357346\n", - "6 5.000000 5.000000\n", - "7 5.000000 5.000000\n", - "8 1.173286 -0.968616\n", - "9 0.147922 -0.154880" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.replace([1.5, df00], [5, 10])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also operate on the DataFrame in place:\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "metadata": {}, - "outputs": [], - "source": [ - "df.replace(1.5, None, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 80, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
01
0<NA><NA>
1<NA><NA>
20.1231607461.09464783
3<NA><NA>
4<NA><NA>
50.68137677-0.357346253
6<NA><NA>
7<NA><NA>
81.173285961-0.968616065
90.147922362-0.154880098
\n", - "
" - ], - "text/plain": [ - " 0 1\n", - "0 \n", - "1 \n", - "2 0.123160746 1.09464783\n", - "3 \n", - "4 \n", - "5 0.68137677 -0.357346253\n", - "6 \n", - "7 \n", - "8 1.173285961 -0.968616065\n", - "9 0.147922362 -0.154880098" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/cudf/source/api.rst b/docs/cudf/source/api.rst deleted file mode 100644 index bc227364351..00000000000 --- a/docs/cudf/source/api.rst +++ /dev/null @@ -1,22 +0,0 @@ -*************************************** -API documentation and generated content -*************************************** - -This page contains general code elements that are common -for package documentation. - -Autosummary table and API stub pages -==================================== - -.. autosummary:: - :toctree: - - pandas.DataFrame.drop - cudf.DataFrame.drop - pandas.DataFrame.groupby - cudf.DataFrame.where - -.. autosummary:: - :toctree: generated/ - - pandas.Series.array diff --git a/docs/cudf/source/api_docs/window.rst b/docs/cudf/source/api_docs/window.rst new file mode 100644 index 00000000000..6fd8eb0d5f6 --- /dev/null +++ b/docs/cudf/source/api_docs/window.rst @@ -0,0 +1,108 @@ +{{ header }} + +.. _api.window: + +====== +Window +====== + +Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. +Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. 
+ExponentialMovingWindow objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. + +.. _api.functions_rolling: + +Rolling window functions +------------------------ +.. currentmodule:: pandas.core.window.rolling + +.. autosummary:: + :toctree: api/ + + Rolling.count + Rolling.sum + Rolling.mean + Rolling.median + Rolling.var + Rolling.std + Rolling.min + Rolling.max + Rolling.corr + Rolling.cov + Rolling.skew + Rolling.kurt + Rolling.apply + Rolling.aggregate + Rolling.quantile + Rolling.sem + +.. _api.functions_window: + +Weighted window functions +------------------------- +.. currentmodule:: pandas.core.window.rolling + +.. autosummary:: + :toctree: api/ + + Window.mean + Window.sum + Window.var + Window.std + +.. _api.functions_expanding: + +Expanding window functions +-------------------------- +.. currentmodule:: pandas.core.window.expanding + +.. autosummary:: + :toctree: api/ + + Expanding.count + Expanding.sum + Expanding.mean + Expanding.median + Expanding.var + Expanding.std + Expanding.min + Expanding.max + Expanding.corr + Expanding.cov + Expanding.skew + Expanding.kurt + Expanding.apply + Expanding.aggregate + Expanding.quantile + Expanding.sem + +.. _api.functions_ewm: + +Exponentially-weighted window functions +--------------------------------------- +.. currentmodule:: pandas.core.window.ewm + +.. autosummary:: + :toctree: api/ + + ExponentialMovingWindow.mean + ExponentialMovingWindow.std + ExponentialMovingWindow.var + ExponentialMovingWindow.corr + ExponentialMovingWindow.cov + +.. _api.indexers_window: + +Window indexer +-------------- +.. currentmodule:: pandas + +Base class for defining custom window boundaries. + +.. 
autosummary:: + :toctree: api/ + + api.indexers.BaseIndexer + api.indexers.FixedForwardWindowIndexer + api.indexers.VariableOffsetWindowIndexer +{"mode":"full","isActive":false} \ No newline at end of file diff --git a/docs/cudf/source/basics.rst b/docs/cudf/source/basics.rst deleted file mode 100644 index 15b4b43662b..00000000000 --- a/docs/cudf/source/basics.rst +++ /dev/null @@ -1,54 +0,0 @@ -Basics -====== - - -Supported Dtypes ----------------- - -cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumPy dtypes, NumPy provides support for ``float``, ``int``, ``bool``, -``'timedelta64[s]'``, ``'timedelta64[ms]'``, ``'timedelta64[us]'``, ``'timedelta64[ns]'``, ``'datetime64[s]'``, ``'datetime64[ms]'``, -``'datetime64[us]'``, ``'datetime64[ns]'`` (note that NumPy does not support timezone-aware datetimes). - - -The following table lists all of cudf types. For methods requiring dtype arguments, strings can be specified as indicated. See the respective documentation sections for more on each type. 
- - -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Kind of Data | Data Type | Scalar | String Aliases | -+========================+==================+=====================================================================================+=============================================+ -| Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | -| | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | -| | | | ``'uint32'``, ``'uint64'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Strings | | `str `_ | ``'string'``, ``'object'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | -| | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| -| (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Categorical | CategoricalDtype | (none) 
| ``'category'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Boolean | | np.bool_ | ``'bool'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Decimal | Decimal64Dtype | (none) | (none) | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ - -**Note: All dtypes above are Nullable** - -.. _np.int8: -.. _np.int16: -.. _np.int32: -.. _np.int64: -.. _np.uint8: -.. _np.uint16: -.. _np.uint32: -.. _np.uint64: -.. _np.float32: -.. _np.float64: -.. _np.bool: https://numpy.org/doc/stable/user/basics.types.html -.. _np.datetime64: https://numpy.org/doc/stable/reference/arrays.datetime.html#basic-datetimes -.. _np.timedelta64: https://numpy.org/doc/stable/reference/arrays.datetime.html#datetime-and-timedelta-arithmetic diff --git a/docs/cudf/source/basics/dask-cudf.md b/docs/cudf/source/basics/dask-cudf.md deleted file mode 100644 index 92ef4eb1c46..00000000000 --- a/docs/cudf/source/basics/dask-cudf.md +++ /dev/null @@ -1,78 +0,0 @@ -Multi-GPU with Dask-cuDF -======================== - -cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use [Dask](https://dask.org/) and the [dask-cudf package](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf), which is able to scale cuDF across multiple GPUs on a single machine, or multiple GPUs across many machines in a cluster. - -[Dask DataFrame](http://docs.dask.org/en/latest/dataframe.html) was originally designed to scale Pandas, orchestrating many Pandas DataFrames spread across many CPUs into a cohesive parallel DataFrame. 
Because cuDF currently implements only a subset of Pandas’s API, not all Dask DataFrame operations work with cuDF. - -The following is tested and expected to work: - -What works ----------- - -- Data ingestion - - ``dask_cudf.read_csv`` - - Use standard Dask ingestion with Pandas, then convert to cuDF (For - Parquet and other formats this is often decently fast) -- Linear operations - - Element-wise operations: ``df.x + df.y``, ``df ** 2`` - - Assignment: ``df['z'] = df.x + df.y`` - - Row-wise selections: ``df[df.x > 0]`` - - Loc: ``df.loc['2001-01-01': '2005-02-02']`` - - Date time/string accessors: ``df.timestamp.dt.dayofweek`` - - ... and most similar operations in this category that are already implemented in cuDF -- Reductions - - Like ``sum``, ``mean``, ``max``, ``count``, and so on on ``Series`` objects - - Support for reductions on full dataframes - - ``std`` - - Custom reductions with [dask.dataframe.reduction](http://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.Series.reduction) -- Groupby aggregations - - On single columns: ``df.groupby('x').y.max()`` - - With custom aggregations: - - groupby standard deviation - - grouping on multiple columns - - groupby agg for multiple outputs -- Joins: - - On full unsorted columns: ``left.merge(right, on='id')`` (expensive) - - On sorted indexes: ``left.merge(right, left_index=True, right_index=True)`` (fast) - - On large and small dataframes: ``left.merge(cudf_df, on='id')`` (fast) -- Rolling operations -- Converting to and from other forms - - Dask + Pandas to Dask + cuDF ``df.map_partitions(cudf.from_pandas)`` - - Dask + cuDF to Dask + Pandas ``df.map_partitions(lambda df: df.to_pandas())`` - - cuDF to Dask + cuDF: ``dask.dataframe.from_pandas(df, npartitions=20)`` - - Dask + cuDF to cuDF: ``df.compute()`` - -Additionally all generic Dask operations, like ``compute``, ``persist``, -``visualize`` and so on work regardless. 
- - -Developing the API ------------------- - -Above we mention the following: - -> and most similar operations in this category that are already implemented in cuDF - -This is because it is difficult to create a comprehensive list of operations in -the cuDF and Pandas libraries. The API is large enough to be difficult to track -effectively. For any operation that operates row-wise like ``fillna`` or -``query`` things will likely, but not certainly work. If operations don't work -it is often due to a slight inconsistency between Pandas and cuDF that is -generally easy to fix. We encourage users to look at the [cuDF issue -tracker](https://github.com/rapidsai/cudf/issues) to see if their issue has -already been reported and, if not, -[raise a new issue](https://github.com/rapidsai/cudf/issues/new). - - -Navigating the API ------------------- - -This project reuses the -[Dask DataFrame](https://docs.dask.org/en/latest/dataframe.html) project, which -was originally designed for Pandas, with the newer library cuDF. Because we use -the same Dask classes for both projects there are often methods that are -implemented for Pandas, but not yet for cuDF. As a result users looking at the -full Dask DataFrame API can be misleading, and often lead to frustration when -operations that are advertised in the Dask API do not work as expected with -cuDF. We apologize for this in advance. 
diff --git a/docs/cudf/source/dask-cudf.rst b/docs/cudf/source/basics/dask-cudf.rst similarity index 100% rename from docs/cudf/source/dask-cudf.rst rename to docs/cudf/source/basics/dask-cudf.rst diff --git a/docs/cudf/source/basics/index.rst b/docs/cudf/source/basics/index.rst index d4cb7259d17..b08fa1f9988 100644 --- a/docs/cudf/source/basics/index.rst +++ b/docs/cudf/source/basics/index.rst @@ -8,7 +8,6 @@ Basics basics io.rst - groupby.md - dask-cudf.md - internals.md - + groupby.rst + dask-cudf.rst + internals.rst diff --git a/docs/cudf/source/basics/internals.md b/docs/cudf/source/basics/internals.md deleted file mode 100644 index d0a2a324c17..00000000000 --- a/docs/cudf/source/basics/internals.md +++ /dev/null @@ -1,194 +0,0 @@ -cuDF internals -============== - -The cuDF API closely matches that of the [Pandas](https://pandas.pydata.org/) library. -Thus, we have the types `cudf.Series`, `cudf.DataFrame` and `cudf.Index` which look and -feel very much like their Pandas counterparts. - -Under the hood, however, cuDF uses data structures very different from Pandas. In this document, -we describe these internal data structures. - -## Column - -Columns are cuDF's core data structure and they are modeled after -the [Apache Arrow Columnar Format](https://arrow.apache.org/docs/format/Columnar.html). - -A column represents a sequence of values, any number of which may be "null". Columns are -specialized based on the type of data they contain. Thus we have `NumericalColumn`, `StringColumn`, -`DatetimeColumn`, etc., - -A column is composed of the following: - -* A **data type**, specifying the type of each element. -* A **data buffer** that may store the data for the column elements. - Some column types do not have a data buffer, instead storing data in the children columns. -* A **mask buffer** whose bits represent the validity (null or not null) of each element. - Columns whose elements are all "valid" may not have a mask buffer. 
Mask buffers are padded - to 64 bytes. -* A tuple of **children** columns, which enable the representation complex types such as - columns with non-fixed width elements such as strings or lists. -* A **size** indicating the number of elements in the column. -* An integer **offset**: a column may represent a "slice" of another column, - in which case this offset represents the first element of the slice. The size of - the column then gives the extent of the slice. A column that is not a slice - has an offset of 0. - -For example, the `NumericalColumn` backing a Series with 1000 elements of type 'int32' -and containing nulls is composed of: - -1. A data buffer of size 4000 bytes (sizeof(int32) * 1000) -2. A mask buffer of size 128 bytes (1000/8 padded to a multiple of 64 bytes) -3. No children columns - -As another example, the `StringColumn` backing the Series -`['do', 'you', 'have', 'any', 'cheese?']` is composed of: - -1. No data buffer -2. No mask buffer as there are no nulls in the Series -3. Two children columns: - - A column of 8-bit characters `['d', 'o', 'y', 'o', 'u', h' ... '?']` - - A column of "offsets" to the characters column (in this case, `[0, 2, 5, 9, 12, 19]`) - -## Buffer - -The data and mask buffers of a column represent data in GPU memory (a.k.a *device memory*), -and are object of type `cudf.core.buffer.Buffer`. - -Buffers can be constructed from array-like objects that live either on the host (e.g., numpy arrays) -or the device (e.g., cupy arrays). Arrays must be of `uint8` dtype or viewed as such. - -When constructing a Buffer from a host object such as a numpy array, new device memory is allocated: - -```python ->>> from cudf.core.buffer import Buffer ->>> buf = Buffer(np.array([1, 2, 3], dtype='int64').view("uint8")) ->>> print(buf.ptr) # address of new device memory allocation -140050901762560 ->>> print(buf.size) -24 ->>> print(buf._owner) - -``` - -cuDF uses the [RMM](https://github.com/rapidsai/rmm) library for allocating device memory. 
-You can read more about device memory allocation with RMM -[here](https://github.com/rapidsai/rmm#devicebuffers). - -When constructing a Buffer from a device object such as a CuPy array, no new device memory is -allocated. Instead, the Buffer points to the existing allocation, keeping a reference to the device -array: - -```python ->>> import cupy as cp ->>> c_ary = cp.asarray([1, 2, 3], dtype='int64') ->>> buf = Buffer(c_ary.view("uint8")) ->>> print(c_ary.data.mem.ptr) -140050901762560 ->>> print(buf.ptr) -140050901762560 ->>> print(buf.size) -24 ->>> print(buf._owner is c_ary) -True -``` - -An uninitialized block of device memory can be allocated with `Buffer.empty`: - -```python ->>> buf = Buffer.empty(10) ->>> print(buf.size) -10 ->>> print(buf._owner) - -``` - -## ColumnAccessor - -cuDF `Series`, `DataFrame` and `Index` are all subclasses of an internal `Frame` class. -The underlying data structure of `Frame` is an ordered, dictionary-like object -known as `ColumnAccessor`, which can be accessed via the `._data` attribute: - -```python ->>> a = cudf.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) ->>> a._data -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -ColumnAccessor is an ordered mapping of column labels to columns. In addition to behaving -like an OrderedDict, it supports things like selecting multiple columns (both by index and label), as well as hierarchical indexing. 
- -```python ->>> from cudf.core.column_accessor import ColumnAccessor -``` - -The values of a ColumnAccessor are coerced to Columns during construction: - -```python ->>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}) ->>> ca['x'] - ->>> ca['y'] - ->>> ca.pop('x') - ->>> ca -ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) -``` - -Columns can be inserted at a specified location: - -```python ->>> ca.insert('z', [3, 4, 5], loc=1) ->>> ca -ColumnAccessor(OrderedColumnDict([('x', ), ('z', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -Selecting columns by index: - -```python ->>> ca = ColumnAccessor({'x': [1, 2, 3], 'y': ['a', 'b', 'c'], 'z': [4, 5, 6]}) ->>> ca.select_by_index(1) -ColumnAccessor(OrderedColumnDict([('y', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_index([0, 1]) -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_index(slice(1, 3)) -ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) -``` - -Selecting columns by label: - -```python ->>> ca.select_by_label(['y', 'z']) -ColumnAccessor(OrderedColumnDict([('y', ), ('z', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_label(slice('x', 'y')) -ColumnAccessor(OrderedColumnDict([('x', ), ('y', )]), multiindex=False, level_names=(None,)) -``` - -A ColumnAccessor with tuple keys (and constructed with `multiindex=True`) -can be hierarchically indexed: - -```python ->>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], ('a', 'c'): [2, 3, 4], 'b': [4, 5, 6]}, multiindex=True) ->>> ca.select_by_label('a') -ColumnAccessor(OrderedColumnDict([('b', ), ('c', )]), multiindex=False, level_names=(None,)) ->>> ca.select_by_label(('a', 'b')) -ColumnAccessor(OrderedColumnDict([(('a', 'b'), )]), multiindex=False, level_names=(None,)) -``` - -"Wildcard" indexing is also allowed: - -```python ->>> ca = ColumnAccessor({('a', 'b'): [1, 2, 3], 
('a', 'c'): [2, 3, 4], ('d', 'b'): [4, 5, 6]}, multiindex=True) ->>> ca.select_by_label((slice(None), 'b')) -ColumnAccessor(OrderedColumnDict([(('a', 'b'), ), (('d', 'b'), )]), multiindex=True, level_names=(None, None)) -``` - -Finally, ColumnAccessors can convert to Pandas `Index` or `MultiIndex` objects: - -```python ->>> ca.to_pandas_index() -MultiIndex([('a', 'b'), - ('a', 'c'), - ('d', 'b')], - ) -``` diff --git a/docs/cudf/source/internals.rst b/docs/cudf/source/basics/internals.rst similarity index 100% rename from docs/cudf/source/internals.rst rename to docs/cudf/source/basics/internals.rst diff --git a/docs/cudf/source/io-gds-integration.rst b/docs/cudf/source/basics/io-gds-integration.rst similarity index 100% rename from docs/cudf/source/io-gds-integration.rst rename to docs/cudf/source/basics/io-gds-integration.rst diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst index e09e155ef92..a74f3239044 100644 --- a/docs/cudf/source/basics/io-supported-types.rst +++ b/docs/cudf/source/basics/io-supported-types.rst @@ -5,58 +5,62 @@ The following table lists are compatible cudf types for each supported IO format .. rst-class:: io-supported-types-table .. 
table:: - :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 + :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+-----------------+--------+--------+--------+--------+ - | | CSV | Parquet | JSON | ORC | AVRO | HDF | DLPack | Feather | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | Data Type | Writer | Reader | Writer | Reader | Writer | Reader | Writer | Reader | Reader | Writer | Reader | Writer | Reader | Writer | Reader | - +=======================+========+========+========+========+========+========+========+========+========+========+========+========+========+========+========+ - | int8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | int16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | int32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | int64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | uint8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | uint16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | uint32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | uint64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | float32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | float64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | bool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | str | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | category | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | list | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | timedelta64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | timedelta64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | timedelta64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | timedelta64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | datetime64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | datetime64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | datetime64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | datetime64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | struct | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ - | decimal | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | - +-----------------------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+--------+ + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+-------------------+--------+--------+---------+---------+ + | | CSV | Parquet | JSON | ORC | AVRO | HDF | DLPack | Feather | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | Data Type | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | Writer¹ | Reader¹ | Writer | Reader | Writer¹ | Reader¹ | + +=======================+========+========+========+========+=========+========+========+========+========+=========+=========+========+========+=========+=========+ + | int8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | int16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | int32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | int64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | uint8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | uint16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | uint32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | uint64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | float32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | float64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | bool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | str | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | category | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | list | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | timedelta64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | timedelta64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | timedelta64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | timedelta64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | datetime64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | datetime64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | datetime64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | datetime64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | struct | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + | decimal64 | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | + +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ + +**Notes:** + +* [¹] - Not GPU-accelerated. diff --git a/docs/cudf/source/basics/io.rst b/docs/cudf/source/basics/io.rst index 5186473ae10..e88162d8f52 100644 --- a/docs/cudf/source/basics/io.rst +++ b/docs/cudf/source/basics/io.rst @@ -8,4 +8,5 @@ This page contains Input / Output related APIs in cuDF. 
:maxdepth: 2 :caption: Contents: - io-supported-types.rst \ No newline at end of file + io-supported-types.rst + io-gds-integration.rst \ No newline at end of file diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 43ba83d8c46..dd2c4f4deed 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -114,15 +114,15 @@ # on_rtd is whether we are on readthedocs.org on_rtd = os.environ.get("READTHEDOCS", None) == "True" -# if not on_rtd: -# # only import and set the theme if we're building docs locally -# # otherwise, readthedocs.org uses their theme by default, -# # so no need to specify it -# import sphinx_rtd_theme -# -# html_theme = "pydata_sphinx_theme" -# # html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] -# +if not on_rtd: + # only import and set the theme if we're building docs locally + # otherwise, readthedocs.org uses their theme by default, + # so no need to specify it + import pydata_sphinx_theme + + html_theme = "pydata_sphinx_theme" + html_theme_path = pydata_sphinx_theme.get_html_theme_path() + # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -242,7 +242,7 @@ def ignore_internal_references(app, env, node, contnode): def setup(app): - app.add_css_file("params.css") + # app.add_css_file("params.css") app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) diff --git a/docs/cudf/source/groupby.rst b/docs/cudf/source/groupby.rst deleted file mode 100644 index a6ce9db6817..00000000000 --- a/docs/cudf/source/groupby.rst +++ /dev/null @@ -1,237 +0,0 @@ -GroupBy -======= - -cuDF supports a small (but important) subset of Pandas' `groupby -API `__. - -Summary of supported operations -------------------------------- - -1. Grouping by one or more columns -2. Basic aggregations such as "sum", "mean", etc. -3. 
Quantile aggregation -4. A "collect" or ``list`` aggregation for collecting values in a group - into lists -5. Automatic exclusion of columns with unsupported dtypes ("nuisance" - columns) when aggregating -6. Iterating over the groups of a GroupBy object -7. ``GroupBy.groups`` API that returns a mapping of group keys to row - labels -8. ``GroupBy.apply`` API for performing arbitrary operations on each - group. Note that this has very limited functionality compared to the - equivalent Pandas function. See the section on - `apply <#groupby-apply>`__ for more details. -9. ``GroupBy.pipe`` similar to - `Pandas `__. - -Grouping --------- - -A GroupBy object is created by grouping the values of a ``Series`` or -``DataFrame`` by one or more columns: - -.. code:: python - - import cudf - - >>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}) - >>> df - >>> gb1 = df.groupby('a') # grouping by a single column - >>> gb2 = df.groupby(['a', 'b']) # grouping by multiple columns - >>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b'])) # grouping by an external column - -.. warning:: - - cuDF uses `sort=False` by default to achieve better performance, which provides no gaurentee to the group order in outputs. This deviates from Pandas default behavior. - - For example: - - .. code-block:: python - - >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]}) - >>> df.groupby('a').sum() - b - a - 2 63 - 1 11 - >>> df.to_pandas().groupby('a').sum() - b - a - 1 11 - 2 63 - - Setting `sort=True` will produce Pandas-like output, but with some performance penalty: - - .. code-block:: python - - >>> df.groupby('a', sort=True).sum() - b - a - 1 11 - 2 63 - -Grouping by index levels -~~~~~~~~~~~~~~~~~~~~~~~~ - -You can also group by one or more levels of a MultiIndex: - -.. code:: python - - >>> df = cudf.DataFrame( - ... {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]} - ... ).set_index(['a', 'b']) - ... 
- >>> df.groupby(level='a') - -The ``Grouper`` object -~~~~~~~~~~~~~~~~~~~~~~ - -A ``Grouper`` can be used to disambiguate between columns and levels -when they have the same name: - -.. code:: python - - >>> df - b c - b - 1 1 1 - 1 1 2 - 1 2 3 - 2 2 4 - 2 3 5 - >>> df.groupby('b', level='b') # ValueError: Cannot specify both by and level - >>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')]) # OK - -Aggregation ------------ - -Aggregations on groups is supported via the ``agg`` method: - -.. code:: python - - >>> df - a b c - 0 1 1 1 - 1 1 1 2 - 2 1 2 3 - 3 2 2 4 - 4 2 3 5 - >>> df.groupby('a').agg('sum') - b c - a - 1 4 6 - 2 5 9 - >>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'}) - b c - sum min mean - a - 1 4 1 2.0 - 2 5 2 4.5 - -The following table summarizes the available aggregations and the types -that support them: - -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | -+====================================+===========+============+==========+===============+========+==========+============+===========+ -| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| sum | ✅ | ✅ | | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| idxmin | ✅ | ✅ | | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| idxmax | ✅ | ✅ | | | | | | ✅ | 
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| min | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| max | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| mean | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| var | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| std | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| quantile | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| median | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| nth | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| unique | ✅ | ✅ | ✅ | ✅ | | | | | 
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ - -GroupBy apply -------------- - -To apply function on each group, use the ``GroupBy.apply()`` method: - -.. code:: python - - >>> df - a b c - 0 1 1 1 - 1 1 1 2 - 2 1 2 3 - 3 2 2 4 - 4 2 3 5 - >>> df.groupby('a').apply(lambda x: x.max() - x.min()) - a b c - a - 0 0 1 2 - 1 0 1 1 - -Limitations -~~~~~~~~~~~ - -- ``apply`` works by applying the provided function to each group - sequentially, and concatenating the results together. **This can be - very slow**, especially for a large number of small groups. For a - small number of large groups, it can give acceptable performance - -- The results may not always match Pandas exactly. For example, cuDF - may return a ``DataFrame`` containing a single column where Pandas - returns a ``Series``. Some post-processing may be required to match - Pandas behavior. - -- cuDF does not support some of the exceptional cases that Pandas - supports with ``apply``, such as calling |describe|_ inside the - callable. - - .. |describe| replace:: ``describe`` - .. _describe: https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply - -Rolling window calculations ---------------------------- - -Use the ``GroupBy.rolling()`` method to perform rolling window -calculations on each group: - -.. code:: python - - >>> df - a b c - 0 1 1 1 - 1 1 1 2 - 2 1 2 3 - 3 2 2 4 - 4 2 3 5 - -Rolling window sum on each group with a window size of 2: - -.. 
code:: python - - >>> df.groupby('a').rolling(2).sum() - a b c - a - 1 0 - 1 2 2 3 - 2 2 3 5 - 2 3 - 4 4 5 9 diff --git a/docs/cudf/source/guide-to-udfs.ipynb b/docs/cudf/source/guide-to-udfs.ipynb deleted file mode 100644 index 3299414ac7e..00000000000 --- a/docs/cudf/source/guide-to-udfs.ipynb +++ /dev/null @@ -1,1716 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Overview of User Defined Functions with cuDF" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Like many tabular data processing APIs, cuDF provides a range of composable, DataFrame style operators. While out of the box functions are flexible and useful, it is sometimes necessary to write custom code, or user-defined functions (UDFs), that can be applied to rows, columns, and other groupings of the cells making up the DataFrame.\n", - "\n", - "In conjunction with the broader GPU PyData ecosystem, cuDF provides interfaces to run UDFs on a variety of data structures. Currently, we can only execute UDFs on numeric and Boolean typed data (support for strings is being planned). This guide covers writing and executing UDFs on the following data structures:\n", - "\n", - "- Series\n", - "- DataFrame\n", - "- Rolling Windows Series\n", - "- Groupby DataFrames\n", - "- CuPy NDArrays\n", - "- Numba DeviceNDArrays\n", - "\n", - "It also demonstrates cuDF's default null handling behavior, and how to write UDFs that can interact with null values in a limited fashion." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Overview\n", - "\n", - "When cuDF executes a UDF, it gets just-in-time (JIT) compiled into a CUDA kernel (either explicitly or implicitly) and is run on the GPU. Exploring CUDA and GPU architecture in-depth is out of scope for this guide. 
At a high level:\n", - "\n", - "- Compute is spread across multiple \"blocks\", which have access to both global memory and their own block local memory\n", - "- Within each block, many \"threads\" operate independently and simultaneously access their block-specific shared memory with low latency\n", - "\n", - "\n", - "This guide covers APIs that automatically handle dividing columns into chunks and assigning them into different GPU blocks for parallel computation (see [apply_chunks](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.dataframe.DataFrame.apply_chunks) or the [numba CUDA JIT API](https://numba.pydata.org/numba-doc/dev/cuda/index.html) if you need to control this yourself)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Series UDFs\n", - "\n", - "You can execute UDFs on Series in two ways:\n", - "\n", - "- Writing a standard Python function and using `applymap`\n", - "- Writing a Numba kernel and using Numba's `forall` syntax\n", - "\n", - "Using `applymap` is simpler, but writing a Numba kernel offers the flexibility to build more complex functions (we'll be writing only simple kernels in this guide).\n", - "\n", - "Let's start by importing a few libraries and creating a DataFrame of several Series." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
0-0.691674TrueDan
10.480099FalseBob
2-0.473370TrueXavier
30.067479TrueAlice
4-0.970850FalseSarah
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 -0.691674 True Dan\n", - "1 0.480099 False Bob\n", - "2 -0.473370 True Xavier\n", - "3 0.067479 True Alice\n", - "4 -0.970850 False Sarah" - ] - }, - "execution_count": 1, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import numpy as np\n", - "\n", - "import cudf\n", - "from cudf.datasets import randomdata \n", - "\n", - "df = randomdata(nrows=10, dtypes={'a':float, 'b':bool, 'c':str}, seed=12)\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll define a basic Python function and call it as a UDF with `applymap`." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def udf(x):\n", - " if x > 0:\n", - " return x + 5\n", - " else:\n", - " return x - 5" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 -5.691674\n", - "1 5.480099\n", - "2 -5.473370\n", - "3 5.067479\n", - "4 -5.970850\n", - "5 5.837494\n", - "6 5.801430\n", - "7 -5.933157\n", - "8 5.913899\n", - "9 -5.725581\n", - "Name: a, dtype: float64" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df['a'].applymap(udf)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "That's all there is to it. For more complex UDFs, though, we'd want to write an actual Numba kernel.\n", - "\n", - "For more complex logic (for instance, accessing values from multiple input columns or rows, you'll need to use a more complex API. There are several types. First we'll cover writing and running a Numba JITed CUDA kernel.\n", - "\n", - "The easiest way to write a Numba kernel is to use `cuda.grid(1)` to manage our thread indices, and then leverage Numba's `forall` method to configure the kernel for us. 
Below, define a basic multiplication kernel as an example and use `@cuda.jit` to compile it." - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "from numba import cuda\n", - "\n", - "@cuda.jit\n", - "def multiply(in_col, out_col, multiplier):\n", - " i = cuda.grid(1)\n", - " if i < in_col.size: # boundary guard\n", - " out_col[i] = in_col[i] * multiplier" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This kernel will take an input array, multiply it by a configurable value (supplied at runtime), and store the result in an output array. Notice that we wrapped our logic in an `if` statement. Because we can launch more threads than the size of our array, we need to make sure that we don't use threads with an index that would be out of bounds. Leaving this out can result in undefined behavior.\n", - "\n", - "To execute our kernel, we just need to pre-allocate an output array and leverage the `forall` method mentioned above. First, we create a Series of all `0.0` in our DataFrame, since we want `float64` output. Next, we run the kernel with `forall`. `forall` requires us to specify our desired number of tasks, so we'll supply in the length of our Series (which we store in `size`). The [__cuda_array_interface__](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) is what allows us to directly call our Numba kernel on our Series." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "size = len(df['a'])\n", - "df['e'] = 0.0\n", - "multiply.forall(size)(df['a'], df['e'], 10.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "After calling our kernel, our DataFrame is now populated with the result." - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abce
0-0.691674TrueDan-6.916743
10.480099FalseBob4.800994
2-0.473370TrueXavier-4.733700
30.067479TrueAlice0.674788
4-0.970850FalseSarah-9.708501
\n", - "
" - ], - "text/plain": [ - " a b c e\n", - "0 -0.691674 True Dan -6.916743\n", - "1 0.480099 False Bob 4.800994\n", - "2 -0.473370 True Xavier -4.733700\n", - "3 0.067479 True Alice 0.674788\n", - "4 -0.970850 False Sarah -9.708501" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Note that, while we're operating on the Series `df['e']`, the kernel executes on the [DeviceNDArray](https://numba.pydata.org/numba-doc/dev/cuda/memory.html#device-arrays) \\\"underneath\\\" the Series. If you ever need to access the underlying DeviceNDArray of a Series, you can do so with `Series.data.mem`. We'll use this during an example in the Null Handling section of this guide." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DataFrame UDFs\n", - "\n", - "We could apply a UDF on a DataFrame like we did above with `forall`. We'd need to write a kernel that expects multiple inputs, and pass multiple Series as arguments when we execute our kernel. Because this is fairly common and can be difficult to manage, cuDF provides two APIs to streamline this: `apply_rows` and `apply_chunks`. Below, we walk through an example of using `apply_rows`. `apply_chunks` works in a similar way, but also offers more control over low-level kernel behavior.\n", - "\n", - "Now that we have two numeric columns in our DataFrame, let's write a kernel that uses both of them." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "def conditional_add(x, y, out):\n", - " for i, (a, e) in enumerate(zip(x, y)):\n", - " if a > 0:\n", - " out[i] = a + e\n", - " else:\n", - " out[i] = a" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice that we need to `enumerate` through our `zipped` function arguments (which either match or are mapped to our input column names). We can pass this kernel to `apply_rows`. We'll need to specify a few arguments:\n", - "- incols\n", - " - A list of names of input columns that match the function arguments. Or, a dictionary mapping input column names to their corresponding function arguments such as `{'col1': 'arg1'}`.\n", - "- outcols\n", - " - A dictionary defining our output column names and their data types. These names must match our function arguments.\n", - "- kwargs (optional)\n", - " - We can optionally pass keyword arguments as a dictionary. Since we don't need any, we pass an empty one.\n", - " \n", - "While it looks like our function is looping sequentially through our columns, it actually executes in parallel in multiple threads on the GPU. This parallelism is the heart of GPU-accelerated computing. With that background, we're ready to use our UDF." - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abceout
0-0.691674TrueDan-6.916743-0.691674
10.480099FalseBob4.8009945.281093
2-0.473370TrueXavier-4.733700-0.473370
30.067479TrueAlice0.6747880.742267
4-0.970850FalseSarah-9.708501-0.970850
\n", - "
" - ], - "text/plain": [ - " a b c e out\n", - "0 -0.691674 True Dan -6.916743 -0.691674\n", - "1 0.480099 False Bob 4.800994 5.281093\n", - "2 -0.473370 True Xavier -4.733700 -0.473370\n", - "3 0.067479 True Alice 0.674788 0.742267\n", - "4 -0.970850 False Sarah -9.708501 -0.970850" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df.apply_rows(conditional_add, \n", - " incols={'a':'x', 'e':'y'},\n", - " outcols={'out': np.float64},\n", - " kwargs={}\n", - " )\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As expected, we see our conditional addition worked. At this point, we've successfully executed UDFs on the core data structures of cuDF." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Rolling Window UDFs\n", - "\n", - "For time-series data, we may need to operate on a small \\\"window\\\" of our column at a time, processing each portion independently. We could slide (\\\"roll\\\") this window over the entire column to answer questions like \\\"What is the 3-day moving average of a stock price over the past year?\"\n", - "\n", - "We can apply more complex functions to rolling windows to `rolling` Series and DataFrames using `apply`. This example is adapted from cuDF's [API documentation](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.dataframe.DataFrame.rolling). First, we'll create an example Series and then create a `rolling` object from the Series." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 16.0\n", - "1 25.0\n", - "2 36.0\n", - "3 49.0\n", - "4 64.0\n", - "5 81.0\n", - "dtype: float64" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ser = cudf.Series([16, 25, 36, 49, 64, 81], dtype='float64')\n", - "ser" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Rolling [window=3,min_periods=3,center=False]" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rolling = ser.rolling(window=3, min_periods=3, center=False)\n", - "rolling" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll define a function to use on our rolling windows. We created this one to highlight how you can include things like loops, mathematical functions, and conditionals. Rolling window UDFs do not yet support null values." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import math\n", - "\n", - "def example_func(window):\n", - " b = 0\n", - " for a in window:\n", - " b = max(b, math.sqrt(a))\n", - " if b == 8:\n", - " return 100 \n", - " return b" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can execute the function by passing it to `apply`. With `window=3`, `min_periods=3`, and `center=False`, our first two values are `null`." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 null\n", - "1 null\n", - "2 6.0\n", - "3 7.0\n", - "4 100.0\n", - "5 9.0\n", - "dtype: float64" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rolling.apply(example_func)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can apply this function to every column in a DataFrame, too." - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
055.055.0
156.056.0
257.057.0
358.058.0
459.059.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 55.0 55.0\n", - "1 56.0 56.0\n", - "2 57.0 57.0\n", - "3 58.0 58.0\n", - "4 59.0 59.0" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 = cudf.DataFrame()\n", - "df2['a'] = np.arange(55, 65, dtype='float64')\n", - "df2['b'] = np.arange(55, 65, dtype='float64')\n", - "df2.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
0nullnull
1nullnull
27.5498344357.549834435
37.6157731067.615773106
47.6811457487.681145748
57.7459666927.745966692
67.8102496767.810249676
77.8740078747.874007874
87.9372539337.937253933
9100.0100.0
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 null null\n", - "1 null null\n", - "2 7.549834435 7.549834435\n", - "3 7.615773106 7.615773106\n", - "4 7.681145748 7.681145748\n", - "5 7.745966692 7.745966692\n", - "6 7.810249676 7.810249676\n", - "7 7.874007874 7.874007874\n", - "8 7.937253933 7.937253933\n", - "9 100.0 100.0" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "rolling = df2.rolling(window=3, min_periods=3, center=False)\n", - "rolling.apply(example_func)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## GroupBy DataFrame UDFs\n", - "\n", - "We can also apply UDFs to grouped DataFrames using `apply_grouped`. This example is also drawn and adapted from the RAPIDS [API documentation](https://docs.rapids.ai/api/cudf/stable/api.html#cudf.core.groupby.groupby.GroupBy.apply_grouped).\n", - "\n", - "First, we'll group our DataFrame based on column `b`, which is either True or False. Note that we currently need to pass `method=\"cudf\"` to use UDFs with GroupBy objects." - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abceout
0-0.691674TrueDan-6.916743-0.691674
10.480099FalseBob4.8009945.281093
2-0.473370TrueXavier-4.733700-0.473370
30.067479TrueAlice0.6747880.742267
4-0.970850FalseSarah-9.708501-0.970850
\n", - "
" - ], - "text/plain": [ - " a b c e out\n", - "0 -0.691674 True Dan -6.916743 -0.691674\n", - "1 0.480099 False Bob 4.800994 5.281093\n", - "2 -0.473370 True Xavier -4.733700 -0.473370\n", - "3 0.067479 True Alice 0.674788 0.742267\n", - "4 -0.970850 False Sarah -9.708501 -0.970850" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/envs/rapids/lib/python3.7/site-packages/cudf/core/dataframe.py:2559: UserWarning: as_index==True not supported due to the lack of multi-index with legacy groupby function. Use hash method for multi-index\n", - " \"as_index==True not supported due to the lack of \"\n" - ] - } - ], - "source": [ - "grouped = df.groupby(['b'], method=\"cudf\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next we'll define a function to apply to each group independently. In this case, we'll take the rolling average of column `e`, and call that new column `rolling_avg_e`." - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "def rolling_avg(e, rolling_avg_e):\n", - " win_size = 3\n", - " for i in range(cuda.threadIdx.x, len(e), cuda.blockDim.x):\n", - " if i < win_size - 1:\n", - " # If there is not enough data to fill the window,\n", - " # take the average to be NaN\n", - " rolling_avg_e[i] = np.nan\n", - " else:\n", - " total = 0\n", - " for j in range(i - win_size + 1, i + 1):\n", - " total += e[j]\n", - " rolling_avg_e[i] = total / win_size" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can execute this with a very similar API to `apply_rows`. This time, though, it's going to execute independently for each group." 
- ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abceoutrolling_avg_e
00.480099FalseBob4.8009945.281093NaN
1-0.970850FalseSarah-9.708501-0.970850NaN
20.801430FalseSarah8.0142978.8157271.035597
3-0.933157FalseQuinn-9.331571-0.933157-3.675258
4-0.691674TrueDan-6.916743-0.691674NaN
5-0.473370TrueXavier-4.733700-0.473370NaN
60.067479TrueAlice0.6747880.742267-3.658552
70.837494TrueWendy8.3749409.2124341.438676
80.913899TrueUrsula9.13898710.0528856.062905
9-0.725581TrueGeorge-7.255814-0.7255813.419371
\n", - "
" - ], - "text/plain": [ - " a b c e out rolling_avg_e\n", - "0 0.480099 False Bob 4.800994 5.281093 NaN\n", - "1 -0.970850 False Sarah -9.708501 -0.970850 NaN\n", - "2 0.801430 False Sarah 8.014297 8.815727 1.035597\n", - "3 -0.933157 False Quinn -9.331571 -0.933157 -3.675258\n", - "4 -0.691674 True Dan -6.916743 -0.691674 NaN\n", - "5 -0.473370 True Xavier -4.733700 -0.473370 NaN\n", - "6 0.067479 True Alice 0.674788 0.742267 -3.658552\n", - "7 0.837494 True Wendy 8.374940 9.212434 1.438676\n", - "8 0.913899 True Ursula 9.138987 10.052885 6.062905\n", - "9 -0.725581 True George -7.255814 -0.725581 3.419371" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results = grouped.apply_grouped(rolling_avg,\n", - " incols=['e'],\n", - " outcols=dict(rolling_avg_e=np.float64))\n", - "results" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Notice how, with a window size of three in the kernel, the first two values in each group for our output column are null." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Numba Kernels on CuPy Arrays\n", - "\n", - "We can also execute Numba kernels on CuPy NDArrays, again thanks to the `__cuda_array_interface__`. We can even run the same UDF on the Series and the CuPy array. First, we define a Series and then create a CuPy array from that Series." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 1., 2., 3., 4., 10.])" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import cupy as cp\n", - "\n", - "s = cudf.Series([1.0, 2, 3, 4, 10])\n", - "arr = cp.asarray(s)\n", - "arr" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we define a UDF and execute it on our Series. 
We need to allocate a Series of the same size for our output, which we'll call `out`." - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 5\n", - "1 10\n", - "2 15\n", - "3 20\n", - "4 50\n", - "dtype: int32" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cudf.utils import cudautils\n", - "\n", - "@cuda.jit\n", - "def multiply_by_5(x, out):\n", - " i = cuda.grid(1)\n", - " if i < x.size:\n", - " out[i] = x[i] * 5\n", - " \n", - "out = cudf.Series(cudautils.zeros(len(s), dtype='int32'))\n", - "multiply_by_5.forall(s.shape[0])(s, out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Finally, we execute the same function on our array. We allocate an empty array `out` to store our results." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([ 5., 10., 15., 20., 50.])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out = cp.empty_like(arr)\n", - "multiply_by_5.forall(arr.size)(arr, out)\n", - "out" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Null Handling in UDFs\n", - "\n", - "Above, we covered most basic usage of UDFs with cuDF.\n", - "\n", - "The remainder of the guide focuses on considerations for executing UDFs on DataFrames containing null values. If your UDFs will read or write any column containing nulls, **you should read this section carefully**.\n", - "\n", - "Writing UDFs that can handle null values is complicated by the fact that a separate bitmask is used to identify when a value is valid and when it's null. 
By default, DataFrame methods for applying UDFs like `apply_rows` will handle nulls pessimistically (all rows with a null value will be removed from the output if they are used in the kernel). Exploring how not handling not pessimistically can lead to undefined behavior is outside the scope of this guide. Suffice it to say, pessimistic null handling is the safe and consistent approach. You can see an example below." - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abc
09631005997
19771026null
2null10261019
31078null985
49799821011
\n", - "
" - ], - "text/plain": [ - " a b c\n", - "0 963 1005 997\n", - "1 977 1026 null\n", - "2 null 1026 1019\n", - "3 1078 null 985\n", - "4 979 982 1011" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "def gpu_add(a, b, out):\n", - " for i, (x, y) in enumerate(zip(a, b)):\n", - " out[i] = x + y\n", - "\n", - "df = randomdata(nrows=5, dtypes={'a':int, 'b':int, 'c':int}, seed=12)\n", - "df.loc[2, 'a'] = None\n", - "df.loc[3, 'b'] = None\n", - "df.loc[1, 'c'] = None\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In the dataframe above, there are three null values. Each column has a null in a different row. When we use our UDF with `apply_rows`, our output should have two nulls due to pessimistic null handling (because we're not using column `c`, the null value there does not matter to us)." - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abcout
096310059971968.0
19771026null2003.0
2null10261019null
31078null985null
497998210111961.0
\n", - "
" - ], - "text/plain": [ - " a b c out\n", - "0 963 1005 997 1968.0\n", - "1 977 1026 null 2003.0\n", - "2 null 1026 1019 null\n", - "3 1078 null 985 null\n", - "4 979 982 1011 1961.0" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df = df.apply_rows(gpu_add, \n", - " incols=['a', 'b'],\n", - " outcols={'out':np.float64},\n", - " kwargs={})\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As expected, we end up with two nulls in our output. The null values from the columns we used propogated to our output, but the null from the column we ignored did not." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Operating on Null Values\n", - "\n", - "If you don't need to conditionally handle null values in your UDFs, feel free to skip these final two sections.\n", - "\n", - "As a developer or data scientist, you may sometimes need to write UDFs that operate on null values. This means you need to think about the null bitmask array when writing your UDF. As a note, cuDF allows you to turn off pessimistic null handling in `apply_rows`. Instead of doing this, if you need to operate on null values we recommend writing standard `Numba.cuda` kernels. To help you interact with null bitmasks from Python, cuDF provides the `mask_get` utility function. The following example illustrates how you can use `mask_get` in Numba kernels like we used earlier in this guide." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Standard Numba Kernels\n", - "\n", - "First, we import `mask_get` and create a DataFrame with some null values." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ab
0-0.691674315True
10.480099393False
2nullTrue
30.067478787True
4nullFalse
\n", - "
" - ], - "text/plain": [ - " a b\n", - "0 -0.691674315 True\n", - "1 0.480099393 False\n", - "2 null True\n", - "3 0.067478787 True\n", - "4 null False" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from cudf.utils.cudautils import mask_get\n", - "\n", - "df = randomdata(nrows=10, dtypes={'a':float, 'b':bool}, seed=12)\n", - "df.loc[[2,4], 'a'] = None\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll define a simple kernel like before, with a couple of differences. This kernel needs access to the null bitmask, so we include a `validity_mask` argument. We also wrap our logic in a conditional based on the results of `mask_get`:\n", - "- If the result of `mask_get` for that index **is** valid (there is a value), do the multiplication\n", - "- If the result of `mask_get` for that index **is not** valid (it's null), set the output -999999" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "@cuda.jit\n", - "def gpu_kernel_masked(in_col, validity_mask, out_col, multiplier):\n", - " i = cuda.grid(1)\n", - " if i < in_col.size:\n", - " valid = mask_get(validity_mask, i)\n", - " if valid:\n", - " out_col[i] = in_col[i] * multiplier\n", - " else:\n", - " out_col[i] = -999999" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We now grab the underlying DeviceArrays and execute our kernel like we did previously, except that this time we also pass in the DeviceArray of our column's null mask. Because Numba doesn't yet handle masked GPU arrays, we can't directly pass our `Series` here." - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
abresult
0-0.691674315True-6.916743
10.480099393False4.800994
2nullTrue-999999.000000
30.067478787True0.674788
4nullFalse-999999.000000
\n", - "
" - ], - "text/plain": [ - " a b result\n", - "0 -0.691674315 True -6.916743\n", - "1 0.480099393 False 4.800994\n", - "2 null True -999999.000000\n", - "3 0.067478787 True 0.674788\n", - "4 null False -999999.000000" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import rmm # RAPIDS Memory Manager\n", - "\n", - "a_dary = df.a._column.data.mem\n", - "a_mask = df.a.nullmask.mem\n", - "output_dary = rmm.device_array_like(a_dary)\n", - "\n", - "gpu_kernel_masked.forall(output_dary.size)(a_dary, a_mask, output_dary, 10)\n", - "df['result'] = output_dary\n", - "df.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "This guide has covered a lot of content. At this point, you should hopefully feel comfortable writing UDFs (with or without null values) that operate on\n", - "\n", - "- Series\n", - "- DataFrame\n", - "- Rolling Windows\n", - "- GroupBy DataFrames\n", - "- CuPy NDArrays\n", - "- Numba DeviceNDArrays\n", - "\n", - "\n", - "For more information please see the [cuDF](https://docs.rapids.ai/api/cudf/nightly/), [Numba.cuda](https://numba.pydata.org/numba-doc/dev/cuda/index.html), and [CuPy](https://docs-cupy.chainer.org/en/stable/) documentation." 
- ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/cudf/source/io-supported-types.rst b/docs/cudf/source/io-supported-types.rst deleted file mode 100644 index a74f3239044..00000000000 --- a/docs/cudf/source/io-supported-types.rst +++ /dev/null @@ -1,66 +0,0 @@ -I/O Supported dtypes -==================== - -The following table lists are compatible cudf types for each supported IO format. - -.. rst-class:: io-supported-types-table -.. table:: - :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 - - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+-------------------+--------+--------+---------+---------+ - | | CSV | Parquet | JSON | ORC | AVRO | HDF | DLPack | Feather | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | Data Type | Writer | Reader | Writer | Reader | Writer¹ | Reader | Writer | Reader | Reader | Writer¹ | Reader¹ | Writer | Reader | Writer¹ | Reader¹ | - +=======================+========+========+========+========+=========+========+========+========+========+=========+=========+========+========+=========+=========+ - | int8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | int16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | int32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | int64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | uint8 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | uint16 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | uint32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | uint64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | float32 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | float64 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | bool | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | str | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | category | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | list | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | timedelta64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | timedelta64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | timedelta64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | timedelta64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - 
+-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | datetime64[s] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | datetime64[ms] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | datetime64[us] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | datetime64[ns] | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | struct | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - | decimal64 | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | - +-----------------------+--------+--------+--------+--------+---------+--------+--------+--------+--------+---------+---------+--------+--------+---------+---------+ - -**Notes:** - -* [¹] - Not GPU-accelerated. diff --git a/docs/cudf/source/io.rst b/docs/cudf/source/io.rst deleted file mode 100644 index e88162d8f52..00000000000 --- a/docs/cudf/source/io.rst +++ /dev/null @@ -1,12 +0,0 @@ -~~~~~~~~~~~~~~ -Input / Output -~~~~~~~~~~~~~~ - -This page contains Input / Output related APIs in cuDF. - -.. 
toctree:: - :maxdepth: 2 - :caption: Contents: - - io-supported-types.rst - io-gds-integration.rst \ No newline at end of file diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb index d31a3b426d7..a7e959a05a7 100644 --- a/docs/cudf/source/user_guide/10min.ipynb +++ b/docs/cudf/source/user_guide/10min.ipynb @@ -2524,7 +2524,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Applying functions to a `Series`. Note that applying user defined functions directly with Dask-cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/dataframe-api.html#dask.dataframe.DataFrame.map_partitions) to apply a function to each partition of the distributed dataframe." + "Applying functions to a `Series`. Note that applying user defined functions directly with Dask-cuDF is not yet implemented. For now, you can use [map_partitions](http://docs.dask.org/en/stable/generated/dask.dataframe.DataFrame.map_partitions.html) to apply a function to each partition of the distributed dataframe." ] }, { diff --git a/docs/cudf/source/user_guide/groupby.md b/docs/cudf/source/user_guide/groupby.md deleted file mode 100644 index 12d1c846329..00000000000 --- a/docs/cudf/source/user_guide/groupby.md +++ /dev/null @@ -1,200 +0,0 @@ -Using GroupBy -============= - -cuDF supports a small (but important) subset of -Pandas' [groupby API](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html). - -## Summary of supported operations - -1. Grouping by one or more columns -1. Basic aggregations such as "sum", "mean", etc. -1. Quantile aggregation -1. A "collect" or `list` aggregation for collecting values in a group into lists -1. Automatic exclusion of columns with unsupported dtypes ("nuisance" columns) when aggregating -1. Iterating over the groups of a GroupBy object -1. `GroupBy.groups` API that returns a mapping of group keys to row labels -1. 
`GroupBy.apply` API for performing arbitrary operations on each group. Note that - this has very limited functionality compared to the equivalent Pandas function. - See the section on [apply](#groupby-apply) for more details. -1. `GroupBy.pipe` similar to [Pandas](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#piping-function-calls). - -## How to do Grouping - -A GroupBy object is created by grouping the values of a `Series` or `DataFrame` -by one or more columns: - -```python -import cudf - ->>> df = cudf.DataFrame({'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]}) ->>> df ->>> gb1 = df.groupby('a') # grouping by a single column ->>> gb2 = df.groupby(['a', 'b']) # grouping by multiple columns ->>> gb3 = df.groupby(cudf.Series(['a', 'a', 'b', 'b', 'b'])) # grouping by an external column -``` - -``` warning:: - cuDF uses `sort=False` by default to achieve better performance, which provides no gaurentee to the group order in outputs. This deviates from Pandas default behavior. - - For example: - - .. code-block:: python - - >>> df = cudf.DataFrame({'a' : [2, 2, 1], 'b' : [42, 21, 11]}) - >>> df.groupby('a').sum() - b - a - 2 63 - 1 11 - >>> df.to_pandas().groupby('a').sum() - b - a - 1 11 - 2 63 - - Setting `sort=True` will produce Pandas-like output, but with some performance penalty: - - .. code-block:: python - - >>> df.groupby('a', sort=True).sum() - b - a - 1 11 - 2 63 - -``` - -### Grouping by index levels - -You can also group by one or more levels of a MultiIndex: - -```python ->>> df = cudf.DataFrame( -... {'a': [1, 1, 1, 2, 2], 'b': [1, 1, 2, 2, 3], 'c': [1, 2, 3, 4, 5]} -... ).set_index(['a', 'b']) -... 
->>> df.groupby(level='a') -``` - -### The `Grouper` object - -A `Grouper` can be used to disambiguate between columns and levels when they have the same name: - -```python ->>> df - b c -b -1 1 1 -1 1 2 -1 2 3 -2 2 4 -2 3 5 ->>> df.groupby('b', level='b') # ValueError: Cannot specify both by and level ->>> df.groupby([cudf.Grouper(key='b'), cudf.Grouper(level='b')]) # OK -``` - -## Aggregation - -Aggregations on groups is supported via the `agg` method: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 ->>> df.groupby('a').agg('sum') - b c -a -1 4 6 -2 5 9 ->>> df.groupby('a').agg({'b': ['sum', 'min'], 'c': 'mean'}) - b c - sum min mean -a -1 4 1 2.0 -2 5 2 4.5 -``` - -The following table summarizes the available aggregations and the types that support them: - -| Aggregations\dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | -| ------------------- | -------- | ------- | -------- | ----------- | ---- | ------ | -------- | ------- | -| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| sum | ✅ | ✅ | | | | | | ✅ | -| idxmin | ✅ | ✅ | | | | | | ✅ | -| idxmax | ✅ | ✅ | | | | | | ✅ | -| min | ✅ | ✅ | ✅ | | | | | ✅ | -| max | ✅ | ✅ | ✅ | | | | | ✅ | -| mean | ✅ | ✅ | | | | | | | -| var | ✅ | ✅ | | | | | | | -| std | ✅ | ✅ | | | | | | | -| quantile | ✅ | ✅ | | | | | | | -| median | ✅ | ✅ | | | | | | | -| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -| nth | ✅ | ✅ | ✅ | | | | | ✅ | -| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | -| unique | ✅ | ✅ | ✅ | ✅ | | | | | - -## GroupBy apply - -To apply function on each group, use the `GroupBy.apply()` method: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 ->>> df.groupby('a').apply(lambda x: x.max() - x.min()) - a b c -a -0 0 1 2 -1 0 1 1 -``` - -### Limitations - -* `apply` works by applying the provided function to each group sequentially, - and concatenating the results together. 
**This can be very slow**, especially - for a large number of small groups. For a small number of large groups, it - can give acceptable performance - -* The results may not always match Pandas exactly. For example, cuDF may return - a `DataFrame` containing a single column where Pandas returns a `Series`. - Some post-processing may be required to match Pandas behavior. - -* cuDF does not support some of the exceptional cases that Pandas supports with - `apply`, such as [`describe`](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#flexible-apply). - -## Rolling window calculations - -Use the `GroupBy.rolling()` method to perform rolling window calculations on each group: - -```python ->>> df - a b c -0 1 1 1 -1 1 1 2 -2 1 2 3 -3 2 2 4 -4 2 3 5 -``` - -Rolling window sum on each group with a window size of 2: - -```python ->>> df.groupby('a').rolling(2).sum() - a b c -a -1 0 - 1 2 2 3 - 2 2 3 5 -2 3 - 4 4 5 9 -``` diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst index 4cd5eeb00d8..20ccaeac50a 100644 --- a/docs/cudf/source/user_guide/index.rst +++ b/docs/cudf/source/user_guide/index.rst @@ -10,4 +10,4 @@ User Guide 10min-cudf-cupy.ipynb guide-to-udfs.ipynb Working-with-missing-data.ipynb - groupby + groupby.rst diff --git a/python/cudf/requirements/cuda-11.0/dev_requirements.txt b/python/cudf/requirements/cuda-11.0/dev_requirements.txt index efb22ddd5a4..f69c246832b 100644 --- a/python/cudf/requirements/cuda-11.0/dev_requirements.txt +++ b/python/cudf/requirements/cuda-11.0/dev_requirements.txt @@ -23,6 +23,7 @@ packaging pandas>=1.0,<1.3.0dev0 pandoc==2.0a4 protobuf +pydata-sphinx-theme pyorc pytest pytest-benchmark @@ -33,7 +34,6 @@ setuptools sphinx sphinx-copybutton sphinx-markdown-tables -sphinx_rtd_theme sphinxcontrib-websupport transformers typing_extensions diff --git a/python/cudf/requirements/cuda-11.2/dev_requirements.txt b/python/cudf/requirements/cuda-11.2/dev_requirements.txt index 
cb88f74399f..e55dc2f921a 100644 --- a/python/cudf/requirements/cuda-11.2/dev_requirements.txt +++ b/python/cudf/requirements/cuda-11.2/dev_requirements.txt @@ -23,6 +23,7 @@ packaging pandas>=1.0,<1.3.0dev0 pandoc==2.0a4 protobuf +pydata-sphinx-theme pyorc pytest pytest-benchmark @@ -33,7 +34,6 @@ setuptools sphinx sphinx-copybutton sphinx-markdown-tables -sphinx_rtd_theme sphinxcontrib-websupport transformers typing_extensions From 98cdcdb5d1d89d108bed76c6bae1e13d297a3d84 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 12:28:15 -0700 Subject: [PATCH 20/49] update gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fabec763545..c523dcf37ff 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,4 @@ dask-worker-space/ **/*_pb2.py # Sphinx docs & build artifacts -docs/cudf/source/api_docs/generated \ No newline at end of file +docs/cudf/source/api_docs/generated/* \ No newline at end of file From bd52cfc950333690f2b7a061953f0970d05551d1 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 12:39:37 -0700 Subject: [PATCH 21/49] revert setup.py changes --- python/cudf/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/setup.py b/python/cudf/setup.py index f0c5cda2b47..54921396b6f 100644 --- a/python/cudf/setup.py +++ b/python/cudf/setup.py @@ -218,7 +218,7 @@ def run(self): extensions, nthreads=nthreads, compiler_directives=dict( - profile=False, language_level=3, embedsignature=True, binding=True + profile=False, language_level=3, embedsignature=True ), ), packages=find_packages(include=["cudf", "cudf.*"]), From b16b6933357a097065dfa155077f663fb24ada17 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 12:52:23 -0700 Subject: [PATCH 22/49] update --- docs/cudf/source/index.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index 1201e9e7ed3..950694f69a7 
100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -10,8 +10,6 @@ Welcome to cuDF's documentation! user_guide/index basics/index api_docs/index - dask-cudf.rst - internals.rst Indices and tables From bee5a3f5a8a31bfd7fc7326092b44425bf2a723d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 12:53:12 -0700 Subject: [PATCH 23/49] update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index c523dcf37ff..aee3d072de2 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ dask-worker-space/ **/*_pb2.py # Sphinx docs & build artifacts -docs/cudf/source/api_docs/generated/* \ No newline at end of file +docs/cudf/source/api_docs/generated/* +docs/cudf/source/api_docs/api/* \ No newline at end of file From 412c56497b9bf53f2f9973508ced05e432fbddfa Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 20 Jul 2021 15:07:15 -0700 Subject: [PATCH 24/49] fix ymls --- conda/environments/cudf_dev_cuda11.0.yml | 2 +- conda/environments/cudf_dev_cuda11.2.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml index fbfc0fb8f9c..ceb63494561 100644 --- a/conda/environments/cudf_dev_cuda11.0.yml +++ b/conda/environments/cudf_dev_cuda11.0.yml @@ -56,7 +56,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - transformers - - pydata_sphinx_theme + - pydata-sphinx-theme - pip: - git+https://github.com/dask/dask.git@main - git+https://github.com/dask/distributed.git@main diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml index c5dab7c9d52..61f88c08e24 100644 --- a/conda/environments/cudf_dev_cuda11.2.yml +++ b/conda/environments/cudf_dev_cuda11.2.yml @@ -56,7 +56,7 @@ dependencies: - nvtx>=0.2.1 - cachetools - transformers - - pydata_sphinx_theme + - pydata-sphinx-theme - pip: - git+https://github.com/dask/dask.git@main - 
git+https://github.com/dask/distributed.git@main From 27bfee51ef69f3590b7848389f871e50dce451e7 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 21 Jul 2021 12:13:34 -0700 Subject: [PATCH 25/49] remove not needed css file --- docs/cudf/source/conf.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index e40cd097693..11a81fcf5a0 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -242,7 +242,6 @@ def ignore_internal_references(app, env, node, contnode): def setup(app): - # app.add_css_file("params.css") - app.add_css_file("https://docs.rapids.ai/assets/css/custom.css") + app.add_css_file("params.css") app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) From b6fc21f4c7f7478c528625c056c4279b68db9303 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 21 Jul 2021 13:07:33 -0700 Subject: [PATCH 26/49] add window --- docs/cudf/source/api_docs/index.rst | 1 + docs/cudf/source/api_docs/window.rst | 88 +--------------------------- 2 files changed, 3 insertions(+), 86 deletions(-) diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index 41aa6288887..7948f6d240c 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -16,4 +16,5 @@ the left sidebar to see how various elements look on this theme. groupby general_functions general_utilities + window diff --git a/docs/cudf/source/api_docs/window.rst b/docs/cudf/source/api_docs/window.rst index 6fd8eb0d5f6..9f94f620949 100644 --- a/docs/cudf/source/api_docs/window.rst +++ b/docs/cudf/source/api_docs/window.rst @@ -1,20 +1,16 @@ -{{ header }} - .. _api.window: ====== Window ====== -Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. 
-Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. -ExponentialMovingWindow objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc. +Rolling objects are returned by ``.rolling`` calls: :func:`cudf.DataFrame.rolling`, :func:`cudf.Series.rolling`, etc. .. _api.functions_rolling: Rolling window functions ------------------------ -.. currentmodule:: pandas.core.window.rolling +.. currentmodule:: cudf.core.window.rolling .. autosummary:: :toctree: api/ @@ -22,87 +18,7 @@ Rolling window functions Rolling.count Rolling.sum Rolling.mean - Rolling.median - Rolling.var - Rolling.std Rolling.min Rolling.max - Rolling.corr - Rolling.cov - Rolling.skew - Rolling.kurt Rolling.apply - Rolling.aggregate - Rolling.quantile - Rolling.sem - -.. _api.functions_window: - -Weighted window functions -------------------------- -.. currentmodule:: pandas.core.window.rolling - -.. autosummary:: - :toctree: api/ - - Window.mean - Window.sum - Window.var - Window.std - -.. _api.functions_expanding: - -Expanding window functions --------------------------- -.. currentmodule:: pandas.core.window.expanding - -.. autosummary:: - :toctree: api/ - - Expanding.count - Expanding.sum - Expanding.mean - Expanding.median - Expanding.var - Expanding.std - Expanding.min - Expanding.max - Expanding.corr - Expanding.cov - Expanding.skew - Expanding.kurt - Expanding.apply - Expanding.aggregate - Expanding.quantile - Expanding.sem - -.. _api.functions_ewm: - -Exponentially-weighted window functions ---------------------------------------- -.. currentmodule:: pandas.core.window.ewm - -.. autosummary:: - :toctree: api/ - - ExponentialMovingWindow.mean - ExponentialMovingWindow.std - ExponentialMovingWindow.var - ExponentialMovingWindow.corr - ExponentialMovingWindow.cov - -.. _api.indexers_window: - -Window indexer --------------- -.. 
currentmodule:: pandas - -Base class for defining custom window boundaries. - -.. autosummary:: - :toctree: api/ - api.indexers.BaseIndexer - api.indexers.FixedForwardWindowIndexer - api.indexers.VariableOffsetWindowIndexer -{"mode":"full","isActive":false} \ No newline at end of file From 15d6d72222694768c01e4c34f7eb4be1df36cbca Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 21 Jul 2021 14:44:41 -0700 Subject: [PATCH 27/49] add missing dataframe APIs --- docs/cudf/source/api_docs/dataframe.rst | 39 ++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 07fba0808e4..d2006adc6ca 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -94,6 +94,9 @@ Function application, GroupBy & window .. autosummary:: :toctree: api/ + DataFrame.apply + DataFrame.apply_chunks + DataFrame.apply_rows DataFrame.pipe DataFrame.agg DataFrame.groupby @@ -126,6 +129,7 @@ Computations / descriptive stats DataFrame.prod DataFrame.product DataFrame.quantile + DataFrame.quantiles DataFrame.rank DataFrame.round DataFrame.skew @@ -146,9 +150,12 @@ Reindexing / selection / label manipulation DataFrame.rename DataFrame.reset_index DataFrame.sample + DataFrame.searchsorted DataFrame.set_index + DataFrame.repeat DataFrame.tail DataFrame.take + DataFrame.tile .. _api.dataframe.missing: @@ -161,6 +168,7 @@ Missing data handling DataFrame.fillna DataFrame.isna DataFrame.isnull + DataFrame.nans_to_nulls DataFrame.notna DataFrame.notnull DataFrame.replace @@ -170,7 +178,11 @@ Reshaping, sorting, transposing .. 
autosummary:: :toctree: api/ + DataFrame.argsort + DataFrame.interleave_columns + DataFrame.partition_by_hash DataFrame.pivot + DataFrame.scatter_by_map DataFrame.sort_values DataFrame.sort_index DataFrame.nlargest @@ -182,7 +194,7 @@ Reshaping, sorting, transposing DataFrame.T DataFrame.transpose -Combining / comparing / joining / merging +Combining / comparing / joining / merging / encoding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: api/ @@ -192,6 +204,23 @@ Combining / comparing / joining / merging DataFrame.join DataFrame.merge DataFrame.update + DataFrame.label_encoding + DataFrame.one_hot_encoding + +Numerical operations +~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + DataFrame.acos + DataFrame.asin + DataFrame.atan + DataFrame.cos + DataFrame.exp + DataFrame.log + DataFrame.sin + DataFrame.sqrt + DataFrame.tan Time Series-related ~~~~~~~~~~~~~~~~~~~ @@ -205,13 +234,21 @@ Serialization / IO / conversion .. autosummary:: :toctree: api/ + DataFrame.as_gpu_matrix + DataFrame.as_matrix + DataFrame.from_arrow + DataFrame.from_pandas DataFrame.from_records + DataFrame.hash_columns + DataFrame.to_arrow + DataFrame.to_dlpack DataFrame.to_parquet DataFrame.to_pickle DataFrame.to_csv DataFrame.to_hdf DataFrame.to_dict DataFrame.to_json + DataFrame.to_pandas DataFrame.to_feather DataFrame.to_records DataFrame.to_string From 641864d0931de2a67815015edda3dbff73fa1377 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Wed, 21 Jul 2021 16:00:13 -0700 Subject: [PATCH 28/49] add missing Series APIs --- docs/cudf/source/api_docs/series.rst | 53 +++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 5 deletions(-) diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 663570991ba..e5d8f79f75a 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -1,6 +1,3 @@ -.. 
meta:: - :my-var: a-for-apple - ====== Series ====== @@ -40,6 +37,10 @@ Conversion Series.copy Series.to_list Series.__array__ + Series.as_index + Series.as_mask + Series.scale + Indexing, iteration ------------------- @@ -63,7 +64,9 @@ Binary operator functions Series.add Series.sub + Series.subtract Series.mul + Series.multiply Series.truediv Series.floordiv Series.mod @@ -89,6 +92,7 @@ Function application, GroupBy & window .. autosummary:: :toctree: api/ + Series.applymap Series.map Series.groupby Series.rolling @@ -104,6 +108,7 @@ Computations / descriptive stats Series.abs Series.all Series.any + Series.ceil Series.clip Series.corr Series.count @@ -114,7 +119,9 @@ Computations / descriptive stats Series.cumsum Series.describe Series.diff + Series.digitize Series.factorize + Series.floor Series.kurt Series.max Series.mean @@ -152,9 +159,13 @@ Reindexing / selection / label manipulation Series.reindex Series.rename Series.reset_index + Series.reverse Series.sample + Series.set_index + Series.set_mask Series.take Series.tail + Series.tile Series.truncate Series.where Series.mask @@ -168,6 +179,7 @@ Missing data handling Series.fillna Series.isna Series.isnull + Series.nans_to_nulls Series.notna Series.notnull Series.replace @@ -178,19 +190,38 @@ Reshaping, sorting :toctree: api/ Series.argsort + Series.interleave_columns Series.sort_values Series.sort_index Series.explode + Series.scatter_by_map Series.searchsorted Series.repeat -Combining / comparing / joining / merging ------------------------------------------ +Combining / comparing / joining / merging / encoding +---------------------------------------------------- .. autosummary:: :toctree: api/ Series.append Series.update + Series.label_encoding + Series.one_hot_encoding + +Numerical operations +~~~~~~~~~~~~~~~~~~~~ +.. 
autosummary:: + :toctree: api/ + + Series.acos + Series.asin + Series.atan + Series.cos + Series.exp + Series.log + Series.sin + Series.sqrt + Series.tan Time Series-related ------------------- @@ -424,7 +455,19 @@ Serialization / IO / conversion .. autosummary:: :toctree: api/ + Series.to_array + Series.to_arrow + Series.to_dlpack Series.to_frame + Series.to_gpu_array Series.to_hdf Series.to_json + Series.to_pandas Series.to_string + Series.from_arrow + Series.from_categorical + Series.from_masked_array + Series.from_pandas + Series.hash_encode + Series.hash_values + \ No newline at end of file From 590b8ee372af47a27f0b317dcea9cc2cb3f565ad Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 29 Jul 2021 18:24:57 -0700 Subject: [PATCH 29/49] add index template --- .../autosummary/class_without_autosummary.rst | 6 +++++ docs/cudf/source/api_docs/index_objects.rst | 27 ++++++++++--------- docs/cudf/source/conf.py | 18 ++++++++++++- 3 files changed, 37 insertions(+), 14 deletions(-) create mode 100644 docs/cudf/source/_templates/autosummary/class_without_autosummary.rst diff --git a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst new file mode 100644 index 00000000000..b57a7ceebb0 --- /dev/null +++ b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst @@ -0,0 +1,6 @@ +{{ fullname }} +{{ underline }} + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} \ No newline at end of file diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 5bd11a97b62..2c5e4c6555b 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -20,18 +20,20 @@ Properties .. 
autosummary:: :toctree: api/ - Index.values + Index.empty + Index.gpu_values Index.is_monotonic Index.is_monotonic_increasing Index.is_monotonic_decreasing Index.is_unique - Index.shape Index.name Index.names Index.ndim + Index.nlevels + Index.shape Index.size - Index.empty - Index.memory_usage + Index.values + Modifying and computations ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -69,6 +71,13 @@ Missing values Index.isna Index.notna +Memory usage +~~~~~~~~~~~~ +.. autosummary:: + :toctree: api/ + + Index.memory_usage + Conversion ~~~~~~~~~~ .. autosummary:: @@ -120,21 +129,13 @@ Numeric Index ------------- .. autosummary:: :toctree: api/ + :template: autosummary/class_without_autosummary.rst RangeIndex Int64Index UInt64Index Float64Index -.. We need this autosummary so that the methods are generated. -.. Separate block, since they aren't classes. - -.. autosummary:: - :toctree: api/ - - RangeIndex.start - RangeIndex.stop - RangeIndex.step .. _api.categoricalindex: diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index d2892a492b4..cda4edd9543 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -89,7 +89,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] +exclude_patterns = ['venv', "**/includes/**",] # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" @@ -242,8 +242,24 @@ def ignore_internal_references(app, env, node, contnode): node["reftarget"] = "" return contnode +def process_class_docstrings(app, what, name, obj, options, lines): + """ + For those classes for which we use :: + :template: autosummary/class_without_autosummary.rst + the documented attributes/methods have to be listed in the class + docstring. However, if one of those lists is empty, we use 'None', + which then generates warnings in sphinx / ugly html output. 
+ This "autodoc-process-docstring" event connector removes that part + from the processed docstring. + """ + if what == "class": + if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex"}: + + cut_index = lines.index(':Attributes:') + lines[:] = lines[:cut_index] def setup(app): app.add_css_file("params.css") app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) + app.connect("autodoc-process-docstring", process_class_docstrings) \ No newline at end of file From 6e8f6989eda547a215b074f5521bd403233c4bf7 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 29 Jul 2021 21:34:01 -0700 Subject: [PATCH 30/49] misc --- docs/cudf/source/api_docs/dataframe.rst | 2 +- docs/cudf/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index d2006adc6ca..416e22a551b 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -195,7 +195,7 @@ Reshaping, sorting, transposing DataFrame.transpose Combining / comparing / joining / merging / encoding -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
autosummary:: :toctree: api/ diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index cda4edd9543..7aebb6787d1 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -210,7 +210,7 @@ numpydoc_show_inherited_class_members = True numpydoc_class_members_toctree = False -autoclass_content = "init" +autoclass_content = "class" # Replace API shorthands with fullname _reftarget_aliases = { From 122b9818a91cf61b458ab8d2fcdbc0715d00ad7a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 29 Jul 2021 21:58:31 -0700 Subject: [PATCH 31/49] add docstrings --- python/cudf/cudf/core/index.py | 200 +++++++++++++++++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 691b6ab2e29..54aac63243a 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2007,42 +2007,242 @@ def __init__(self, data=None, dtype=None, copy=False, name=None): class Int8Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Int8Index is a special case of Index with purely + integer(``int8``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + Int8Index + """ _dtype = np.int8 class Int16Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Int16Index is a special case of Index with purely + integer(``int16``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. 
+ + Returns + ------- + Int16Index + """ _dtype = np.int16 class Int32Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Int32Index is a special case of Index with purely + integer(``int32``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + Int32Index + """ _dtype = np.int32 class Int64Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Int64Index is a special case of Index with purely + integer(``int64``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + Int64Index + """ _dtype = np.int64 class UInt8Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + UInt8Index is a special case of Index with purely + integer(``uint8``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + UInt8Index + """ _dtype = np.uint8 class UInt16Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + UInt16Index is a special case of Index with purely + integer(``uint16``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. 
+ + Returns + ------- + UInt16Index + """ _dtype = np.uint16 class UInt32Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + UInt32Index is a special case of Index with purely + integer(``uint32``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + UInt32Index + """ _dtype = np.uint32 class UInt64Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + UInt64Index is a special case of Index with purely + integer(``uint64``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + UInt64Index + """ _dtype = np.uint64 class Float32Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Float32Index is a special case of Index with purely + float(``float32``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. + + Returns + ------- + Float32Index + """ _dtype = np.float32 class Float64Index(NumericIndex): + """ + Immutable, ordered and sliceable sequence of labels. + The basic object storing row labels for all cuDF objects. + Float64Index is a special case of Index with purely + float(``float64``) labels. + + Parameters + ---------- + data : array-like (1-dimensional) + dtype : NumPy dtype, + but not used. + copy : bool + Make a copy of input data. + name : object + Name to be stored in the index. 
+ + Returns + ------- + Float64Index + """ _dtype = np.float64 From 5f57b0e6c7ba073b7d0197316fc6120c3c399df7 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 29 Jul 2021 21:58:45 -0700 Subject: [PATCH 32/49] update conf.py --- docs/cudf/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 7aebb6787d1..4f3e300d193 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -253,7 +253,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): from the processed docstring. """ if what == "class": - if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex"}: + if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex"}: cut_index = lines.index(':Attributes:') lines[:] = lines[:cut_index] From f22166c63609ecfec99a24fb3f93b113022eda10 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 30 Jul 2021 15:54:27 -0700 Subject: [PATCH 33/49] fix Index --- docs/cudf/source/api_docs/index_objects.rst | 4 ++++ docs/cudf/source/conf.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 2c5e4c6555b..3679ac0a665 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -186,6 +186,7 @@ MultiIndex ---------- .. autosummary:: :toctree: api/ + :template: autosummary/class_without_autosummary.rst MultiIndex @@ -198,6 +199,7 @@ MultiIndex constructors MultiIndex.from_tuples MultiIndex.from_product MultiIndex.from_frame + MultiIndex.from_arrow MultiIndex properties ~~~~~~~~~~~~~~~~~~~~~ @@ -231,6 +233,7 @@ DatetimeIndex ------------- .. 
autosummary:: :toctree: api/ + :template: autosummary/class_without_autosummary.rst DatetimeIndex @@ -267,6 +270,7 @@ TimedeltaIndex -------------- .. autosummary:: :toctree: api/ + :template: autosummary/class_without_autosummary.rst TimedeltaIndex diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 4f3e300d193..de056f48284 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -253,7 +253,7 @@ def process_class_docstrings(app, what, name, obj, options, lines): from the processed docstring. """ if what == "class": - if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex"}: + if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}: cut_index = lines.index(':Attributes:') lines[:] = lines[:cut_index] From 60754b40ad988b6b924635d83758c46c25095e16 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 30 Jul 2021 18:15:10 -0700 Subject: [PATCH 34/49] fix error in docs --- docs/cudf/source/api_docs/dataframe.rst | 1 - docs/cudf/source/api_docs/groupby.rst | 2 - docs/cudf/source/api_docs/index_objects.rst | 1 - docs/cudf/source/api_docs/series.rst | 4 +- docs/cudf/source/user_guide/index.rst | 1 - python/cudf/cudf/core/index.py | 17 ++ python/cudf/cudf/core/multiindex.py | 176 +++++++++++++++++++- python/cudf/cudf/core/series.py | 83 ++++----- 8 files changed, 234 insertions(+), 51 deletions(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index 416e22a551b..f5920c8f11c 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -243,7 +243,6 @@ Serialization / IO / conversion DataFrame.to_arrow DataFrame.to_dlpack DataFrame.to_parquet - DataFrame.to_pickle DataFrame.to_csv DataFrame.to_hdf 
DataFrame.to_dict diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst index 3dcd7a9f014..27a314fa425 100644 --- a/docs/cudf/source/api_docs/groupby.rst +++ b/docs/cudf/source/api_docs/groupby.rst @@ -79,8 +79,6 @@ application to columns of a specific data type. DataFrameGroupBy.describe DataFrameGroupBy.ffill DataFrameGroupBy.fillna - DataFrameGroupBy.filter - DataFrameGroupBy.hist DataFrameGroupBy.idxmax DataFrameGroupBy.idxmin DataFrameGroupBy.nunique diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 3679ac0a665..75d21021a91 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -59,7 +59,6 @@ Compatibility with MultiIndex :toctree: api/ Index.set_names - Index.droplevel Missing values ~~~~~~~~~~~~~~ diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index e5d8f79f75a..675d73ed9ff 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -166,7 +166,6 @@ Reindexing / selection / label manipulation Series.take Series.tail Series.tile - Series.truncate Series.where Series.mask @@ -393,6 +392,7 @@ strings and apply several methods to it. These can be accessed like correct template (otherwise they would be created in the Series/Index class page) .. + .. currentmodule:: cudf .. autosummary:: :toctree: api/ :template: autosummary/accessor.rst @@ -400,8 +400,6 @@ strings and apply several methods to it. These can be accessed like Series.str Series.cat Series.dt - Series.sparse - DataFrame.sparse Index.str .. 
_api.series.cat: diff --git a/docs/cudf/source/user_guide/index.rst b/docs/cudf/source/user_guide/index.rst index 20ccaeac50a..1061008eb3c 100644 --- a/docs/cudf/source/user_guide/index.rst +++ b/docs/cudf/source/user_guide/index.rst @@ -10,4 +10,3 @@ User Guide 10min-cudf-cupy.ipynb guide-to-udfs.ipynb Working-with-missing-data.ipynb - groupby.rst diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 54aac63243a..57937f247aa 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2027,6 +2027,7 @@ class Int8Index(NumericIndex): ------- Int8Index """ + _dtype = np.int8 @@ -2051,6 +2052,7 @@ class Int16Index(NumericIndex): ------- Int16Index """ + _dtype = np.int16 @@ -2075,6 +2077,7 @@ class Int32Index(NumericIndex): ------- Int32Index """ + _dtype = np.int32 @@ -2099,6 +2102,7 @@ class Int64Index(NumericIndex): ------- Int64Index """ + _dtype = np.int64 @@ -2123,6 +2127,7 @@ class UInt8Index(NumericIndex): ------- UInt8Index """ + _dtype = np.uint8 @@ -2147,6 +2152,7 @@ class UInt16Index(NumericIndex): ------- UInt16Index """ + _dtype = np.uint16 @@ -2171,6 +2177,7 @@ class UInt32Index(NumericIndex): ------- UInt32Index """ + _dtype = np.uint32 @@ -2195,6 +2202,7 @@ class UInt64Index(NumericIndex): ------- UInt64Index """ + _dtype = np.uint64 @@ -2219,6 +2227,7 @@ class Float32Index(NumericIndex): ------- Float32Index """ + _dtype = np.float32 @@ -2243,6 +2252,7 @@ class Float64Index(NumericIndex): ------- Float64Index """ + _dtype = np.float64 @@ -2693,6 +2703,13 @@ def components(self): @property def inferred_freq(self): + """ + Infers frequency of TimedeltaIndex. + + Notes + ----- + This property is currently not supported. 
+ """ raise NotImplementedError("inferred_freq is not yet supported") diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 51423d604c2..decef4ab184 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -612,6 +612,30 @@ def to_arrow(self): @property def codes(self): + """ + Returns the codes of the underlying MultiIndex. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'a':[1, 2, 3], 'b':[10, 11, 12]}) + >>> cudf.MultiIndex.from_frame(df) + MultiIndex([(1, 10), + (2, 11), + (3, 12)], + names=['a', 'b']) + >>> midx = cudf.MultiIndex.from_frame(df) + >>> midx + MultiIndex([(1, 10), + (2, 11), + (3, 12)], + names=['a', 'b']) + >>> midx.codes + a b + 0 0 0 + 1 1 1 + 2 2 2 + """ if self._codes is None: self._compute_levels_and_codes() return self._codes @@ -625,6 +649,37 @@ def nlevels(self): @property def levels(self): + """ + Returns list of levels in the MultiIndex + + Returns + ------- + List of Series objects + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame({'a':[1, 2, 3], 'b':[10, 11, 12]}) + >>> cudf.MultiIndex.from_frame(df) + MultiIndex([(1, 10), + (2, 11), + (3, 12)], + names=['a', 'b']) + >>> midx = cudf.MultiIndex.from_frame(df) + >>> midx + MultiIndex([(1, 10), + (2, 11), + (3, 12)], + names=['a', 'b']) + >>> midx.levels + [0 1 + 1 2 + 2 3 + dtype: int64, 0 10 + 1 11 + 2 12 + dtype: int64] + """ if self._levels is None: self._compute_levels_and_codes() return self._levels @@ -1126,6 +1181,37 @@ def _concat(cls, objs): @classmethod def from_tuples(cls, tuples, names=None): + """ + Convert list of tuples to MultiIndex. + + Parameters + ---------- + tuples : list / sequence of tuple-likes + Each tuple is the index of one row/column. + names : list / sequence of str, optional + Names for the levels in the index. 
+ + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> tuples = [(1, 'red'), (1, 'blue'), + ... (2, 'red'), (2, 'blue')] + >>> cudf.MultiIndex.from_tuples(tuples, names=('number', 'color')) + MultiIndex([(1, 'red'), + (1, 'blue'), + (2, 'red'), + (2, 'blue')], + names=['number', 'color']) + """ # Use Pandas for handling Python host objects pdi = pd.MultiIndex.from_tuples(tuples, names=names) result = cls.from_pandas(pdi) @@ -1190,11 +1276,97 @@ def values(self): return self._source_data.values @classmethod - def from_frame(cls, dataframe, names=None): - return cls(source_data=dataframe, names=names) + def from_frame(cls, df, names=None): + """ + Make a MultiIndex from a DataFrame. + + Parameters + ---------- + df : DataFrame + DataFrame to be converted to MultiIndex. + names : list-like, optional + If no names are provided, use the column names, or tuple of column + names if the columns is a MultiIndex. If a sequence, overwrite + names with the given sequence. + + Returns + ------- + MultiIndex + The MultiIndex representation of the given DataFrame. + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_product : Make a MultiIndex from cartesian product + of iterables. + + Examples + -------- + >>> import cudf + >>> df = cudf.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], + ... ['NJ', 'Temp'], ['NJ', 'Precip']], + ... 
columns=['a', 'b']) + >>> df + a b + 0 HI Temp + 1 HI Precip + 2 NJ Temp + 3 NJ Precip + >>> cudf.MultiIndex.from_frame(df) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['a', 'b']) + + Using explicit names, instead of the column names + + >>> cudf.MultiIndex.from_frame(df, names=['state', 'observation']) + MultiIndex([('HI', 'Temp'), + ('HI', 'Precip'), + ('NJ', 'Temp'), + ('NJ', 'Precip')], + names=['state', 'observation']) + """ + return cls(source_data=df, names=names) @classmethod def from_product(cls, arrays, names=None): + """ + Make a MultiIndex from the cartesian product of multiple iterables. + + Parameters + ---------- + iterables : list / sequence of iterables + Each iterable has unique labels for each level of the index. + names : list / sequence of str, optional + Names for the levels in the index. + If not explicitly provided, names will be inferred from the + elements of iterables if an element has a name attribute + + Returns + ------- + MultiIndex + + See Also + -------- + MultiIndex.from_tuples : Convert list of tuples to MultiIndex. + MultiIndex.from_frame : Make a MultiIndex from a DataFrame. + + Examples + -------- + >>> numbers = [0, 1, 2] + >>> colors = ['green', 'purple'] + >>> cudf.MultiIndex.from_product([numbers, colors], + ... names=['number', 'color']) + MultiIndex([(0, 'green'), + (0, 'purple'), + (1, 'green'), + (1, 'purple'), + (2, 'green'), + (2, 'purple')], + names=['number', 'color']) + """ # Use Pandas for handling Python host objects pdi = pd.MultiIndex.from_product(arrays, names=names) result = cls.from_pandas(pdi) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index fb197fbc90d..15d6d827ba0 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -64,6 +64,48 @@ class Series(SingleColumnFrame, Serializable): + """ + One-dimensional GPU array (including time series). 
+ + Labels need not be unique but must be a hashable type. The object + supports both integer- and label-based indexing and provides a + host of methods for performing operations involving the index. + Statistical methods from ndarray have been overridden to + automatically exclude missing data (currently represented + as null/NaN). + + Operations between Series (`+`, `-`, `/`, `*`, `**`) align + values based on their associated index values-– they need + not be the same length. The result index will be the + sorted union of the two indexes. + + ``Series`` objects are used as columns of ``DataFrame``. + + Parameters + ---------- + data : array-like, Iterable, dict, or scalar value + Contains data stored in Series. + + index : array-like or Index (1d) + Values must be hashable and have the same length + as data. Non-unique index values are allowed. Will + default to RangeIndex (0, 1, 2, …, n) if not provided. + If both a dict and index sequence are used, the index will + override the keys found in the dict. + + dtype : str, numpy.dtype, or ExtensionDtype, optional + Data type for the output Series. If not specified, + this will be inferred from data. + + name : str, optional + The name to give to the Series. + + nan_as_null : bool, Default True + If ``None``/``True``, converts ``np.nan`` values to + ``null`` values. + If ``False``, leaves ``np.nan`` values as is. + """ + # The `constructor*` properties are used by `dask` (and `dask_cudf`) @property def _constructor(self): @@ -171,47 +213,6 @@ def from_masked_array(cls, data, mask, null_count=None): def __init__( self, data=None, index=None, dtype=None, name=None, nan_as_null=True, ): - """ - One-dimensional GPU array (including time series). - - Labels need not be unique but must be a hashable type. The object - supports both integer- and label-based indexing and provides a - host of methods for performing operations involving the index. 
- Statistical methods from ndarray have been overridden to - automatically exclude missing data (currently represented - as null/NaN). - - Operations between Series (`+`, `-`, `/`, `*`, `**`) align - values based on their associated index values-– they need - not be the same length. The result index will be the - sorted union of the two indexes. - - ``Series`` objects are used as columns of ``DataFrame``. - - Parameters - ---------- - data : array-like, Iterable, dict, or scalar value - Contains data stored in Series. - - index : array-like or Index (1d) - Values must be hashable and have the same length - as data. Non-unique index values are allowed. Will - default to RangeIndex (0, 1, 2, …, n) if not provided. - If both a dict and index sequence are used, the index will - override the keys found in the dict. - - dtype : str, numpy.dtype, or ExtensionDtype, optional - Data type for the output Series. If not specified, - this will be inferred from data. - - name : str, optional - The name to give to the Series. - - nan_as_null : bool, Default True - If ``None``/``True``, converts ``np.nan`` values to - ``null`` values. - If ``False``, leaves ``np.nan`` values as is. 
- """ if isinstance(data, pd.Series): if name is None: name = data.name From 1386ff2ed9b02cc8a8133ed0c8274c32ae45acf9 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 2 Aug 2021 18:24:58 -0700 Subject: [PATCH 35/49] fix autosummary with workarounds --- .../autosummary/class_with_autosummary.rst | 33 +++++++++++++++++++ docs/cudf/source/api_docs/series.rst | 7 ++++ docs/cudf/source/conf.py | 10 ++++-- 3 files changed, 47 insertions(+), 3 deletions(-) create mode 100644 docs/cudf/source/_templates/autosummary/class_with_autosummary.rst diff --git a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst new file mode 100644 index 00000000000..f86822bc567 --- /dev/null +++ b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst @@ -0,0 +1,33 @@ +{% extends "!autosummary/class.rst" %} + +{% block methods %} +{% if methods %} + +.. + HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. + .. autosummary:: + :toctree: + {% for item in all_methods %} + {%- if not item.startswith('_') or item in ['__call__'] %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + +{% endif %} +{% endblock %} + +{% block attributes %} +{% if attributes %} + +.. + HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages. + .. autosummary:: + :toctree: + {% for item in all_attributes %} + {%- if not item.startswith('_') %} + {{ name }}.{{ item }} + {%- endif -%} + {%- endfor %} + +{% endif %} +{% endblock %} \ No newline at end of file diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst index 675d73ed9ff..ffa809268f3 100644 --- a/docs/cudf/source/api_docs/series.rst +++ b/docs/cudf/source/api_docs/series.rst @@ -7,6 +7,7 @@ Constructor ----------- .. 
autosummary:: :toctree: api/ + :template: autosummary/class_with_autosummary.rst Series @@ -19,14 +20,20 @@ Attributes Series.index Series.values + Series.data Series.dtype Series.shape Series.ndim + Series.nullable + Series.nullmask + Series.null_count Series.size Series.memory_usage Series.has_nulls Series.empty Series.name + Series.valid_count + Series.values_host Conversion ---------- diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index de056f48284..0f85fd09399 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -209,6 +209,7 @@ # Config numpydoc numpydoc_show_inherited_class_members = True numpydoc_class_members_toctree = False +numpydoc_attributes_as_param_list = False autoclass_content = "class" @@ -255,11 +256,14 @@ def process_class_docstrings(app, what, name, obj, options, lines): if what == "class": if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}: - cut_index = lines.index(':Attributes:') - lines[:] = lines[:cut_index] + # cut_index = lines.index(':Attributes:') + # lines[:] = lines[:cut_index] + pass + + def setup(app): app.add_css_file("params.css") app.connect("doctree-read", resolve_aliases) app.connect("missing-reference", ignore_internal_references) - app.connect("autodoc-process-docstring", process_class_docstrings) \ No newline at end of file + app.connect("autodoc-process-docstring", process_class_docstrings) From 0a5ed1632af8a1e53af7370238aed7f89e056609 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 2 Aug 2021 19:14:18 -0700 Subject: [PATCH 36/49] refactor init docs --- python/cudf/cudf/core/column/categorical.py | 134 +++++----- python/cudf/cudf/core/column/datetime.py | 24 +- python/cudf/cudf/core/column/numerical.py | 19 +- python/cudf/cudf/core/column/string.py | 41 +-- python/cudf/cudf/core/column/timedelta.py | 35 +-- 
python/cudf/cudf/core/frame.py | 6 + python/cudf/cudf/core/groupby/groupby.py | 282 ++++++++++---------- python/cudf/cudf/core/index.py | 2 +- python/cudf/cudf/core/series.py | 6 +- python/cudf/cudf/core/tools/datetimes.py | 120 ++++----- python/cudf/cudf/core/window/rolling.py | 15 +- 11 files changed, 351 insertions(+), 333 deletions(-) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 48398e03b2d..f435e0fa88c 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -49,62 +49,63 @@ class CategoricalAccessor(ColumnMethods): + """ + Accessor object for categorical properties of the Series values. + Be aware that assigning to `categories` is a inplace operation, + while all methods return new categorical data per default. + + Parameters + ---------- + column : Column + parent : Series or CategoricalIndex + + Examples + -------- + >>> s = cudf.Series([1,2,3], dtype='category') + >>> s + >>> s + 0 1 + 1 2 + 2 3 + dtype: category + Categories (3, int64): [1, 2, 3] + >>> s.cat.categories + Int64Index([1, 2, 3], dtype='int64') + >>> s.cat.reorder_categories([3,2,1]) + 0 1 + 1 2 + 2 3 + dtype: category + Categories (3, int64): [3, 2, 1] + >>> s.cat.remove_categories([1]) + 0 + 1 2 + 2 3 + dtype: category + Categories (2, int64): [2, 3] + >>> s.cat.set_categories(list('abcde')) + 0 + 1 + 2 + dtype: category + Categories (5, object): ['a', 'b', 'c', 'd', 'e'] + >>> s.cat.as_ordered() + 0 1 + 1 2 + 2 3 + dtype: category + Categories (3, int64): [1 < 2 < 3] + >>> s.cat.as_unordered() + 0 1 + 1 2 + 2 3 + dtype: category + Categories (3, int64): [1, 2, 3] + """ + _column: CategoricalColumn def __init__(self, parent: SeriesOrIndex): - """ - Accessor object for categorical properties of the Series values. - Be aware that assigning to `categories` is a inplace operation, - while all methods return new categorical data per default. 
- - Parameters - ---------- - column : Column - parent : Series or CategoricalIndex - - Examples - -------- - >>> s = cudf.Series([1,2,3], dtype='category') - >>> s - >>> s - 0 1 - 1 2 - 2 3 - dtype: category - Categories (3, int64): [1, 2, 3] - >>> s.cat.categories - Int64Index([1, 2, 3], dtype='int64') - >>> s.cat.reorder_categories([3,2,1]) - 0 1 - 1 2 - 2 3 - dtype: category - Categories (3, int64): [3, 2, 1] - >>> s.cat.remove_categories([1]) - 0 - 1 2 - 2 3 - dtype: category - Categories (2, int64): [2, 3] - >>> s.cat.set_categories(list('abcde')) - 0 - 1 - 2 - dtype: category - Categories (5, object): ['a', 'b', 'c', 'd', 'e'] - >>> s.cat.as_ordered() - 0 1 - 1 2 - 2 3 - dtype: category - Categories (3, int64): [1 < 2 < 3] - >>> s.cat.as_unordered() - 0 1 - 1 2 - 2 3 - dtype: category - Categories (3, int64): [1, 2, 3] - """ if not is_categorical_dtype(parent.dtype): raise AttributeError( "Can only use .cat accessor with a 'category' dtype" @@ -648,7 +649,19 @@ def reorder_categories( class CategoricalColumn(column.ColumnBase): - """Implements operations for Columns of Categorical type + """ + Implements operations for Columns of Categorical type + + Parameters + ---------- + dtype : CategoricalDtype + mask : Buffer + The validity mask + offset : int + Data offset + children : Tuple[ColumnBase] + Two non-null columns containing the categories and codes + respectively """ dtype: cudf.core.dtypes.CategoricalDtype @@ -664,18 +677,7 @@ def __init__( null_count: int = None, children: Tuple["column.ColumnBase", ...] 
= (), ): - """ - Parameters - ---------- - dtype : CategoricalDtype - mask : Buffer - The validity mask - offset : int - Data offset - children : Tuple[ColumnBase] - Two non-null columns containing the categories and codes - respectively - """ + if size is None: for child in children: assert child.offset == 0 diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index f3d1880b290..623d0e43f5d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -52,6 +52,19 @@ class DatetimeColumn(column.ColumnBase): + """ + A Column implementation for Date-time types. + + Parameters + ---------- + data : Buffer + The datetime values + dtype : np.dtype + The data type + mask : Buffer; optional + The validity mask + """ + def __init__( self, data: Buffer, @@ -61,16 +74,7 @@ def __init__( offset: int = 0, null_count: int = None, ): - """ - Parameters - ---------- - data : Buffer - The datetime values - dtype : np.dtype - The data type - mask : Buffer; optional - The validity mask - """ + dtype = np.dtype(dtype) if data.size % dtype.itemsize: raise ValueError("Buffer size must be divisible by element size") diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index a3f4a82a7dc..29211b0f855 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -36,6 +36,17 @@ class NumericalColumn(NumericalBaseColumn): + """ + A Column object for Numeric types. 
+ + Parameters + ---------- + data : Buffer + dtype : np.dtype + The dtype associated with the data Buffer + mask : Buffer, optional + """ + def __init__( self, data: Buffer, @@ -45,14 +56,6 @@ def __init__( offset: int = 0, null_count: int = None, ): - """ - Parameters - ---------- - data : Buffer - dtype : np.dtype - The dtype associated with the data Buffer - mask : Buffer, optional - """ dtype = np.dtype(dtype) if data.size % dtype.itemsize: raise ValueError("Buffer size must be divisible by element size") diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 92c57477465..d90afdb7717 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -98,17 +98,18 @@ def str_to_boolean(column: StringColumn): class StringMethods(ColumnMethods): + """ + Vectorized string functions for Series and Index. + + This mimics pandas ``df.str`` interface. nulls stay null + unless handled otherwise by a particular method. + Patterned after Python’s string methods, with some + inspiration from R’s stringr package. + """ + _column: StringColumn def __init__(self, parent): - """ - Vectorized string functions for Series and Index. - - This mimics pandas ``df.str`` interface. nulls stay null - unless handled otherwise by a particular method. - Patterned after Python’s string methods, with some - inspiration from R’s stringr package. 
- """ value_type = ( parent.dtype.leaf_type if is_list_dtype(parent.dtype) @@ -4861,7 +4862,18 @@ def _expected_types_format(types): class StringColumn(column.ColumnBase): - """Implements operations for Columns of String type + """ + Implements operations for Columns of String type + + Parameters + ---------- + mask : Buffer + The validity mask + offset : int + Data offset + children : Tuple[Column] + Two non-null columns containing the string data and offsets + respectively """ _start_offset: Optional[int] @@ -4876,17 +4888,6 @@ def __init__( null_count: int = None, children: Tuple["column.ColumnBase", ...] = (), ): - """ - Parameters - ---------- - mask : Buffer - The validity mask - offset : int - Data offset - children : Tuple[Column] - Two non-null columns containing the string data and offsets - respectively - """ dtype = np.dtype("object") if size is None: diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index a27c20cc50c..b73353dd720 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -34,6 +34,24 @@ class TimeDeltaColumn(column.ColumnBase): + """ + Parameters + ---------- + data : Buffer + The Timedelta values + dtype : np.dtype + The data type + size : int + Size of memory allocation. + mask : Buffer; optional + The validity mask + offset : int + Data offset + null_count : int, optional + The number of null values. + If None, it is calculated automatically. + """ + def __init__( self, data: Buffer, @@ -43,23 +61,6 @@ def __init__( offset: int = 0, null_count: int = None, ): - """ - Parameters - ---------- - data : Buffer - The Timedelta values - dtype : np.dtype - The data type - size : int - Size of memory allocation. - mask : Buffer; optional - The validity mask - offset : int - Data offset - null_count : int, optional - The number of null values. - If None, it is calculated automatically. 
- """ dtype = np.dtype(dtype) if data.size % dtype.itemsize: raise ValueError("Buffer size must be divisible by element size") diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 6ecb0bcc139..9759d3e9328 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3642,6 +3642,12 @@ def shape(self): return (len(self),) def __iter__(self): + """ + Iterating over a GPU object is not efficient and hence not supported. + + Consider using ``.to_arrow()``, ``.to_pandas()`` or ``.values_host`` + if you wish to iterate over the values. + """ cudf.utils.utils.raise_iteration_error(obj=self) def __len__(self): diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 29c29691389..3c3743bd646 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -1018,93 +1018,93 @@ def _mimic_pandas_order( class DataFrameGroupBy(GroupBy, GetAttrGetItemMixin): + """ + Group DataFrame using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the object, + applying a function, and combining the results. This can be used to + group large amounts of data and compute operations on these groups. + + Parameters + ---------- + by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. If by is a + function, it’s called on each value of the object’s index. + If a dict or Series is passed, the Series or dict VALUES will + be used to determine the groups (the Series’ values are first + aligned; see .align() method). If a cupy array is passed, the + values are used as-is to determine the groups. A label or list + of labels may be passed to group by the columns in self. + Notice that a tuple is interpreted as a (single) key. + level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels.
+ as_index : bool, default True + For aggregated output, return object with group labels as + the index. Only relevant for DataFrame input. + as_index=False is effectively “SQL-style” grouped output. + sort : bool, default False + Sort result by group key. Differ from Pandas, cudf defaults to + ``False`` for better performance. Note this does not influence + the order of observations within each group. Groupby preserves + the order of rows within each group. + dropna : bool, optional + If True (default), do not include the "null" group. + + Returns + ------- + DataFrameGroupBy + Returns a groupby object that contains information + about the groups. + + Examples + -------- + >>> import cudf + >>> import pandas as pd + >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon', + ... 'Parrot', 'Parrot'], + ... 'Max Speed': [380., 370., 24., 26.]}) + >>> df + Animal Max Speed + 0 Falcon 380.0 + 1 Falcon 370.0 + 2 Parrot 24.0 + 3 Parrot 26.0 + >>> df.groupby(['Animal']).mean() + Max Speed + Animal + Falcon 375.0 + Parrot 25.0 + + >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... ['Captive', 'Wild', 'Captive', 'Wild']] + >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) + >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]}, + index=index) + >>> df + Max Speed + Animal Type + Falcon Captive 390.0 + Wild 350.0 + Parrot Captive 30.0 + Wild 20.0 + >>> df.groupby(level=0).mean() + Max Speed + Animal + Falcon 370.0 + Parrot 25.0 + >>> df.groupby(level="Type").mean() + Max Speed + Type + Wild 185.0 + Captive 210.0 + """ + _PROTECTED_KEYS = frozenset(("obj",)) def __init__( self, obj, by=None, level=None, sort=False, as_index=True, dropna=True ): - """ - Group DataFrame using a mapper or by a Series of columns. - - A groupby operation involves some combination of splitting the object, - applying a function, and combining the results. This can be used to - group large amounts of data and compute operations on these groups. 
- - Parameters - ---------- - by : mapping, function, label, or list of labels - Used to determine the groups for the groupby. If by is a - function, it’s called on each value of the object’s index. - If a dict or Series is passed, the Series or dict VALUES will - be used to determine the groups (the Series’ values are first - aligned; see .align() method). If a cupy array is passed, the - values are used as-is determine the groups. A label or list - of labels may be passed to group by the columns in self. - Notice that a tuple is interpreted as a (single) key. - level : int, level name, or sequence of such, default None - If the axis is a MultiIndex (hierarchical), group by a particular - level or levels. - as_index : bool, default True - For aggregated output, return object with group labels as - the index. Only relevant for DataFrame input. - as_index=False is effectively “SQL-style” grouped output. - sort : bool, default False - Sort result by group key. Differ from Pandas, cudf defaults to - ``False`` for better performance. Note this does not influence - the order of observations within each group. Groupby preserves - the order of rows within each group. - dropna : bool, optional - If True (default), do not include the "null" group. - - Returns - ------- - DataFrameGroupBy - Returns a groupby object that contains information - about the groups. - - Examples - -------- - >>> import cudf - >>> import pandas as pd - >>> df = cudf.DataFrame({'Animal': ['Falcon', 'Falcon', - ... 'Parrot', 'Parrot'], - ... 'Max Speed': [380., 370., 24., 26.]}) - >>> df - Animal Max Speed - 0 Falcon 380.0 - 1 Falcon 370.0 - 2 Parrot 24.0 - 3 Parrot 26.0 - >>> df.groupby(['Animal']).mean() - Max Speed - Animal - Falcon 375.0 - Parrot 25.0 - - >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'], - ... 
['Captive', 'Wild', 'Captive', 'Wild']] - >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type')) - >>> df = cudf.DataFrame({'Max Speed': [390., 350., 30., 20.]}, - index=index) - >>> df - Max Speed - Animal Type - Falcon Captive 390.0 - Wild 350.0 - Parrot Captive 30.0 - Wild 20.0 - >>> df.groupby(level=0).mean() - Max Speed - Animal - Falcon 370.0 - Parrot 25.0 - >>> df.groupby(level="Type").mean() - Max Speed - Type - Wild 185.0 - Captive 210.0 - - """ super().__init__( obj=obj, by=by, @@ -1127,68 +1127,68 @@ def nunique(self): class SeriesGroupBy(GroupBy): + """ + Group Series using a mapper or by a Series of columns. + + A groupby operation involves some combination of splitting the object, + applying a function, and combining the results. This can be used to + group large amounts of data and compute operations on these groups. + + Parameters + ---------- + by : mapping, function, label, or list of labels + Used to determine the groups for the groupby. If by is a + function, it’s called on each value of the object’s index. + If a dict or Series is passed, the Series or dict VALUES will + be used to determine the groups (the Series’ values are first + aligned; see .align() method). If an cupy array is passed, the + values are used as-is determine the groups. A label or list + of labels may be passed to group by the columns in self. + Notice that a tuple is interpreted as a (single) key. + level : int, level name, or sequence of such, default None + If the axis is a MultiIndex (hierarchical), group by a particular + level or levels. + as_index : bool, default True + For aggregated output, return object with group labels as + the index. Only relevant for DataFrame input. + as_index=False is effectively “SQL-style” grouped output. + sort : bool, default False + Sort result by group key. Differ from Pandas, cudf defaults to + ``False`` for better performance. Note this does not influence + the order of observations within each group. 
Groupby preserves + the order of rows within each group. + + Returns + ------- + SeriesGroupBy + Returns a groupby object that contains information + about the groups. + + Examples + -------- + >>> ser = cudf.Series([390., 350., 30., 20.], + ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], + ... name="Max Speed") + >>> ser + Falcon 390.0 + Falcon 350.0 + Parrot 30.0 + Parrot 20.0 + Name: Max Speed, dtype: float64 + >>> ser.groupby(level=0).mean() + Falcon 370.0 + Parrot 25.0 + Name: Max Speed, dtype: float64 + >>> ser.groupby(ser > 100).mean() + Max Speed + False 25.0 + True 370.0 + Name: Max Speed, dtype: float64 + """ + def __init__( self, obj, by=None, level=None, sort=False, as_index=True, dropna=True ): - """ - Group Series using a mapper or by a Series of columns. - - A groupby operation involves some combination of splitting the object, - applying a function, and combining the results. This can be used to - group large amounts of data and compute operations on these groups. - - Parameters - ---------- - by : mapping, function, label, or list of labels - Used to determine the groups for the groupby. If by is a - function, it’s called on each value of the object’s index. - If a dict or Series is passed, the Series or dict VALUES will - be used to determine the groups (the Series’ values are first - aligned; see .align() method). If an cupy array is passed, the - values are used as-is determine the groups. A label or list - of labels may be passed to group by the columns in self. - Notice that a tuple is interpreted as a (single) key. - level : int, level name, or sequence of such, default None - If the axis is a MultiIndex (hierarchical), group by a particular - level or levels. - as_index : bool, default True - For aggregated output, return object with group labels as - the index. Only relevant for DataFrame input. - as_index=False is effectively “SQL-style” grouped output. - sort : bool, default False - Sort result by group key. 
Differ from Pandas, cudf defaults to - ``False`` for better performance. Note this does not influence - the order of observations within each group. Groupby preserves - the order of rows within each group. - - Returns - ------- - SeriesGroupBy - Returns a groupby object that contains information - about the groups. - - Examples - -------- - >>> ser = cudf.Series([390., 350., 30., 20.], - ... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], - ... name="Max Speed") - >>> ser - Falcon 390.0 - Falcon 350.0 - Parrot 30.0 - Parrot 20.0 - Name: Max Speed, dtype: float64 - >>> ser.groupby(level=0).mean() - Falcon 370.0 - Parrot 25.0 - Name: Max Speed, dtype: float64 - >>> ser.groupby(ser > 100).mean() - Max Speed - False 25.0 - True 370.0 - Name: Max Speed, dtype: float64 - - """ super().__init__( obj=obj, by=by, diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 57937f247aa..97d764cdf60 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3095,7 +3095,7 @@ def __repr__(self): + ")" ) - @copy_docstring(StringMethods.__init__) # type: ignore + @copy_docstring(StringMethods) # type: ignore @property def str(self): return StringMethods(parent=self) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 15d6d827ba0..d295cf0c691 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2355,17 +2355,17 @@ def __invert__(self): def cat(self): return CategoricalAccessor(parent=self) - @copy_docstring(StringMethods.__init__) # type: ignore + @copy_docstring(StringMethods) # type: ignore @property def str(self): return StringMethods(parent=self) - @copy_docstring(ListMethods.__init__) # type: ignore + @copy_docstring(ListMethods) # type: ignore @property def list(self): return ListMethods(parent=self) - @copy_docstring(StructMethods.__init__) # type: ignore + @copy_docstring(StructMethods) # type: ignore @property def struct(self): return 
StructMethods(parent=self) diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py index 00f60cfc8b5..181fa64240e 100644 --- a/python/cudf/cudf/core/tools/datetimes.py +++ b/python/cudf/cudf/core/tools/datetimes.py @@ -345,6 +345,66 @@ def get_units(value): class DateOffset: + """ + An object used for binary ops where calendrical arithmetic + is desired rather than absolute time arithmetic. Used to + add or subtract a whole number of periods, such as several + months or years, to a series or index of datetime dtype. + Works similarly to pd.DateOffset, but stores the offset + on the device (GPU). + + Parameters + ---------- + n : int, default 1 + The number of time periods the offset represents. + **kwds + Temporal parameter that add to or replace the offset value. + Parameters that **add** to the offset (like Timedelta): + - months + + See Also + -------- + pandas.DateOffset : The equivalent Pandas object that this + object replicates + + Examples + -------- + >>> from cudf import DateOffset + >>> ts = cudf.Series([ + "2000-01-01 00:00:00.012345678", + "2000-01-31 00:00:00.012345678", + "2000-02-29 00:00:00.012345678", + ], dtype='datetime64[ns]') + >>> ts + DateOffset(months=3) + 0 2000-04-01 00:00:00.012345678 + 1 2000-04-30 00:00:00.012345678 + 2 2000-05-29 00:00:00.012345678 + dtype: datetime64[ns] + >>> ts - DateOffset(months=12) + 0 1999-01-01 00:00:00.012345678 + 1 1999-01-31 00:00:00.012345678 + 2 1999-02-28 00:00:00.012345678 + dtype: datetime64[ns] + + Notes + ----- + Note that cuDF does not yet support DateOffset arguments + that 'replace' units in the datetime data being operated on + such as + - year + - month + - week + - day + - hour + - minute + - second + - microsecond + - millisecond + - nanosecond + + cuDF does not yet support rounding via a `normalize` + keyword argument.
+ """ _UNITS_TO_CODES = { "nanoseconds": "ns", @@ -362,66 +422,6 @@ class DateOffset: _CODES_TO_UNITS = {v: k for k, v in _UNITS_TO_CODES.items()} def __init__(self, n=1, normalize=False, **kwds): - """ - An object used for binary ops where calendrical arithmetic - is desired rather than absolute time arithmetic. Used to - add or subtract a whole number of periods, such as several - months or years, to a series or index of datetime dtype. - Works similarly to pd.DateOffset, but stores the offset - on the device (GPU). - - Parameters - ---------- - n : int, default 1 - The number of time periods the offset represents. - **kwds - Temporal parameter that add to or replace the offset value. - Parameters that **add** to the offset (like Timedelta): - - months - - See Also - -------- - pandas.DateOffset : The equivalent Pandas object that this - object replicates - - Examples - -------- - >>> from cudf import DateOffset - >>> ts = cudf.Series([ - "2000-01-01 00:00:00.012345678", - "2000-01-31 00:00:00.012345678", - "2000-02-29 00:00:00.012345678", - ], dtype='datetime64[ns]) - >>> ts + DateOffset(months=3) - 0 2000-04-01 00:00:00.012345678 - 1 2000-04-30 00:00:00.012345678 - 2 2000-05-29 00:00:00.012345678 - dtype: datetime64[ns] - >>> ts - DateOffset(months=12) - 0 1999-01-01 00:00:00.012345678 - 1 1999-01-31 00:00:00.012345678 - 2 1999-02-28 00:00:00.012345678 - dtype: datetime64[ns] - - Notes - ----- - Note that cuDF does not yet support DateOffset arguments - that 'replace' units in the datetime data being operated on - such as - - year - - month - - week - - day - - hour - - minute - - second - - microsecond - - millisecond - - nanosecond - - cuDF does not yet support rounding via a `normalize` - keyword argument. 
- """ if normalize: raise NotImplementedError( "normalize not yet supported for DateOffset" diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index d9a2fd89165..733f65a3b5d 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -353,14 +353,15 @@ def __repr__(self): class RollingGroupby(Rolling): - def __init__(self, groupby, window, min_periods=None, center=False): - """ - Grouped rolling window calculation. + """ + Grouped rolling window calculation. - See also - -------- - cudf.core.window.Rolling - """ + See also + -------- + cudf.core.window.Rolling + """ + + def __init__(self, groupby, window, min_periods=None, center=False): sort_order = groupby.grouping.keys.argsort() # TODO: there may be overlap between the columns From 58604a11a251186d61f2222375f4ad90cbdc8c9a Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 2 Aug 2021 19:26:43 -0700 Subject: [PATCH 37/49] refactor init docs --- python/cudf/cudf/core/buffer.py | 34 ++--- python/cudf/cudf/core/column_accessor.py | 26 ++-- python/cudf/cudf/core/dataframe.py | 167 ++++++++++++----------- python/cudf/cudf/core/dtypes.py | 119 ++++++++-------- python/cudf/cudf/core/index.py | 22 +-- python/cudf/cudf/core/scalar.py | 77 +++++------ 6 files changed, 225 insertions(+), 220 deletions(-) diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py index c6875052685..8d80e488e2e 100644 --- a/python/cudf/cudf/core/buffer.py +++ b/python/cudf/cudf/core/buffer.py @@ -15,6 +15,23 @@ class Buffer(Serializable): + """ + A Buffer represents a device memory allocation. + + Parameters + ---------- + data : Buffer, array_like, int + An array-like object or integer representing a + device or host pointer to pre-allocated memory. + size : int, optional + Size of memory allocation. Required if a pointer + is passed for `data`. 
+ owner : object, optional + Python object to which the lifetime of the memory + allocation is tied. If provided, a reference to this + object is kept in this Buffer. + """ + ptr: int size: int _owner: Any @@ -22,22 +39,7 @@ class Buffer(Serializable): def __init__( self, data: Any = None, size: Optional[int] = None, owner: Any = None ): - """ - A Buffer represents a device memory allocation. - - Parameters - ---------- - data : Buffer, array_like, int - An array-like object or integer representing a - device or host pointer to pre-allocated memory. - size : int, optional - Size of memory allocation. Required if a pointer - is passed for `data`. - owner : object, optional - Python object to which the lifetime of the memory - allocation is tied. If provided, a reference to this - object is kept in this Buffer. - """ + if isinstance(data, Buffer): self.ptr = data.ptr self.size = data.size diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py index 607b8ac307b..56882f89af8 100644 --- a/python/cudf/cudf/core/column_accessor.py +++ b/python/cudf/cudf/core/column_accessor.py @@ -80,6 +80,19 @@ def _to_flat_dict(d): class ColumnAccessor(MutableMapping): + """ + Parameters + ---------- + data : mapping + Mapping of keys to column values. + multiindex : bool, optional + Whether tuple keys represent a hierarchical + index with multiple "levels" (default=False). + level_names : tuple, optional + Tuple containing names for each of the levels. + For a non-hierarchical index, a tuple of size 1 + may be passe. + """ _data: "Dict[Any, ColumnBase]" multiindex: bool @@ -91,19 +104,6 @@ def __init__( multiindex: bool = False, level_names=None, ): - """ - Parameters - ---------- - data : mapping - Mapping of keys to column values. - multiindex : bool, optional - Whether tuple keys represent a hierarchical - index with multiple "levels" (default=False). - level_names : tuple, optional - Tuple containing names for each of the levels. 
- For a non-hierarchical index, a tuple of size 1 - may be passe. - """ if data is None: data = {} # TODO: we should validate the keys of `data` diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0355b677337..f6778e1d176 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -69,100 +69,101 @@ class DataFrame(Frame, Serializable, GetAttrGetItemMixin): + """ + A GPU Dataframe object. - _PROTECTED_KEYS = frozenset(("_data", "_index")) - - @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python") - def __init__(self, data=None, index=None, columns=None, dtype=None): - """ - A GPU Dataframe object. + Parameters + ---------- + data : array-like, Iterable, dict, or DataFrame. + Dict can contain Series, arrays, constants, or list-like objects. - Parameters - ---------- - data : array-like, Iterable, dict, or DataFrame. - Dict can contain Series, arrays, constants, or list-like objects. + index : Index or array-like + Index to use for resulting frame. Will default to + RangeIndex if no indexing information part of input data and + no index provided. - index : Index or array-like - Index to use for resulting frame. Will default to - RangeIndex if no indexing information part of input data and - no index provided. + columns : Index or array-like + Column labels to use for resulting frame. + Will default to RangeIndex (0, 1, 2, …, n) if no column + labels are provided. - columns : Index or array-like - Column labels to use for resulting frame. - Will default to RangeIndex (0, 1, 2, …, n) if no column - labels are provided. + dtype : dtype, default None + Data type to force. Only a single dtype is allowed. + If None, infer. - dtype : dtype, default None - Data type to force. Only a single dtype is allowed. - If None, infer. 
- - Examples - -------- + Examples + -------- - Build dataframe with ``__setitem__``: + Build dataframe with ``__setitem__``: - >>> import cudf - >>> df = cudf.DataFrame() - >>> df['key'] = [0, 1, 2, 3, 4] - >>> df['val'] = [float(i + 10) for i in range(5)] # insert column - >>> df - key val - 0 0 10.0 - 1 1 11.0 - 2 2 12.0 - 3 3 13.0 - 4 4 14.0 + >>> import cudf + >>> df = cudf.DataFrame() + >>> df['key'] = [0, 1, 2, 3, 4] + >>> df['val'] = [float(i + 10) for i in range(5)] # insert column + >>> df + key val + 0 0 10.0 + 1 1 11.0 + 2 2 12.0 + 3 3 13.0 + 4 4 14.0 + + Build DataFrame via dict of columns: + + >>> import numpy as np + >>> from datetime import datetime, timedelta + >>> t0 = datetime.strptime('2018-10-07 12:00:00', '%Y-%m-%d %H:%M:%S') + >>> n = 5 + >>> df = cudf.DataFrame({ + ... 'id': np.arange(n), + ... 'datetimes': np.array( + ... [(t0+ timedelta(seconds=x)) for x in range(n)]) + ... }) + >>> df + id datetimes + 0 0 2018-10-07T12:00:00.000 + 1 1 2018-10-07T12:00:01.000 + 2 2 2018-10-07T12:00:02.000 + 3 3 2018-10-07T12:00:03.000 + 4 4 2018-10-07T12:00:04.000 + + Build DataFrame via list of rows as tuples: + + >>> df = cudf.DataFrame([ + ... (5, "cats", "jump", np.nan), + ... (2, "dogs", "dig", 7.5), + ... (3, "cows", "moo", -2.1, "occasionally"), + ... ]) + >>> df + 0 1 2 3 4 + 0 5 cats jump + 1 2 dogs dig 7.5 + 2 3 cows moo -2.1 occasionally + + Convert from a Pandas DataFrame: - Build DataFrame via dict of columns: + >>> import pandas as pd + >>> pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]}) + >>> pdf + a b + 0 0 0.1 + 1 1 0.2 + 2 2 NaN + 3 3 0.3 + >>> df = cudf.from_pandas(pdf) + >>> df + a b + 0 0 0.1 + 1 1 0.2 + 2 2 + 3 3 0.3 + """ - >>> import numpy as np - >>> from datetime import datetime, timedelta - >>> t0 = datetime.strptime('2018-10-07 12:00:00', '%Y-%m-%d %H:%M:%S') - >>> n = 5 - >>> df = cudf.DataFrame({ - ... 'id': np.arange(n), - ... 'datetimes': np.array( - ... 
[(t0+ timedelta(seconds=x)) for x in range(n)]) - ... }) - >>> df - id datetimes - 0 0 2018-10-07T12:00:00.000 - 1 1 2018-10-07T12:00:01.000 - 2 2 2018-10-07T12:00:02.000 - 3 3 2018-10-07T12:00:03.000 - 4 4 2018-10-07T12:00:04.000 - - Build DataFrame via list of rows as tuples: - - >>> df = cudf.DataFrame([ - ... (5, "cats", "jump", np.nan), - ... (2, "dogs", "dig", 7.5), - ... (3, "cows", "moo", -2.1, "occasionally"), - ... ]) - >>> df - 0 1 2 3 4 - 0 5 cats jump - 1 2 dogs dig 7.5 - 2 3 cows moo -2.1 occasionally + _PROTECTED_KEYS = frozenset(("_data", "_index")) - Convert from a Pandas DataFrame: + @annotate("DATAFRAME_INIT", color="blue", domain="cudf_python") + def __init__(self, data=None, index=None, columns=None, dtype=None): - >>> import pandas as pd - >>> pdf = pd.DataFrame({'a': [0, 1, 2, 3],'b': [0.1, 0.2, None, 0.3]}) - >>> pdf - a b - 0 0 0.1 - 1 1 0.2 - 2 2 NaN - 3 3 0.3 - >>> df = cudf.from_pandas(pdf) - >>> df - a b - 0 0 0.1 - 1 1 0.2 - 2 2 - 3 3 0.3 - """ super().__init__() if isinstance(columns, (Series, cudf.BaseIndex)): diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 6dbe55d0bb8..2d8bf9e5a2c 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -27,14 +27,14 @@ class _BaseDtype(ExtensionDtype, Serializable): class CategoricalDtype(_BaseDtype): + """ + dtype similar to pd.CategoricalDtype with the categories + stored on the GPU. + """ ordered: Optional[bool] def __init__(self, categories=None, ordered: bool = None) -> None: - """ - dtype similar to pd.CategoricalDtype with the categories - stored on the GPU. 
- """ self._categories = self._init_categories(categories) self.ordered = ordered @@ -223,14 +223,14 @@ def deserialize(cls, header: dict, frames: list): class StructDtype(_BaseDtype): + """ + fields : dict + A mapping of field names to dtypes + """ name = "struct" def __init__(self, fields): - """ - fields : dict - A mapping of field names to dtypes - """ pa_fields = { k: cudf.utils.dtypes.cudf_dtype_to_pa_type(v) for k, v in fields.items() @@ -309,34 +309,34 @@ def deserialize(cls, header: dict, frames: list): class Decimal32Dtype(_BaseDtype): + """ + Parameters + ---------- + precision : int + The total number of digits in each value of this dtype + scale : int, optional + The scale of the Decimal32Dtype. See Notes below. + + Notes + ----- + When the scale is positive: + - numbers with fractional parts (e.g., 0.0042) can be represented + - the scale is the total number of digits to the right of the + decimal point + When the scale is negative: + - only multiples of powers of 10 (including 10**0) can be + represented (e.g., 1729, 4200, 1000000) + - the scale represents the number of trailing zeros in the value. + For example, 42 is representable with precision=2 and scale=0. + 13.0051 is representable with precision=6 and scale=4, + and *not* representable with precision<6 or scale<4. + """ name = "decimal32" _metadata = ("precision", "scale") MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max)) def __init__(self, precision, scale=0): - """ - Parameters - ---------- - precision : int - The total number of digits in each value of this dtype - scale : int, optional - The scale of the Decimal32Dtype. See Notes below. 
- - Notes - ----- - When the scale is positive: - - numbers with fractional parts (e.g., 0.0042) can be represented - - the scale is the total number of digits to the right of the - decimal point - When the scale is negative: - - only multiples of powers of 10 (including 10**0) can be - represented (e.g., 1729, 4200, 1000000) - - the scale represents the number of trailing zeros in the value. - For example, 42 is representable with precision=2 and scale=0. - 13.0051 is representable with precision=6 and scale=4, - and *not* representable with precision<6 or scale<4. - """ self._validate(precision, scale) self._typ = pa.decimal128(precision, scale) @@ -417,34 +417,34 @@ def deserialize(cls, header: dict, frames: list): class Decimal64Dtype(_BaseDtype): + """ + Parameters + ---------- + precision : int + The total number of digits in each value of this dtype + scale : int, optional + The scale of the Decimal64Dtype. See Notes below. + + Notes + ----- + When the scale is positive: + - numbers with fractional parts (e.g., 0.0042) can be represented + - the scale is the total number of digits to the right of the + decimal point + When the scale is negative: + - only multiples of powers of 10 (including 10**0) can be + represented (e.g., 1729, 4200, 1000000) + - the scale represents the number of trailing zeros in the value. + For example, 42 is representable with precision=2 and scale=0. + 13.0051 is representable with precision=6 and scale=4, + and *not* representable with precision<6 or scale<4. + """ name = "decimal64" _metadata = ("precision", "scale") MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max)) def __init__(self, precision, scale=0): - """ - Parameters - ---------- - precision : int - The total number of digits in each value of this dtype - scale : int, optional - The scale of the Decimal64Dtype. See Notes below. 
- - Notes - ----- - When the scale is positive: - - numbers with fractional parts (e.g., 0.0042) can be represented - - the scale is the total number of digits to the right of the - decimal point - When the scale is negative: - - only multiples of powers of 10 (including 10**0) can be - represented (e.g., 1729, 4200, 1000000) - - the scale represents the number of trailing zeros in the value. - For example, 42 is representable with precision=2 and scale=0. - 13.0051 is representable with precision=6 and scale=4, - and *not* representable with precision<6 or scale<4. - """ self._validate(precision, scale) self._typ = pa.decimal128(precision, scale) @@ -525,16 +525,17 @@ def deserialize(cls, header: dict, frames: list): class IntervalDtype(StructDtype): + """ + subtype: str, np.dtype + The dtype of the Interval bounds. + closed: {‘right’, ‘left’, ‘both’, ‘neither’}, default ‘right’ + Whether the interval is closed on the left-side, right-side, + both or neither. See the Notes for more detailed explanation. + """ + name = "interval" def __init__(self, subtype, closed="right"): - """ - subtype: str, np.dtype - The dtype of the Interval bounds. - closed: {‘right’, ‘left’, ‘both’, ‘neither’}, default ‘right’ - Whether the interval is closed on the left-side, right-side, - both or neither. See the Notes for more detailed explanation. 
- """ super().__init__(fields={"left": subtype, "right": subtype}) if closed in ["left", "right", "neither", "both"]: diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 97d764cdf60..32c1642fbdc 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1783,25 +1783,25 @@ def __mul__(self, other): class GenericIndex(BaseIndex): - """An array of orderable values that represent the indices of another Column + """ + An array of orderable values that represent the indices of another Column Attributes ---------- _values: A Column object name: A string + + Parameters + ---------- + data : Column + The Column of data for this index + name : str optional + The name of the Index. If not provided, the Index adopts the value + Column's name. Otherwise if this name is different from the value + Column's, the data Column will be cloned to adopt this name. """ def __init__(self, data, **kwargs): - """ - Parameters - ---------- - data : Column - The Column of data for this index - name : str optional - The name of the Index. If not provided, the Index adopts the value - Column's name. Otherwise if this name is different from the value - Column's, the data Column will be cloned to adopt this name. - """ kwargs = _setdefault_name(data, **kwargs) # normalize the input diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index c6663a25684..4403a58dd30 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -17,45 +17,46 @@ class Scalar(object): + """ + A GPU-backed scalar object with NumPy scalar like properties + May be used in binary operations against other scalars, cuDF + Series, DataFrame, and Index objects. 
+ + Examples + -------- + >>> import cudf + >>> cudf.Scalar(42, dtype='int64') + Scalar(42, dtype=int64) + >>> cudf.Scalar(42, dtype='int32') + cudf.Scalar(42, dtype='float64') + Scalar(84.0, dtype=float64) + >>> cudf.Scalar(42, dtype='int64') + np.int8(21) + Scalar(63, dtype=int64) + >>> x = cudf.Scalar(42, dtype='datetime64[s]') + >>> y = cudf.Scalar(21, dtype='timedelta64[ns]) + >>> x - y + Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns]) + >>> cudf.Series([1,2,3]) + cudf.Scalar(1) + 0 2 + 1 3 + 2 4 + dtype: int64 + >>> df = cudf.DataFrame({'a':[1,2,3], 'b':[4.5, 5.5, 6.5]}) + >>> slr = cudf.Scalar(10, dtype='uint8') + >>> df - slr + a b + 0 -9 -5.5 + 1 -8 -4.5 + 2 -7 -3.5 + + Parameters + ---------- + value : Python Scalar, NumPy Scalar, or cuDF Scalar + The scalar value to be converted to a GPU backed scalar object + dtype : np.dtype or string specifier + The data type + """ + def __init__(self, value, dtype=None): - """ - A GPU-backed scalar object with NumPy scalar like properties - May be used in binary operations against other scalars, cuDF - Series, DataFrame, and Index objects. 
- - Examples - -------- - >>> import cudf - >>> cudf.Scalar(42, dtype='int64') - Scalar(42, dtype=int64) - >>> cudf.Scalar(42, dtype='int32') + cudf.Scalar(42, dtype='float64') - Scalar(84.0, dtype=float64) - >>> cudf.Scalar(42, dtype='int64') + np.int8(21) - Scalar(63, dtype=int64) - >>> x = cudf.Scalar(42, dtype='datetime64[s]') - >>> y = cudf.Scalar(21, dtype='timedelta64[ns]) - >>> x - y - Scalar(1970-01-01T00:00:41.999999979, dtype=datetime64[ns]) - >>> cudf.Series([1,2,3]) + cudf.Scalar(1) - 0 2 - 1 3 - 2 4 - dtype: int64 - >>> df = cudf.DataFrame({'a':[1,2,3], 'b':[4.5, 5.5, 6.5]}) - >>> slr = cudf.Scalar(10, dtype='uint8') - >>> df - slr - a b - 0 -9 -5.5 - 1 -8 -4.5 - 2 -7 -3.5 - - Parameters - ---------- - value : Python Scalar, NumPy Scalar, or cuDF Scalar - The scalar value to be converted to a GPU backed scalar object - dtype : np.dtype or string specifier - The data type - """ self._host_value = None self._host_dtype = None From abf11c7027f869e7a04746dc8776d2dcb194d15d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 2 Aug 2021 19:36:23 -0700 Subject: [PATCH 38/49] Fix copy_docstring calls --- python/cudf/cudf/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index d295cf0c691..dad525b5b41 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2350,7 +2350,7 @@ def __invert__(self): f"Operation `~` not supported on {self.dtype.type.__name__}" ) - @copy_docstring(CategoricalAccessor.__init__) # type: ignore + @copy_docstring(CategoricalAccessor) # type: ignore @property def cat(self): return CategoricalAccessor(parent=self) @@ -5756,7 +5756,7 @@ def diff(self, periods=1): return Series(output_col, name=self.name, index=self.index) - @copy_docstring(SeriesGroupBy.__init__) + @copy_docstring(SeriesGroupBy) def groupby( self, by=None, From ed7da825889ca73552df76240b144e36ad151ee6 Mon Sep 17 00:00:00 2001 From: 
galipremsagar Date: Mon, 2 Aug 2021 20:53:14 -0700 Subject: [PATCH 39/49] fix docs --- docs/cudf/source/api_docs/dataframe.rst | 1 + docs/cudf/source/api_docs/index_objects.rst | 1 + docs/cudf/source/conf.py | 6 +++--- python/cudf/cudf/core/dataframe.py | 2 ++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst index f5920c8f11c..12ff1f13bc4 100644 --- a/docs/cudf/source/api_docs/dataframe.rst +++ b/docs/cudf/source/api_docs/dataframe.rst @@ -7,6 +7,7 @@ Constructor ~~~~~~~~~~~ .. autosummary:: :toctree: api/ + :template: autosummary/class_with_autosummary.rst DataFrame diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst index 75d21021a91..c23c9a3f6c1 100644 --- a/docs/cudf/source/api_docs/index_objects.rst +++ b/docs/cudf/source/api_docs/index_objects.rst @@ -12,6 +12,7 @@ used before calling these methods directly.** .. autosummary:: :toctree: api/ + :template: autosummary/class_with_autosummary.rst Index diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 0f85fd09399..20b0eb9f736 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -256,9 +256,9 @@ def process_class_docstrings(app, what, name, obj, options, lines): if what == "class": if name in {"cudf.RangeIndex", "cudf.Int64Index", "cudf.UInt64Index", "cudf.Float64Index", "cudf.CategoricalIndex", "cudf.IntervalIndex", "cudf.MultiIndex", "cudf.DatetimeIndex", "cudf.TimedeltaIndex", "cudf.TimedeltaIndex"}: - # cut_index = lines.index(':Attributes:') - # lines[:] = lines[:cut_index] - pass + cut_index = lines.index('.. 
rubric:: Attributes') + lines[:] = lines[:cut_index] + diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f6778e1d176..c3e3b7e4da7 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -4468,6 +4468,7 @@ def join( ) return df + @copy_docstring(DataFrameGroupBy) def groupby( self, by=None, @@ -4512,6 +4513,7 @@ def groupby( sort=sort, ) + @copy_docstring(Rolling) def rolling( self, window, min_periods=None, center=False, axis=0, win_type=None ): From d4ce421fb43c5c2c9824b868eccd897d1f450372 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 3 Aug 2021 12:18:52 -0700 Subject: [PATCH 40/49] misc fixes --- python/cudf/cudf/__init__.py | 1 + python/cudf/cudf/_lib/csv.pyx | 4 ++-- python/cudf/cudf/_lib/orc.pyx | 4 ++-- python/cudf/cudf/core/__init__.py | 1 + python/cudf/cudf/core/algorithms.py | 2 +- python/cudf/cudf/core/column/string.py | 6 +++--- python/cudf/cudf/core/dataframe.py | 6 +++--- python/cudf/cudf/core/frame.py | 10 +++++----- python/cudf/cudf/core/groupby/groupby.py | 4 ++-- python/cudf/cudf/core/index.py | 22 +++++++++++----------- python/cudf/cudf/core/reshape.py | 4 ++-- python/cudf/cudf/core/series.py | 14 +++++++------- python/cudf/cudf/core/window/rolling.py | 4 ++-- python/cudf/cudf/tests/test_dataframe.py | 4 +--- python/cudf/cudf/tests/test_groupby.py | 2 +- python/cudf/cudf/tests/test_index.py | 8 ++++---- python/cudf/cudf/tests/test_text.py | 4 ++-- python/cudf/cudf/utils/ioutils.py | 20 ++++++++++---------- python/dask_cudf/dask_cudf/backends.py | 4 ++-- 19 files changed, 62 insertions(+), 62 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 2d52b517242..be78b9cf825 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -31,6 +31,7 @@ IntervalIndex, MultiIndex, RangeIndex, + StringIndex, Scalar, Series, TimedeltaIndex, diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx 
index 2dfa61ee900..d1d5ef38dcd 100644 --- a/python/cudf/cudf/_lib/csv.pyx +++ b/python/cudf/cudf/_lib/csv.pyx @@ -359,7 +359,7 @@ def read_csv( See Also -------- - cudf.io.csv.read_csv + cudf.read_csv """ if not isinstance(datasource, (BytesIO, StringIO, bytes, @@ -429,7 +429,7 @@ cpdef write_csv( See Also -------- - cudf.io.csv.to_csv + cudf.to_csv """ cdef table_view input_table_view = \ table.view() if index is True else table.data_view() diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index e15b569ed85..b888f213921 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -84,7 +84,7 @@ cpdef read_orc(object filepaths_or_buffers, See Also -------- - cudf.io.orc.read_orc + cudf.read_orc """ cdef orc_reader_options c_orc_reader_options = make_orc_reader_options( filepaths_or_buffers, @@ -142,7 +142,7 @@ cpdef write_orc(Table table, See Also -------- - cudf.io.orc.read_orc + cudf.read_orc """ cdef compression_type compression_ = _get_comp_type(compression) cdef table_metadata metadata_ = table_metadata() diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py index 5eaa5b52fd4..016aba2edb3 100644 --- a/python/cudf/cudf/core/__init__.py +++ b/python/cudf/cudf/core/__init__.py @@ -17,6 +17,7 @@ Int64Index, IntervalIndex, RangeIndex, + StringIndex, TimedeltaIndex, UInt8Index, UInt16Index, diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py index 9f26ac8ee78..38b6f8789bb 100644 --- a/python/cudf/cudf/core/algorithms.py +++ b/python/cudf/cudf/core/algorithms.py @@ -35,7 +35,7 @@ def factorize(values, sort=False, na_sentinel=-1, size_hint=None): See Also -------- - cudf.core.series.Series.factorize : Encode the input values of Series. + cudf.Series.factorize : Encode the input values of Series. 
""" if sort: diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 19eb3906044..4008d2f06ab 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -2503,7 +2503,7 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: Also available on indices: - >>> idx = cudf.core.index.StringIndex(['X 123', 'Y 999']) + >>> idx = cudf.Index(['X 123', 'Y 999']) >>> idx StringIndex(['X 123' 'Y 999'], dtype='object') @@ -2570,7 +2570,7 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: Also available on indices: - >>> idx = cudf.core.index.StringIndex(['X 123', 'Y 999']) + >>> idx = cudf.Index(['X 123', 'Y 999']) >>> idx StringIndex(['X 123' 'Y 999'], dtype='object') @@ -3242,7 +3242,7 @@ def count(self, pat: str, flags: int = 0) -> SeriesOrIndex: This is also available on Index. - >>> index = cudf.core.index.StringIndex(['A', 'A', 'Aaba', 'cat']) + >>> index = cudf.Index(['A', 'A', 'Aaba', 'cat']) >>> index.str.count('a') Int64Index([0, 0, 2, 1], dtype='int64') """ # noqa W605 diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f413c372b16..32824525fa0 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -3463,7 +3463,7 @@ def rename( if index: if ( any(type(item) == str for item in index.values()) - and type(self.index) != cudf.core.index.StringIndex + and type(self.index) != cudf.StringIndex ): raise NotImplementedError( "Implicit conversion of index to " @@ -6759,9 +6759,9 @@ def mode(self, axis=0, numeric_only=False, dropna=True): See Also -------- - cudf.core.series.Series.mode : Return the highest frequency value + cudf.Series.mode : Return the highest frequency value in a Series. - cudf.core.series.Series.value_counts : Return the counts of values + cudf.Series.value_counts : Return the counts of values in a Series. 
Notes diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 97a67f2dc5c..ce588b8686d 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1116,19 +1116,19 @@ def dropna( See also -------- - cudf.core.dataframe.DataFrame.isna + cudf.DataFrame.isna Indicate null values. - cudf.core.dataframe.DataFrame.notna + cudf.DataFrame.notna Indicate non-null values. - cudf.core.dataframe.DataFrame.fillna + cudf.DataFrame.fillna Replace null values. - cudf.core.series.Series.dropna + cudf.Series.dropna Drop null values. - cudf.core.index.Index.dropna + cudf.Index.dropna Drop null indices. Examples diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index bd5e5b21270..46bc527f2c8 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -348,10 +348,10 @@ def pipe(self, func, *args, **kwargs): See also -------- - cudf.core.series.Series.pipe + cudf.Series.pipe Apply a function with arguments to a series. - cudf.core.dataframe.DataFrame.pipe + cudf.DataFrame.pipe Apply a function with arguments to a dataframe. apply diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 28dba98656d..49f0c4fd62e 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -326,7 +326,7 @@ def set_names(self, names, level=None, inplace=False): See Also -------- - cudf.core.index.Index.rename : Able to set new names without level. + cudf.Index.rename : Able to set new names without level. Examples -------- @@ -529,9 +529,9 @@ def min(self): See Also -------- - cudf.core.index.Index.max : Return the maximum value in an Index. - cudf.core.series.Series.min : Return the minimum value in a Series. - cudf.core.dataframe.DataFrame.min : Return the minimum values in + cudf.Index.max : Return the maximum value in an Index. + cudf.Series.min : Return the minimum value in a Series. 
+ cudf.DataFrame.min : Return the minimum values in a DataFrame. Examples @@ -554,9 +554,9 @@ def max(self): See Also -------- - cudf.core.index.Index.min : Return the minimum value in an Index. - cudf.core.series.Series.max : Return the maximum value in a Series. - cudf.core.dataframe.DataFrame.max : Return the maximum values in + cudf.Index.min : Return the minimum value in an Index. + cudf.Series.max : Return the maximum value in a Series. + cudf.DataFrame.max : Return the maximum values in a DataFrame. Examples @@ -780,8 +780,8 @@ def sort_values(self, return_indexer=False, ascending=True, key=None): See Also -------- - cudf.core.series.Series.min : Sort values of a Series. - cudf.core.dataframe.DataFrame.sort_values : Sort values in a DataFrame. + cudf.Series.min : Sort values of a Series. + cudf.DataFrame.sort_values : Sort values in a DataFrame. Examples -------- @@ -1350,9 +1350,9 @@ def from_pandas(cls, index, nan_as_null=None): >>> import numpy as np >>> data = [10, 20, 30, np.nan] >>> pdi = pd.Index(data) - >>> cudf.core.index.Index.from_pandas(pdi) + >>> cudf.Index.from_pandas(pdi) Float64Index([10.0, 20.0, 30.0, ], dtype='float64') - >>> cudf.core.index.Index.from_pandas(pdi, nan_as_null=False) + >>> cudf.Index.from_pandas(pdi, nan_as_null=False) Float64Index([10.0, 20.0, 30.0, nan], dtype='float64') """ if not isinstance(index, pd.Index): diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 54571ebb31d..1b8405af1a4 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -803,9 +803,9 @@ def _pivot(df, index, columns): Parameters ---------- df : DataFrame - index : cudf.core.index.Index + index : cudf.Index Index labels of the result - columns : cudf.core.index.Index + columns : cudf.Index Column labels of the result """ columns_labels, columns_idx = columns._encode() diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 54d3cbd0b18..06fb2a142d8 100644 --- 
a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -458,7 +458,7 @@ def drop( Return series without null values Series.drop_duplicates Return series with duplicate values removed - cudf.core.dataframe.DataFrame.drop + cudf.DataFrame.drop Drop specified labels from rows or columns in dataframe Examples @@ -875,7 +875,7 @@ def memory_usage(self, index=True, deep=False): See Also -------- - cudf.core.dataframe.DataFrame.memory_usage : Bytes consumed by + cudf.DataFrame.memory_usage : Bytes consumed by a DataFrame. Examples @@ -2499,10 +2499,10 @@ def dropna(self, axis=0, inplace=False, how=None): Series.fillna : Replace null values. - cudf.core.dataframe.DataFrame.dropna : Drop rows or columns which + cudf.DataFrame.dropna : Drop rows or columns which contain null values. - cudf.core.index.Index.dropna : Drop null indices. + cudf.Index.dropna : Drop null indices. Examples -------- @@ -2932,7 +2932,7 @@ def loc(self): See also -------- - cudf.core.dataframe.DataFrame.loc + cudf.DataFrame.loc Examples -------- @@ -2955,7 +2955,7 @@ def iloc(self): See also -------- - cudf.core.dataframe.DataFrame.iloc + cudf.DataFrame.iloc Examples -------- @@ -5140,7 +5140,7 @@ def value_counts( Series.count Number of non-NA elements in a Series. - cudf.core.dataframe.DataFrame.count + cudf.DataFrame.count Number of non-NA elements in a DataFrame. Examples diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 733f65a3b5d..d2f120a7bb9 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -258,12 +258,12 @@ def apply(self, func, *args, **kwargs): See also -------- - cudf.core.series.Series.applymap : Apply an elementwise function to + cudf.Series.applymap : Apply an elementwise function to transform the values in the Column. 
Notes ----- - See notes of the :meth:`cudf.core.series.Series.applymap` + See notes of the :meth:`cudf.Series.applymap` """ has_nulls = False diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 21683d4bdd0..c0050d244a3 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3616,9 +3616,7 @@ def test_as_column_types(): assert_eq(pds, gds) pds = pd.Series(pd.Index(["1", "18", "9"]), dtype="int") - gds = cudf.Series( - cudf.core.index.StringIndex(["1", "18", "9"]), dtype="int" - ) + gds = cudf.Series(cudf.StringIndex(["1", "18", "9"]), dtype="int") assert_eq(pds, gds) diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index e423a64fe4d..cd46569be81 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -985,7 +985,7 @@ def test_groupby_index_type(): df["string_col"] = ["a", "b", "c"] df["counts"] = [1, 2, 3] res = df.groupby(by="string_col").counts.sum() - assert isinstance(res.index, cudf.core.index.StringIndex) + assert isinstance(res.index, cudf.StringIndex) @pytest.mark.parametrize( diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index f03454c479a..a2842d5a908 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -323,7 +323,7 @@ def test_index_copy_datetime(name, dtype, deep=True): @pytest.mark.parametrize("name", ["x"]) @pytest.mark.parametrize("dtype", ["category", "object"]) def test_index_copy_string(name, dtype, deep=True): - cidx = cudf.core.index.StringIndex(["a", "b", "c"]) + cidx = cudf.StringIndex(["a", "b", "c"]) pidx = cidx.to_pandas() pidx_copy = pidx.copy(name=name, deep=deep, dtype=dtype) @@ -380,7 +380,7 @@ def test_index_copy_category(name, dtype, deep=True): "idx", [ cudf.DatetimeIndex(["2001", "2002", "2003"]), - cudf.core.index.StringIndex(["a", "b", "c"]), + 
cudf.StringIndex(["a", "b", "c"]), cudf.Int64Index([1, 2, 3]), cudf.Float64Index([1.0, 2.0, 3.0]), cudf.CategoricalIndex([1, 2, 3]), @@ -425,7 +425,7 @@ def test_index_copy_deep(idx, deep): idx._values.categories.base_data.ptr == idx_copy._values.categories.base_data.ptr ) == same_ref - elif isinstance(idx, cudf.core.index.StringIndex): + elif isinstance(idx, cudf.StringIndex): children = idx._values._base_children copy_children = idx_copy._values._base_children assert all( @@ -470,7 +470,7 @@ def test_rangeindex_slice_attr_name(): def test_from_pandas_str(): idx = ["a", "b", "c"] pidx = pd.Index(idx, name="idx") - gidx_1 = cudf.core.index.StringIndex(idx, name="idx") + gidx_1 = cudf.StringIndex(idx, name="idx") gidx_2 = cudf.from_pandas(pidx) assert_eq(gidx_1, gidx_2) diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py index 6c3fdd4640a..d0b1ba0758e 100644 --- a/python/cudf/cudf/tests/test_text.py +++ b/python/cudf/cudf/tests/test_text.py @@ -507,8 +507,8 @@ def test_character_tokenize_index(): actual = sr.str.character_tokenize() assert_eq(expected, actual) - sr = cudf.core.index.as_index([""]) - expected = cudf.core.index.StringIndex([], dtype="object") + sr = cudf.Index([""]) + expected = cudf.StringIndex([], dtype="object") actual = sr.str.character_tokenize() assert_eq(expected, actual) diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 2aaea8435e0..626db3ee646 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -67,8 +67,8 @@ See Also -------- -cudf.io.csv.read_csv -cudf.io.json.read_json +cudf.read_csv +cudf.read_json """.format( remote_data_sources=_docstring_remote_sources ) @@ -175,7 +175,7 @@ -------- cudf.io.parquet.read_parquet_metadata cudf.io.parquet.to_parquet -cudf.io.orc.read_orc +cudf.read_orc """.format( remote_data_sources=_docstring_remote_sources ) @@ -217,7 +217,7 @@ See Also -------- cudf.io.parquet.read_parquet 
-cudf.io.orc.read_orc +cudf.read_orc """ doc_to_parquet = docfmt_partial(docstring=_docstring_to_parquet) @@ -270,7 +270,7 @@ See Also -------- -cudf.io.orc.read_orc +cudf.read_orc """ doc_read_orc_metadata = docfmt_partial(docstring=_docstring_read_orc_metadata) @@ -296,7 +296,7 @@ See Also -------- -cudf.io.orc.read_orc +cudf.read_orc """ doc_read_orc_statistics = docfmt_partial( docstring=_docstring_read_orc_statistics @@ -385,7 +385,7 @@ See Also -------- -cudf.io.orc.read_orc +cudf.read_orc """ doc_to_orc = docfmt_partial(docstring=_docstring_to_orc) @@ -687,7 +687,7 @@ See Also -------- -cudf.io.hdf.read_hdf : Read from HDF file. +cudf.read_hdf : Read from HDF file. cudf.io.parquet.to_parquet : Write a DataFrame to the binary parquet format. cudf.io.feather.to_feather : Write out feather-format for DataFrames. """ @@ -898,7 +898,7 @@ See Also -------- -cudf.io.csv.to_csv +cudf.to_csv """.format( remote_data_sources=_docstring_remote_sources ) @@ -963,7 +963,7 @@ See Also -------- -cudf.io.csv.read_csv +cudf.read_csv """ doc_to_csv = docfmt_partial( docstring=_docstring_to_csv.format( diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 53543b9e886..6fb5efbdf0f 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -51,8 +51,8 @@ def _nonempty_index(idx): data = np.array([start, "1970-01-02"], dtype=idx.dtype) values = cudf.core.column.as_column(data) return cudf.core.index.DatetimeIndex(values, name=idx.name) - elif isinstance(idx, cudf.core.index.StringIndex): - return cudf.core.index.StringIndex(["cat", "dog"], name=idx.name) + elif isinstance(idx, cudf.StringIndex): + return cudf.StringIndex(["cat", "dog"], name=idx.name) elif isinstance(idx, cudf.core.index.CategoricalIndex): key = tuple(idx._data.keys()) assert len(key) == 1 From 10bbfccc99d3999f82121cdc431830d4ab8a8f34 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 3 Aug 2021 13:31:04 -0700 Subject: 
[PATCH 41/49] fix general functions --- .../source/api_docs/general_functions.rst | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst index 2cbc1e93df5..226ae8acd32 100644 --- a/docs/cudf/source/api_docs/general_functions.rst +++ b/docs/cudf/source/api_docs/general_functions.rst @@ -1,10 +1,32 @@ ================= General Functions ================= +.. currentmodule:: cudf -.. automodule:: cudf.core.reshape - :members: +Data manipulations +------------------ -.. autofunction:: cudf.to_datetime +.. autosummary:: + :toctree: api/ -.. autofunction:: cudf.to_numeric + cudf.concat + cudf.melt + cudf.get_dummies + cudf.merge_sorted + cudf.pivot + cudf.unstack + +Top-level conversions +--------------------- +.. autosummary:: + :toctree: api/ + + cudf.to_numeric + +Top-level dealing with datetimelike +----------------------------------- + +.. autosummary:: + :toctree: api/ + + cudf.to_datetime From 87c64ec0c9a8d6fd641777f439d505b6432f37cb Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 3 Aug 2021 15:54:31 -0700 Subject: [PATCH 42/49] adds docs --- docs/cudf/source/api_docs/general_utilities.rst | 11 ++++++++--- docs/cudf/source/index.rst | 8 +++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/docs/cudf/source/api_docs/general_utilities.rst b/docs/cudf/source/api_docs/general_utilities.rst index 6f50c70498b..d9c53c3fbbd 100644 --- a/docs/cudf/source/api_docs/general_utilities.rst +++ b/docs/cudf/source/api_docs/general_utilities.rst @@ -2,7 +2,12 @@ General Utilities ================= -.. currentmodule:: cudf.testing +Testing functions +----------------- +.. autosummary:: + :toctree: api/ -.. 
automodule:: cudf.testing.testing - :members: + cudf.testing.testing.assert_column_equal + cudf.testing.testing.assert_frame_equal + cudf.testing.testing.assert_index_equal + cudf.testing.testing.assert_series_equal diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index 950694f69a7..5ed0a9973be 100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -1,7 +1,13 @@ Welcome to cuDF's documentation! ================================= - +cuDF is a Python GPU DataFrame library (built on the `Apache Arrow +<https://arrow.apache.org/>`_ columnar memory format) for loading, joining, +aggregating, filtering, and otherwise manipulating data. cuDF also provides a +pandas-like API that will be familiar to data engineers & data scientists, so +they can use it to easily accelerate their workflows without going into +the details of CUDA programming. + .. toctree:: :maxdepth: 2 From c0996b3cf94a8f4af75da57e959a4da4ef4cd935 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Tue, 3 Aug 2021 18:37:15 -0700 Subject: [PATCH 43/49] fix pymod index creation --- docs/cudf/source/conf.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 20b0eb9f736..c5f1233d022 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -24,7 +24,10 @@ from docutils.nodes import Text from sphinx.addnodes import pending_xref +import cudf +sys.path.insert(0, os.path.abspath(cudf.__path__[0])) +sys.path.insert(0, os.path.abspath(".")) sys.path.insert(0, os.path.abspath("../..")) sys.path.append(os.path.abspath("./_ext")) @@ -53,6 +56,8 @@ autosummary_generate = True ipython_mplbackend = "str" +html_use_modindex = True + # Add any paths that contain templates here, relative to this directory. 
templates_path = ["_templates"] From 091002af41236e880e25054d174fc1977d6e6d4f Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 5 Aug 2021 15:01:45 -0700 Subject: [PATCH 44/49] remove modindex --- docs/cudf/source/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/cudf/source/index.rst b/docs/cudf/source/index.rst index 5ed0a9973be..90b287bd1b6 100644 --- a/docs/cudf/source/index.rst +++ b/docs/cudf/source/index.rst @@ -22,5 +22,4 @@ Indices and tables ================== * :ref:`genindex` -* :ref:`modindex` * :ref:`search` From f76781ab2d291466bbcb37cb8160393bc445e62d Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Thu, 5 Aug 2021 17:06:33 -0700 Subject: [PATCH 45/49] fix API page content --- docs/cudf/source/api_docs/index.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/cudf/source/api_docs/index.rst b/docs/cudf/source/api_docs/index.rst index 7948f6d240c..70b9563fc1d 100644 --- a/docs/cudf/source/api_docs/index.rst +++ b/docs/cudf/source/api_docs/index.rst @@ -2,9 +2,8 @@ API reference ============= -This is a simple demonstration site to show off a few visual -and structural elements of the theme. Click the sections on -the left sidebar to see how various elements look on this theme. +This page provides a list of all publicly accessible modules, methods and classes through +the ``cudf.*`` namespace. .. 
toctree:: :maxdepth: 2 From b8214d79dc92b92925dbd7a566cccdb51402eb76 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Fri, 6 Aug 2021 14:27:17 -0700 Subject: [PATCH 46/49] enforce table borders --- docs/cudf/source/_static/params.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css index 4dc2d54ddb2..d0a8330d497 100644 --- a/docs/cudf/source/_static/params.css +++ b/docs/cudf/source/_static/params.css @@ -49,3 +49,7 @@ table.io-supported-types-table thead{ --pst-color-toc-link-active: var(--pst-color-active-navigation); } + +.table td, .table th { + border: 1px solid #dee2e6; +} \ No newline at end of file From d551ad022450427e3c585ff122fee8ff96f22cdf Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 9 Aug 2021 10:34:07 -0700 Subject: [PATCH 47/49] make table styling specific to tables only and not autosummary tables --- docs/cudf/source/_static/params.css | 2 +- docs/cudf/source/basics/basics.rst | 50 +++++++------ docs/cudf/source/basics/groupby.rst | 73 ++++++++++--------- .../cudf/source/basics/io-supported-types.rst | 2 +- 4 files changed, 66 insertions(+), 61 deletions(-) diff --git a/docs/cudf/source/_static/params.css b/docs/cudf/source/_static/params.css index d0a8330d497..2bdd6f5a299 100644 --- a/docs/cudf/source/_static/params.css +++ b/docs/cudf/source/_static/params.css @@ -50,6 +50,6 @@ table.io-supported-types-table thead{ } -.table td, .table th { +.special-table td, .special-table th { border: 1px solid #dee2e6; } \ No newline at end of file diff --git a/docs/cudf/source/basics/basics.rst b/docs/cudf/source/basics/basics.rst index 15b4b43662b..ee63f67daa2 100644 --- a/docs/cudf/source/basics/basics.rst +++ b/docs/cudf/source/basics/basics.rst @@ -12,30 +12,32 @@ cuDF uses dtypes for Series or individual columns of a DataFrame. cuDF uses NumP The following table lists all of cudf types. For methods requiring dtype arguments, strings can be specified as indicated. 
See the respective documentation sections for more on each type. - -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Kind of Data | Data Type | Scalar | String Aliases | -+========================+==================+=====================================================================================+=============================================+ -| Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | -| | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | -| | | | ``'uint32'``, ``'uint64'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Strings | | `str `_ | ``'string'``, ``'object'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | -| | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| -| (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| 
-+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Categorical | CategoricalDtype | (none) | ``'category'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Boolean | | np.bool_ | ``'bool'`` | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ -| Decimal | Decimal64Dtype | (none) | (none) | -+------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ +.. rst-class:: special-table +.. table:: + + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Kind of Data | Data Type | Scalar | String Aliases | + +========================+==================+=====================================================================================+=============================================+ + | Integer | | np.int8_, np.int16_, np.int32_, np.int64_, np.uint8_, np.uint16_, | ``'int8'``, ``'int16'``, ``'int32'``, | + | | | np.uint32_, np.uint64_ | ``'int64'``, ``'uint8'``, ``'uint16'``, | + | | | | ``'uint32'``, ``'uint64'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Float | | np.float32_, np.float64_ | ``'float32'``, ``'float64'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | 
Strings | | `str `_ | ``'string'``, ``'object'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Datetime | | np.datetime64_ | ``'datetime64[s]'``, ``'datetime64[ms]'``, | + | | | | ``'datetime64[us]'``, ``'datetime64[ns]'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Timedelta | | np.timedelta64_ | ``'timedelta64[s]'``, ``'timedelta64[ms]'``,| + | (duration type) | | | ``'timedelta64[us]'``, ``'timedelta64[ns]'``| + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Categorical | CategoricalDtype | (none) | ``'category'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Boolean | | np.bool_ | ``'bool'`` | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ + | Decimal | Decimal64Dtype | (none) | (none) | + +------------------------+------------------+-------------------------------------------------------------------------------------+---------------------------------------------+ **Note: All dtypes above are Nullable** diff --git a/docs/cudf/source/basics/groupby.rst b/docs/cudf/source/basics/groupby.rst index a6ce9db6817..04c4d42fa2a 100644 --- a/docs/cudf/source/basics/groupby.rst +++ b/docs/cudf/source/basics/groupby.rst @@ -131,41 +131,44 @@ Aggregations on groups is supported via the ``agg`` method: The following table summarizes the available aggregations and the types that support them: 
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | -+====================================+===========+============+==========+===============+========+==========+============+===========+ -| count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| sum | ✅ | ✅ | | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| idxmin | ✅ | ✅ | | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| idxmax | ✅ | ✅ | | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| min | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| max | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| mean | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| var | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| std | ✅ | ✅ | | | | | | | 
-+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| quantile | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| median | ✅ | ✅ | | | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| nth | ✅ | ✅ | ✅ | | | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ -| unique | ✅ | ✅ | ✅ | ✅ | | | | | -+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ +.. rst-class:: special-table +.. 
table:: + + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | Aggregations / dtypes | Numeric | Datetime | String | Categorical | List | Struct | Interval | Decimal | + +====================================+===========+============+==========+===============+========+==========+============+===========+ + | count | ✅ | ✅ | ✅ | ✅ | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | size | ✅ | ✅ | ✅ | ✅ | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | sum | ✅ | ✅ | | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | idxmin | ✅ | ✅ | | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | idxmax | ✅ | ✅ | | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | min | ✅ | ✅ | ✅ | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | max | ✅ | ✅ | ✅ | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | mean | ✅ | ✅ | | | | | | | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | var | ✅ | ✅ | | | | | | | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | std | ✅ | ✅ | | | | | | | + 
+------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | quantile | ✅ | ✅ | | | | | | | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | median | ✅ | ✅ | | | | | | | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | nunique | ✅ | ✅ | ✅ | ✅ | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | nth | ✅ | ✅ | ✅ | | | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | collect | ✅ | ✅ | ✅ | | ✅ | | | ✅ | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ + | unique | ✅ | ✅ | ✅ | ✅ | | | | | + +------------------------------------+-----------+------------+----------+---------------+--------+----------+------------+-----------+ GroupBy apply ------------- diff --git a/docs/cudf/source/basics/io-supported-types.rst b/docs/cudf/source/basics/io-supported-types.rst index 739c1634ca7..78c1bfb6554 100644 --- a/docs/cudf/source/basics/io-supported-types.rst +++ b/docs/cudf/source/basics/io-supported-types.rst @@ -3,7 +3,7 @@ I/O Supported dtypes The following table lists are compatible cudf types for each supported IO format. -.. rst-class:: io-supported-types-table +.. rst-class:: io-supported-types-table special-table .. 
table:: :widths: 15 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 From 81067f52bfe9a575086de677319fa73b1c7326e7 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 9 Aug 2021 11:00:35 -0700 Subject: [PATCH 48/49] make pivot, unstack top level accessible --- python/cudf/cudf/__init__.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index be78b9cf825..13c20d8bcd4 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -74,7 +74,14 @@ tan, true_divide, ) -from cudf.core.reshape import concat, get_dummies, melt, merge_sorted +from cudf.core.reshape import ( + concat, + get_dummies, + melt, + merge_sorted, + pivot, + unstack, +) from cudf.core.series import isclose from cudf.core.tools.datetimes import DateOffset, to_datetime from cudf.core.tools.numeric import to_numeric From 2620c508ff820f715dbba8fde72bf465a2dac463 Mon Sep 17 00:00:00 2001 From: galipremsagar Date: Mon, 9 Aug 2021 12:33:23 -0700 Subject: [PATCH 49/49] docstring fix --- python/cudf/cudf/core/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index f3954c3e564..b3ca6f7973b 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2941,7 +2941,7 @@ def from_breaks(breaks, closed="right", name=None, copy=False, dtype=None): Construct an IntervalIndex from an array of splits. Parameters - --------- + ---------- breaks : array-like (1-dimensional) Left and right bounds for each interval. closed : {"left", "right", "both", "neither"}, default "right"