Skip to content

Commit

Permalink
documentation formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
rom1mouret committed Feb 1, 2021
1 parent 128bd2f commit 5b99bb5
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 54 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ Float64 were chosen over float32 for the sake of compatibility with [gonum](http

Storing all the data slices as `interface{}` is sound.
For one thing, this requires only one `map[string]interface{}`.
By contrast, ml-essentials allocates 5 `map[string]T`, even if when empty.
By contrast, ml-essentials allocates 5 `map[string]T`, even when empty.
Also, some functions get to be very succinct. For instance,
`rename` can move the data from one column to another without ever knowing
the type of the data.
Expand Down
16 changes: 8 additions & 8 deletions v0/dataframe/dataframe.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ func EmptyDataFrame(nRows int, maxCPU int) *DataFrame {
// ZeroMask returns a possibly pre-allocated mask for the MaskView function.
// The values of the mask are all initialized to false.
// Intended use:
// m := df.ZeroMask()
// for i := 0; i < df.NumRows(); i++ {
// m := df.ZeroMask()
// for i := 0; i < df.NumRows(); i++ {
// if i % 10 == 0 {
// m[i] = true
// }
// }
// df = df.MaskView(m)
// }
// df = df.MaskView(m)
// Do not concurrently use this function unless you call ThreadSafeMasking(true)
// first.
func (df *DataFrame) ZeroMask() []bool {
Expand All @@ -65,11 +65,11 @@ func (df *DataFrame) ZeroMask() []bool {
// EmptyMask returns a possibly pre-allocated mask for the MaskView function.
// The values of the mask are not initialized and can be either true or false.
// Intended use:
// m := df.EmptyMask()
// for i := 0; i < df.NumRows(); i++ {
// m := df.EmptyMask()
// for i := 0; i < df.NumRows(); i++ {
// m[i] = i % 10 == 0
// }
// df = df.MaskView(m)
// }
// df = df.MaskView(m)
// Do not concurrently use this function unless you call ThreadSafeMasking(true)
// first.
func (df *DataFrame) EmptyMask() []bool {
Expand Down
48 changes: 24 additions & 24 deletions v0/dataframe/inplace.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,10 @@ func (df *DataFrame) Encode(newEncoding encoding.Encoding) error {
// The given slice is copied, so it can safely be altered after this call.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Ints(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// access := df.Ints(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
func (df *DataFrame) OverwriteInts(colName string, values []int) {
df.debugPrint("overwriting ints on")
col := df.ints[colName]
Expand All @@ -101,10 +101,10 @@ func (df *DataFrame) OverwriteInts(colName string, values []int) {
// The given slice is copied, so it can safely be altered after this call.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Floats(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// access := df.Floats(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
func (df *DataFrame) OverwriteFloats64(colName string, values []float64) {
df.debugPrint("overwriting floats64 on")
col := df.floats[colName]
Expand All @@ -121,10 +121,10 @@ func (df *DataFrame) OverwriteFloats64(colName string, values []float64) {
// The given slice is copied, so it can safely be altered after this call.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Floats(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, (float64) values[i])
// }
// access := df.Floats(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, float64(values[i]))
// }
func (df *DataFrame) OverwriteFloats32(colName string, values []float32) {
df.debugPrint("overwriting floats32 on")
col := df.floats[colName]
Expand All @@ -141,10 +141,10 @@ func (df *DataFrame) OverwriteFloats32(colName string, values []float32) {
// The given slice is copied, so it can safely be altered after this call.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Bools(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// access := df.Bools(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
func (df *DataFrame) OverwriteBools(colName string, values []bool) {
df.debugPrint("overwriting bools on")
col := df.bools[colName]
Expand All @@ -161,10 +161,10 @@ func (df *DataFrame) OverwriteBools(colName string, values []bool) {
// The given slice is copied, so it can safely be altered after this call.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Objects(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// access := df.Objects(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// The third argument is only used if the column doesn't exist and has to be
// created. It is the only way to mix strings with nil values and yet benefit
// from dataframe operations specialized for strings such as HashStringsView.
Expand All @@ -190,10 +190,10 @@ func (df *DataFrame) OverwriteObjects(colName string, values []interface{},
// after calling this function.
// If the column doesn't exist, it will create a new column.
// Otherwise, it is functionally equivalent to:
// access := df.Objects(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// access := df.Objects(colName)
// for i := 0; i < len(values); i++ {
// access.Set(i, values[i])
// }
// If you need to overwrite strings with missing values, use OverwriteObjects
// instead.
func (df *DataFrame) OverwriteStrings(colName string, values []string) {
Expand Down
4 changes: 2 additions & 2 deletions v0/dataframe/print.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
// It doesn't print the data.
// Everything is printed on stdout. Nothing on stderr.
// PrintSummary returns the dataframe itself so you can write
// df.PrintSummary().PrintHead(n, "") or df.PrintHead(n, "").PrintSummary()
// df.PrintSummary().PrintHead(n, "") or df.PrintHead(n, "").PrintSummary()
func (df *DataFrame) PrintSummary() *DataFrame {
if len(df.floats) > 0 {
cols := df.FloatHeader().NameList()
Expand Down Expand Up @@ -62,7 +62,7 @@ func (df *DataFrame) PrintSummary() *DataFrame {
// floatFormat defaults to %.3f
// Everything is printed on stdout. Nothing on stderr.
// PrintHead returns the dataframe itself so you can write
// df.PrintSummary().PrintHead(n, "") or df.PrintHead(n, "").PrintSummary()
// df.PrintSummary().PrintHead(n, "") or df.PrintHead(n, "").PrintSummary()
func (df *DataFrame) PrintHead(n int, floatFormat string) *DataFrame {
if n < 0 || n > len(df.indices) {
n = df.NumRows()
Expand Down
2 changes: 1 addition & 1 deletion v0/dataframe/raw_data.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func (data *RawData) Drop(columns ...string) {
// Rename changes the name of a column.
// The new column will be of the same type and share the same data.
// For example, if you execute:
// df.View().Rename("apples", "oranges").Ints("oranges").Set(0, 42)
// df.View().Rename("apples", "oranges").Ints("oranges").Set(0, 42)
// It will change df's number of apples to 42 at index=0.
func (data *RawData) Rename(oldName string, newName string) {
if data.sharedMaps {
Expand Down
30 changes: 15 additions & 15 deletions v0/dataframe/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ func (df *DataFrame) SliceView(from int, to int) *DataFrame {
// To avoid unnecessary allocations, please get a pre-allocated mask from
// DataFrame.EmptyMask() or DataFrame.ZeroMask().
// MaskView is functionally equivalent to:
// indices = make([]int, 0)
// for i, b := mask {
// if b {
// indices := make([]int, 0)
// for i, b := range mask {
// if b {
// indices = append(indices, i)
// }
// }
// maskedView := df.IndexView(indices)
// }
// }
// maskedView := df.IndexView(indices)
func (df *DataFrame) MaskView(mask []bool) *DataFrame {
result := df.View()
result.indices = make([]int, len(mask))
Expand Down Expand Up @@ -129,9 +129,9 @@ func (df *DataFrame) ColumnView(columns ...string) *DataFrame {

// ShuffleView randomizes the dataframe.
// This is functionally equivalent to this pseudo-code:
// indices = range(0, df.NumRows())
// shuffle(indices)
// shuffledView = df.IndexView(indices)
// indices = range(0, df.NumRows())
// shuffle(indices)
// shuffledView = df.IndexView(indices)
// If you want ShuffleView to behave deterministically, you need to call
// rand.Seed(seed) somewhere in your program prior to calling ShuffleView.
func (df *DataFrame) ShuffleView() *DataFrame {
Expand All @@ -143,7 +143,7 @@ func (df *DataFrame) ShuffleView() *DataFrame {
// SampleView randomly samples n rows from the dataframe.
// Sampling with replacement is not yet supported.
// Sampling without replacement is functionally equivalent to:
// df.ShuffleView().SliceView(0, n)
// df.ShuffleView().SliceView(0, n)
func (df *DataFrame) SampleView(n int, replacement bool) *DataFrame {
if replacement {
panic("replacement is not supported yet")
Expand Down Expand Up @@ -387,8 +387,8 @@ func (df *DataFrame) HashStringsView(columns ...string) *DataFrame {
// altering the original data from some parent dataframe.
// It will perform a copy only if the data is shared.
// This is useful when you execute a function that changes the data in-place:
// view := df.DetachedView("height")
// view.OverwriteFloats64("height", []float64{173, 174, 162, 185})
// view := df.DetachedView("height")
// view.OverwriteFloats64("height", []float64{173, 174, 162, 185})
// Caveat: this can be an expensive action if the data that backs the
// dataframe is large, even if the dataframe at hand has only a few rows.
func (df *DataFrame) DetachedView(columns ...string) *DataFrame {
Expand All @@ -401,11 +401,11 @@ func (df *DataFrame) DetachedView(columns ...string) *DataFrame {

// View makes the shallowest copy of the dataframe.
// It is roughly equivalent to:
// copy := *df
// copy := *df
// Use this function when you want to transform an in-place operation into
// a view operation, e.g.:
// view := df.View()
// view.AllocateFloats("height")
// view := df.View()
// view.AllocateFloats("height")
func (df *DataFrame) View() *DataFrame {
result := *df
result.sharedMaps = true
Expand Down
6 changes: 3 additions & 3 deletions v0/preprocessing/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type PreprocTraining interface {
// The preprocessor typically chooses the columns that are relevant to the
// transformation. If you want the transformation to apply on a subset of
// columns, you can write it like this:
// preproc.Fit(df.ColumnView("this-column", "and-that-column"))
// preproc.Fit(df.ColumnView("this-column", "and-that-column"))
// This won't stop you from running the preprocessor on a wider dataframe:
// preproc.TransformInplace(df) // here 'df' has more than 2 columns.
Fit(df *dataframe.DataFrame) error
Expand All @@ -34,8 +34,8 @@ type Transform interface {
// It returns an error if an error occurred, e.g. a categorical feature of df
// contains an unknown category.
// TransformView is functionally equivalent to:
// result := df.View()
// result.TransformInplace()
// result := df.View()
// result.TransformInplace()
TransformView(df *dataframe.DataFrame) (*dataframe.DataFrame, error)
}

Expand Down

0 comments on commit 5b99bb5

Please sign in to comment.