Merge pull request #165 from realratchet/master

Respect passed pbars
root-11 · Apr 22, 2024 · 2d0ecae · 2d0ecae
2 parents 492be9c + e15191c
commit 2d0ecae
Show file tree

Hide file tree

Showing 9 changed files with 91 additions and 42 deletions.
diff --git a/nimlite/funcs/filter.nim b/nimlite/funcs/filter.nim
@@ -81,7 +81,7 @@ proc checkExpressions(row: seq[PY_ObjectND], exprCols: seq[string], expressions:
     of FT_ANY: any(expressions, xpr => row.checkExpression(exprCols, xpr))
     of FT_ALL: all(expressions, xpr => row.checkExpression(exprCols, xpr))
 
-proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTypeName: string, tqdm: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) =
+proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTypeName: string, tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): (nimpy.PyObject, nimpy.PyObject) =
     let m = modules()
     let builtins = m.builtins
     let tablite = m.tablite
@@ -267,10 +267,15 @@ proc filter*(table: nimpy.PyObject, pyExpressions: seq[nimpy.PyObject], filterTy
         for (key, col) in tablePages.pairs():
             col.dumpPage(passTablePages[key], failTablePages[key])
 
-    let tableLen = builtins.getLen(table)
-    let tqdmLen = int ceil(float(tableLen) / float(pageSize))
-    let TqdmClass = (if isNone(tqdm): m.tqdm.classes.TqdmClass else: tqdm)
-    let pbar = TqdmClass!(total: tqdmLen, desc = "filter")
+    var pbar: nimpy.PyObject
+
+    if pbarInp.isNone:
+        let tableLen = builtins.getLen(table)
+        let tqdmLen = int ceil(float(tableLen) / float(pageSize))
+        let TqdmClass = (if isNone(tqdm): m.tqdm.classes.TqdmClass else: tqdm)
+        pbar = TqdmClass!(total: tqdmLen, desc = "filter")
+    else:
+        pbar = pbarInp
 
     for (i, row) in enumerate(exprCols.iterateRows(tablePages)):
         bitmask[bitNum] = row.checkExpressions(exprCols, expressions, filterType)

diff --git a/nimlite/funcs/groupby.nim b/nimlite/funcs/groupby.nim
@@ -585,7 +585,7 @@ iterator iteratePages(paths: seq[string]): seq[PY_ObjectND] =
             res.add(i())
             finished = finished or finished(i)
 
-proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accumulator)], tqdm: nimpy.PyObject = modules().tqdm.classes.TqdmClass): nimpy.PyObject =
+proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accumulator)], tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): nimpy.PyObject =
     let
         m = modules()
         tabliteBase = m.tablite.modules.base
@@ -626,12 +626,19 @@ proc groupby*(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, Accu
         if cn notin columnNames:
             columnNames.add(cn)
 
-    # var relevantT = T.slice(columnNames)
-    var columnsPaths: OrderedTable[string, seq[string]] = collect(initOrderedTable()):
+    let columnsPaths: OrderedTable[string, seq[string]] = collect(initOrderedTable()):
         for cn in columnNames:
             {cn: tabliteBase.collectPages(T[cn])}
-    var TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
-    var pbar = TqdmClass!(desc: &"groupby", total: len(columnsPaths[toSeq(columnsPaths.keys)[0]]))
+
+
+    var pbar: nimpy.PyObject
+
+    if pbarInp.isNone:
+        let TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
+        pbar = TqdmClass!(desc: &"groupby", total: len(columnsPaths[toSeq(columnsPaths.keys)[0]]))
+    else:
+        pbar = pbarInp
+
     var aggregationFuncs = initOrderedTable[seq[PY_ObjectND], seq[(string, GroupByFunction)]]()
     for pagesZipped in pageZipper(columnsPaths):
         for row in iteratePages(pagesZipped):

diff --git a/nimlite/funcs/imputation.nim b/nimlite/funcs/imputation.nim
@@ -115,7 +115,7 @@ proc savePages(sliceData: seq[seq[PY_ObjectND]], columns: seq[nimpy.PyObject], p
 
 proc nearestNeighbourImputation*(T: nimpy.PyObject, sources: seq[string],
         missing: seq[PY_ObjectND], targets: seq[string],
-        tqdm: nimpy.PyObject = modules().tqdm.classes.TqdmClass): nimpy.PyObject =
+        tqdm: nimpy.PyObject = nil, pbarInp: nimpy.PyObject = nil): nimpy.PyObject =
     let
         m = modules()
         tabliteBase = m.tablite.modules.base
@@ -153,13 +153,20 @@ proc nearestNeighbourImputation*(T: nimpy.PyObject, sources: seq[string],
             for m in missing:
                 if m in k:
                     {k: v}
-    var
-        missingValsCounts = collect: (for v in missing_value_index.values(): len(v))
-        totalSteps = sum(missingValsCounts)
-        TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
+
+
+    var pbar: nimpy.PyObject
+    if pbarInp.isNone:
+        let missingValsCounts = collect: (for v in missing_value_index.values(): len(v))
+        let totalSteps = sum(missingValsCounts)
+        let TqdmClass = if tqdm.isNone: m.tqdm.classes.TqdmClass else: tqdm
+
         pbar = TqdmClass!(desc: &"imputation.nearest_neighbour", total: totalSteps)
-        ranks: seq[PY_ObjectND] = @[]
-        newOrder = initTable[seq[int], seq[PY_ObjectND]]()
+    else:
+        pbar = pbarInp
+
+    var ranks: seq[PY_ObjectND] = @[]
+    var newOrder = initTable[seq[int], seq[PY_ObjectND]]()
 
     for k in missingValueIndex.keys():
         for kk in k:

diff --git a/nimlite/libnimlite.nim b/nimlite/libnimlite.nim
@@ -121,14 +121,14 @@ when isLib:
 
     # --------    FILTER    -----------
     import funcs/filter as ff
-    proc filter(table: nimpy.PyObject, expressions: seq[nimpy.PyObject], `type`: string, tqdm: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) {.exportpy.} =
-        return ff.filter(table, expressions, `type`, tqdm)
+    proc filter(table: nimpy.PyObject, expressions: seq[nimpy.PyObject], `type`: string, tqdm: nimpy.PyObject, pbar: nimpy.PyObject): (nimpy.PyObject, nimpy.PyObject) {.exportpy.} =
+        return ff.filter(table, expressions, `type`, tqdm, pbar)
 
     # --------    FILTER    -----------
 
     # --------  IMPUTATION  -----------
     import funcs/imputation
-    proc nearest_neighbour(T: nimpy.PyObject, sources: seq[string], missing: seq[nimpy.PyObject], targets: seq[string], tqdm: nimpy.PyObject): nimpy.PyObject {.exportpy.} =
+    proc nearest_neighbour(T: nimpy.PyObject, sources: seq[string], missing: seq[nimpy.PyObject], targets: seq[string], tqdm: nimpy.PyObject, pbar: nimpy.PyObject): nimpy.PyObject {.exportpy.} =
         var miss: seq[PY_ObjectND] = @[]
         for m in missing:
             case modules().builtins.getTypeName(m):
@@ -150,14 +150,14 @@ when isLib:
                 miss.add(newPY_Object(m.to(bool)))
             else:
                 raise newException(ValueError, "unrecognized type.")
-        return nearestNeighbourImputation(T, sources, miss, targets, tqdm)
+        return nearestNeighbourImputation(T, sources, miss, targets, tqdm, pbar)
     # --------  IMPUTATION  -----------
 
     # --------   GROUPBY  -----------
     import funcs/groupby as gb
-    proc groupby(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, string)], tqdm: nimpy.PyObject): nimpy.PyObject {. exportpy .} =
+    proc groupby(T: nimpy.PyObject, keys: seq[string], functions: seq[(string, string)], tqdm: nimpy.PyObject, pbar: nimpy.PyObject): nimpy.PyObject {. exportpy .} =
         var funcs = collect:
             for (cn, fn) in functions:
                 (cn, str2Accumulator(fn))
-        return gb.groupby(T, keys, funcs, tqdm)
+        return gb.groupby(T, keys, funcs, tqdm, pbar)
     # --------   GROUPBY  -----------
diff --git a/nimlite/libnimlite.pyi b/nimlite/libnimlite.pyi
@@ -1,4 +1,4 @@
-def text_reader_task(path,  encoding,  dia_delimiter,  dia_quotechar,  dia_escapechar,  dia_doublequote,  dia_quoting,  dia_skipinitialspace,  dia_skiptrailingspace,  dia_lineterminator,  dia_strict,  guess_dtypes,  tsk_pages,  tsk_offset,  tsk_count, import_fields):
+def text_reader_task(path, encoding, dia_delimiter, dia_quotechar, dia_escapechar, dia_doublequote, dia_quoting, dia_skipinitialspace, dia_skiptrailingspace, dia_lineterminator, dia_strict, guess_dtypes, tsk_pages, tsk_offset, tsk_count, import_fields):
     pass
 
 
@@ -30,11 +30,13 @@ def collect_text_reader_page_info_task(task_info, task):
     pass
 
 
-def nearest_neighbour(T, sources, missing, targets, tqdm):
+def nearest_neighbour(T, sources, missing, targets, tqdm, pbar):
     pass
 
-def groupby(T, keys, functions, tqdm):
+
+def groupby(T, keys, functions, tqdm, pbar):
     pass
 
-def filter(table, expressions, type, tqdm):
+
+def filter(table, expressions, type, tqdm, pbar):
     pass
diff --git a/tablite/core.py b/tablite/core.py
@@ -611,7 +611,7 @@ def groupby(self, keys, functions, tqdm=_tqdm, pbar=None):
             https://github.com/root-11/tablite/blob/master/tests/test_groupby.py
 
         """
-        return _groupby(self, keys, functions, tqdm)
+        return _groupby(self, keys, functions, tqdm, pbar)
 
     def pivot(self, rows, columns, functions, values_as_rows=True, tqdm=_tqdm, pbar=None):
         """
@@ -730,7 +730,7 @@ def column_select(self, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=
             first table contains the rows that were successfully cast to desired types
             second table contains rows that failed to cast + rejection reason
         """
-        return _column_select(self, cols, tqdm, TaskManager)
+        return _column_select(self, cols, tqdm=tqdm, TaskManager=TaskManager)
 
     def join(self, other, left_keys, right_keys, left_columns=None, right_columns=None, kind="inner", merge_keys=False, tqdm=_tqdm, pbar=None):
         """

diff --git a/tablite/nimlite.py b/tablite/nimlite.py
@@ -79,11 +79,19 @@ def text_reader(
     guess_datatypes: bool =False,
     newline: str='\n', delimiter: str=',', text_qualifier: str='"',
     quoting: ValidQuoting, strip_leading_and_tailing_whitespace: bool=True, skip_empty: ValidSkipEmpty = "NONE",
-    tqdm=_tqdm
+    tqdm=_tqdm,
+    pbar:_tqdm = None
 ) -> K:
     assert isinstance(path, Path)
     assert isinstance(pid, Path)
-    with tqdm(total=10, desc=f"importing file") as pbar:
+
+    if pbar is None:
+        pbar = tqdm(total=10, desc=f"importing file")
+        pbar_close = True
+    else:
+        pbar_close = False
+
+    try:
         table = nl.text_reader(
             pid=str(pid),
             path=str(path),
@@ -183,10 +191,14 @@ def next_task(task: Task, page_info):
         pbar.update(pbar.total - pbar.n)
 
         table = T(columns=table_dict)
+    finally:
+        if pbar_close:
+            pbar.close()
 
     return table
 
 
+
 def wrap(str_: str) -> str:
     return '"' + str_.replace('"', '\\"').replace("'", "\\'").replace("\n", "\\n").replace("\t", "\\t") + '"'
 
@@ -203,8 +215,14 @@ def _collect_cs_info(i: int, columns: dict, res_cols_pass: list, res_cols_fail:
     return el, col_pass, col_fail
 
 
-def column_select(table: K, cols: list[ColumnSelectorDict], tqdm=_tqdm, TaskManager=TaskManager) -> Tuple[K, K]:
-    with tqdm(total=100, desc="column select", bar_format='{desc}: {percentage:.1f}%|{bar}{r_bar}') as pbar:
+def column_select(table: K, cols: list[ColumnSelectorDict], tqdm=_tqdm, pbar:_tqdm = None, TaskManager=TaskManager) -> Tuple[K, K]:
+    if pbar is None:
+        pbar = tqdm(total=100, desc="column select", bar_format='{desc}: {percentage:.1f}%|{bar}{r_bar}')
+        pbar_close = True
+    else:
+        pbar_close = False
+
+    try:
         T = type(table)
         dir_pid = Config.workdir / Config.pid
 
@@ -297,18 +315,21 @@ def extend_table(table, columns):
         pbar.update(pbar.total - pbar.n)
 
         return tbl_pass, tbl_fail
+    finally:
+        if pbar_close:
+            pbar.close()
 
 def read_page(path: Union[str, Path]) -> np.ndarray:
     return nl.read_page(str(path))
 
 def repaginate(column: Column):
     nl.repaginate(column)
 
-def nearest_neighbour(T: BaseTable, sources: Union[list[str], None], missing: Union[list, None], targets: Union[list[str], None], tqdm=_tqdm):
-    return nl.nearest_neighbour(T, sources, list(missing), targets, tqdm)
+def nearest_neighbour(T: BaseTable, sources: Union[list[str], None], missing: Union[list, None], targets: Union[list[str], None], tqdm=_tqdm, pbar: _tqdm = None):
+    return nl.nearest_neighbour(T, sources, list(missing), targets, tqdm, pbar)
 
-def groupby(T, keys, functions, tqdm=_tqdm):
-    return nl.groupby(T, keys, functions, tqdm)
+def groupby(T, keys, functions, tqdm=_tqdm, pbar: _tqdm=None):
+    return nl.groupby(T, keys, functions, tqdm, pbar)
 
-def filter(table: BaseTable, expressions: list[FilterDict], type: FilterType, tqdm = _tqdm):
-    return nl.filter(table, expressions, type, tqdm)
+def filter(table: BaseTable, expressions: list[FilterDict], type: FilterType, tqdm = _tqdm, pbar: _tqdm = None):
+    return nl.filter(table, expressions, type, tqdm, pbar)
diff --git a/tablite/sortation.py b/tablite/sortation.py
@@ -38,7 +38,11 @@ def sort_index(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar=None):
 
     rank = {i: tuple() for i in range(len(T))}  # create index and empty tuple for sortation.
 
-    _pbar = tqdm(total=len(mapping.items()), desc="creating sort index") if pbar is None else pbar
+    if pbar is None:
+        pbar = tqdm(total=len(mapping.items()), desc="creating sort index")
+        pbar_close = True
+    else:
+        pbar_close = False
 
     for key, reverse in mapping.items():
         col = T[key][:]
@@ -48,7 +52,10 @@ def sort_index(T, mapping, sort_mode="excel", tqdm=_tqdm, pbar=None):
             v2 = numpy_to_python(v)
             rank[ix] += (ranks[v2],)  # add tuple for each sortation level.
 
-        _pbar.update(1)
+        pbar.update(1)
+
+    if pbar_close:
+        pbar.close()
 
     del col
     del ranks

diff --git a/tablite/version.py b/tablite/version.py
@@ -1,3 +1,3 @@
-major, minor, patch = 2023, 11, 4
+major, minor, patch = 2023, 11, 5
 __version_info__ = (major, minor, patch)
 __version__ = ".".join(str(i) for i in __version_info__)