improve efficiency and robustness of to_list (#838)

ansys · Jan 14, 2022 · fa45e83 · fa45e83
1 parent 1e9d3fd
commit fa45e83
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 52 deletions.
diff --git a/ansys/mapdl/core/commands.py b/ansys/mapdl/core/commands.py
@@ -23,14 +23,21 @@
 import re
 import numpy as np
 
+# compiled regular expressions used for parsing tabular outputs
+REG_LETTERS = re.compile(r"[a-df-zA-DF-Z]+")  # all except E or e
+REG_FLOAT_INT = re.compile(r"[+-]?[0-9]*[.]?[0-9]+[Ee]?[+-]?[0-9]+|\s[0-9]+\s")
+BC_REGREP = re.compile(
+    r"([0-9]+)\s*([A-Za-z]+)\s*([0-9]*[.]?[0-9]+)\s+([0-9]*[.]?[0-9]+)"
+)
+
 MSG_NOT_PANDAS = """'Pandas' is not installed or could not be found.
 Hence this command is not applicable.
 
 You can install it using:
 pip install pandas
 """
 
-MSG_BCListingOutput_to_array = """This command has strings values in some of its columns (such 'UX', 'FX', 'UY', 'TEMP', etc),
+MSG_BCLISTINGOUTPUT_TO_ARRAY = """This command has strings values in some of its columns (such 'UX', 'FX', 'UY', 'TEMP', etc),
 so it cannot be converted to Numpy Array.
 
 Please use 'to_list' or 'to_dataframe' instead."""
@@ -438,6 +445,9 @@ class CommandListingOutput(CommandOutput):
     a list of lists, a Numpy array or a Pandas DataFrame.
     """
 
+    def __init__(self, *args, **kwargs):
+        self._cache = None
+
     def _is_data_start(self, line, magicword=None):
         """Check if line is the start of a data group."""
         if not magicword:
@@ -492,7 +502,8 @@ def _format(self):
     def _get_body(self, trail_header=None):
         """Get command body text.
 
-        It removes the maximum absolute values tail part and makes sure there is separation between columns.
+        It removes the maximum absolute values tail part and makes sure there is
+        separation between columns.
         """
         # Doing some formatting of the string
         body = self._format().splitlines()
@@ -515,8 +526,7 @@ def _get_body(self, trail_header=None):
 
     def _get_data_group_indexes(self, body, magicword=None):
         """Return the indexes of the start and end of the data groups."""
-
-        if '*****ANSYS VERIFICATION RUN ONLY*****' in str(self):
+        if '*****ANSYS VERIFICATION RUN ONLY*****' in str(self[:1000]):
             shift = 2
         else:
             shift = 0
@@ -533,28 +543,6 @@ def _get_data_group_indexes(self, body, magicword=None):
 
         return zip(start_idxs, ends)
 
-    def _get_data_groups(self, magicword=None, trail_header=None):
-        """Get raw data groups"""
-        body = self._get_body(trail_header=trail_header)
-
-        try:
-            np.array(body[1].split(), dtype=float) #if this fail, there is headers,
-            self._default_format = False
-        except:
-            # There is headers, assuming default format
-            self._default_format = True
-
-        if not self._default_format:
-            return [each for each in body[1:] if each]
-
-        # Using default format
-        data = []
-        for start, end in self._get_data_group_indexes(body, magicword=magicword):
-            data.extend(body[start+1:end])
-
-        # removing empty lines
-        return [each for each in data if each]
-
     def get_columns(self):
         """Get the column names for the dataframe.
 
@@ -570,16 +558,40 @@ def get_columns(self):
         except:
             return None
 
+    def _parse_table(self):
+        """Parse tabular command output.
+
+        Returns
+        -------
+        numpy.ndarray
+            Parsed tabular data from command output.
+
+        """
+        parsed_lines = []
+        for line in self.splitlines():
+            # exclude any line containing characters [A-Z] except for E
+            if line and not REG_LETTERS.search(line):
+                items = REG_FLOAT_INT.findall(line)
+                if items:
+                    parsed_lines.append(items)
+        return np.array(parsed_lines, dtype=np.float64)
+
+    @property
+    def _parsed(self):
+        """Return parsed output."""
+        if self._cache is None:
+            self._cache = self._parse_table()
+        return self._cache
+
     @check_valid_output
     def to_list(self):
         """Export the command output a list or list of lists.
 
         Returns
         -------
-            List of strings
+        list
         """
-        data = self._get_data_groups()
-        return [each.split() for each in data]
+        return self._parsed.tolist()
 
     def to_array(self):
         """Export the command output as a numpy array.
@@ -589,25 +601,25 @@ def to_array(self):
         numpy.ndarray
             Numpy array of floats.
         """
-        return np.array(self.to_list(), dtype=float)
+        return self._parsed
 
     def to_dataframe(self, data=None, columns=None):
         """Export the command output as a Pandas DataFrame.
 
         Parameters
         ----------
         data : numpy.ndarray (structured or homogeneous), Iterable, dict, or DataFrame
-            The data to be converted to the dataframe values.
-            Passed directly to the pandas.DataFrame constructor.
-            Dict can contain Series, arrays, constants, dataclass or list-like objects. If
-            data is a dict, column order follows insertion-order.
+            The data to be converted to the dataframe values.  Passed directly
+            to the pandas.DataFrame constructor.  Dict can contain Series,
+            arrays, constants, dataclass or list-like objects. If data is a
+            dict, column order follows insertion-order.
 
         columns : Index or array-like
-            Iterable with columns names.
-            Passed directly to the pandas.DataFrame constructor.
-            Column labels to use for resulting frame when data does not have them,
-            defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
-            will perform column selection instead.
+            Iterable with column names.  Passed directly to the
+            pandas.DataFrame constructor.  Column labels to use for resulting
+            frame when data does not have them, defaulting to RangeIndex(0, 1,
+            2, ..., n). If data contains column labels, will perform column
+            selection instead.
 
         Returns
         -------
@@ -617,15 +629,16 @@ def to_dataframe(self, data=None, columns=None):
         Notes
         -----
         The returned dataframe has all its data converted to float
-        (inheritate from :func:`to_array() <ansys.mapdl.core.commands.CommandListingOutput.to_array>` method).
+        (inherited from :func:`to_array()
+        <ansys.mapdl.core.commands.CommandListingOutput.to_array>` method).
         """
         try:
             import pandas as pd
         except ModuleNotFoundError:
             raise ModuleNotFoundError(MSG_NOT_PANDAS)
 
-        if not data:
-            data = self.to_array()
+        if data is None:
+            data = self.to_list()
         if not columns:
             columns = self.get_columns()
 
@@ -640,8 +653,30 @@ class BoundaryConditionsListingOutput(CommandListingOutput):
     or a Pandas DataFrame.
     """
 
+    def _parse_table(self):
+        """Parse tabular command output."""
+        parsed_lines = []
+        for line in self.splitlines():
+            # keep only non-empty lines that match the BC_REGREP row pattern
+            if line:
+                items = BC_REGREP.findall(line)
+                if items:
+                    parsed_lines.append(items)
+
+        return parsed_lines
+
+    @check_valid_output
+    def to_list(self):
+        """Export the command output as a list or list of lists.
+
+        Returns
+        -------
+        list
+        """
+        return self._parsed
+
     def to_array(self):
-        raise ValueError(MSG_BCListingOutput_to_array)
+        raise ValueError(MSG_BCLISTINGOUTPUT_TO_ARRAY)
 
     def to_dataframe(self):
         """Convert the command output to a Pandas Dataframe.
@@ -664,15 +699,15 @@ def to_dataframe(self):
         """
         df = super().to_dataframe(data=self.to_list())
         if 'NODE' in df.columns:
-            df['NODE'] = df['NODE'].astype(int)
+            df['NODE'] = df['NODE'].astype(np.int32, copy=False)
 
         if 'LABEL' in df.columns:
-            df['LABEL'] = df['LABEL'].astype(str)
+            df['LABEL'] = df['LABEL'].astype(str, copy=False)
 
         if 'REAL' in df.columns:
-            df['REAL'] = df['REAL'].astype(float)
+            df['REAL'] = df['REAL'].astype(np.float64, copy=False)
 
         if 'IMAG' in df.columns:
-            df['IMAG'] = df['IMAG'].astype(float)
+            df['IMAG'] = df['IMAG'].astype(np.float64, copy=False)
 
         return df
diff --git a/tests/test_commands.py b/tests/test_commands.py
@@ -64,6 +64,34 @@
        3  -0.7065315064E+007 -0.4038004530E+007
        4  -0.4297798077E+007 -0.2476291263E+007"""
 
+PRNSOL_OUT_LONG = """PRINT F    REACTION SOLUTIONS PER NODE
+
+ *** ANSYS - ENGINEERING ANALYSIS SYSTEM  RELEASE 2021 R2          21.2     ***
+ DISTRIBUTED Ansys Mechanical Enterprise
+
+ 00000000  VERSION=LINUX x64     15:56:42  JAN 13, 2022 CP=      0.665
+
+
+
+
+
+  ***** POST1 TOTAL REACTION SOLUTION LISTING *****
+
+  LOAD STEP=     1  SUBSTEP=     1
+   TIME=    1.0000      LOAD CASE=   0
+
+  THE FOLLOWING X,Y,Z SOLUTIONS ARE IN THE GLOBAL COORDINATE SYSTEM
+
+    NODE       FX           FY
+       1  0.12875E+008 0.42667E+007
+       2 -0.15120E+007 0.22476E+007
+       3 -0.70653E+007-0.40380E+007
+       4 -0.42978E+007-0.24763E+007
+
+ TOTAL VALUES
+ VALUE  -0.37253E-008 0.46566E-009
+"""
+
 
 CMD_DOC_STRING_INJECTOR = CMD_LISTING.copy()
 CMD_DOC_STRING_INJECTOR.extend(CMD_BC_LISTING)
@@ -119,7 +147,8 @@ def test_cmd_class_prnsol_short():
     out_array = out.to_array()
 
     assert isinstance(out, CommandListingOutput)
-    assert isinstance(out_list, list) and bool(out_list)
+    assert isinstance(out_list, list)
+    assert out_list
     assert isinstance(out_array, np.ndarray) and out_array.size != 0
 
     if HAS_PANDAS:
@@ -154,7 +183,7 @@ def test_output_listing(mapdl, plastic_solve, func, args):
     out_array = out.to_array()
 
     assert isinstance(out, CommandListingOutput)
-    assert isinstance(out_list, list) and bool(out_list)
+    assert isinstance(out_list, list) and out_list
     assert isinstance(out_array, np.ndarray) and out_array.size != 0
 
     if HAS_PANDAS:
@@ -167,9 +196,10 @@ def test_bclist(mapdl, beam_solve, func):
     func_ = getattr(mapdl, func)
     out = func_()
 
-    assert isinstance(out, BoundaryConditionsListingOutput)
-    assert isinstance(out.to_list(), list) and bool(out.to_list())
+    out_list = out.to_list()
 
+    assert isinstance(out, BoundaryConditionsListingOutput)
+    assert isinstance(out_list, list) and out_list
     with pytest.raises(ValueError):
         out.to_array()
 
@@ -184,7 +214,9 @@ def test_docstring_injector(mapdl, method):
     for name in dir(mapdl):
         if name[0:4].upper() == method:
             func = mapdl.__getattribute__(name)
-            docstring = func.__doc__ # If '__func__' not present (AttributeError) very likely it has not been wrapped.
+            # If '__func__' not present (AttributeError) very likely it has not
+            # been wrapped.
+            docstring = func.__doc__
 
             assert "Returns" in docstring
             assert "``str.to_list()``" in docstring