Commit
Merge pull request #2930 from PrincetonUniversity/remove_autograd
Remove autograd
davidt0x authored Apr 16, 2024
2 parents e2f96f0 + c727250 commit 0170522
Showing 11 changed files with 96 additions and 55 deletions.
1 change: 0 additions & 1 deletion README.rst
@@ -146,7 +146,6 @@ Dependencies that are automatically installed (except those noted as optional) i
* pillow
* llvmlite
* mpi4py (optional)
* autograd (optional)

Lists of required packages for PsyNeuLink, developing PsyNeuLink, and running the PsyNeuLink tutorial are also
stored in pip-style `requirements.txt`, `dev_requirements.txt`, and `tutorial_requirements.txt` in the source code.
@@ -18,7 +18,7 @@
<https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1006043&rev=2>`_
"""

import autograd.numpy as np
import numpy as np
import psyneulink as pnl

# from build_stimuli_VZ import xor_dict
1 change: 0 additions & 1 deletion docs/source/index.rst
@@ -178,7 +178,6 @@ Dependencies that are automatically installed (except those noted as optional) i
* pillow
* llvmlite
* mpi4py (optional)
* autograd (optional)

Lists of required packages for PsyNeuLink, developing PsyNeuLink, and running the PsyNeuLink tutorial are also
stored in pip-style `requirements.txt`, `dev_requirements.txt`, and `tutorial_requirements.txt` in the source code.
1 change: 0 additions & 1 deletion docs/source/index_logo_with_text.rst
@@ -163,7 +163,6 @@ Dependencies that are automatically installed (except those noted as optional) i
* pillow
* llvmlite
* mpi4py (optional)
* autograd (optional)

Lists of required packages for PsyNeuLink, developing PsyNeuLink, and running the PsyNeuLink tutorial are also
stored in pip-style `requirements.txt`, `dev_requirements.txt`, and `tutorial_requirements.txt` in the source code.
@@ -31,6 +31,13 @@
from numbers import Number

import numpy as np

# Conditionally import torch
try:
import torch
except ImportError:
torch = None

from beartype import beartype

from psyneulink._typing import Optional, Union, Callable, Literal
@@ -942,8 +949,9 @@ class GradientOptimization(OptimizationFunction):
which should be the derivative of the `objective_function <GradientOptimization.objective_function>`
with respect to `variable <GradientOptimization.variable>` at its current value:
:math:`\\frac{d(objective\\_function(variable))}{d(variable)}`. If the **gradient_function** argument of the
constructor is not specified, then an attempt is made to use `Autograd's <https://github.com/HIPS/autograd>`_ `grad
<autograd.grad>` method to generate `gradient_function <GradientOptimization.gradient_function>`. If that fails,
constructor is not specified, then an attempt is made to use PyTorch functional
`autograd's <https://pytorch.org/docs/stable/generated/torch.func.grad.html>`_ `grad <torch.func.grad>`
method to generate `gradient_function <GradientOptimization.gradient_function>`. If that fails,
an error occurs. The **search_space** argument can be used to specify lower and/or upper bounds for each dimension
of the sample; if the gradient causes a value of the sample to exceed a bound along a dimension, the value of the
bound is used for that dimension, unless/until the gradient shifts and causes it to return back within the bound.
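
For reference, a minimal sketch of the torch.func.grad usage described here (hypothetical toy objective, not PsyNeuLink's; assumes PyTorch >= 2.0, which provides torch.func):

import torch  # assumption: PyTorch >= 2.0 installed

# Toy objective built only from operations (** and .sum()) that torch can trace
def objective(x):
    return (x ** 2).sum()

grad_fn = torch.func.grad(objective)           # d(objective)/d(x) as a callable
print(grad_fn(torch.tensor([1.0, 2.0, 3.0])))  # tensor([2., 4., 6.]), i.e. 2*x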
@@ -963,7 +971,7 @@ class GradientOptimization(OptimizationFunction):
gradient_function : function
specifies function used to compute the gradient in each iteration of the `optimization process
<GradientOptimization_Procedure>`; if it is not specified, an attempt is made to compute it using
`autograd.grad <https://github.com/HIPS/autograd>`_.
`PyTorch autograd's <https://pytorch.org/docs/stable/generated/torch.func.grad.html>`_ `grad <torch.func.grad>`.
direction : ASCENT or DESCENT : default ASCENT
specifies the direction of gradient optimization: if *ASCENT*, movement is attempted in the positive direction
@@ -1247,15 +1255,37 @@ def reset(self, default_variable=None, objective_function=None, context=None, **
**kwargs
)

# Differentiate objective_function using autograd.grad()
# Differentiate objective_function using torch.func.grad()
if objective_function is not None and not self.gradient_function:

if torch is None:
raise ValueError("PyTorch is not installed. Please install PyTorch to use GradientOptimization without "
"specifying a gradient_function.")

if 'func' not in dir(torch):
raise ValueError("torch.func.grad not found. PyTorch version is probably too old. Please upgrade "
"PyTorch to >= 2.0 to use GradientOptimization without specifying a "
"gradient_function.")

try:
from autograd import grad
self.parameters.gradient_function._set(grad(self.objective_function), context)
except:
raise OptimizationFunctionError("Unable to use autograd with {} specified for {} Function: {}.".
# Need to wrap objective_function in a lambda to pass to grad because it needs to return a torch tensor
def func_wrapper(x, context):
return torch.tensor(self.objective_function(x, context))

# Get the gradient of the objective function with pytorch autograd
gradient_func = torch.func.grad(func_wrapper)

# We need to wrap the gradient function in a lambda as well because we need to convert back to numpy
def gradient_func_wrapper(x, context):
return gradient_func(torch.from_numpy(x), context).detach().numpy()

self.parameters.gradient_function._set(gradient_func_wrapper, context)

except Exception as ex:

raise OptimizationFunctionError("Unable to use PyTorch autograd with {} specified for {} Function: {}.".
format(repr(OBJECTIVE_FUNCTION), self.__class__.__name__,
objective_function.__name__))
objective_function.__name__)) from ex
search_space = self.search_space
bounds = None
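
For reference, a self-contained sketch of the NumPy-to-tensor round trip performed by the wrappers above (hypothetical names; assumes PyTorch >= 2.0 and an objective whose operations also work on torch tensors):

import numpy as np
import torch  # assumption: PyTorch >= 2.0 installed

def objective(x):
    # Stand-in objective; ** and .sum() behave the same on ndarrays and tensors
    return (x ** 2).sum()

grad_fn = torch.func.grad(objective)

def gradient_function(x):
    # ndarray in -> tensor -> gradient tensor -> ndarray out
    return grad_fn(torch.from_numpy(x)).detach().numpy()

print(gradient_function(np.array([1.0, 2.0, 3.0])))  # [2. 4. 6.]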

@@ -722,8 +722,6 @@ def _function(self,
scale = self._get_current_parameter_value(SCALE, context)
offset = self._get_current_parameter_value(OFFSET, context)

# The following doesn't work with autograd (https://github.com/HIPS/autograd/issues/416)
# result = scale * np.exp(rate * variable + bias) + offset
result = scale * e**(rate * variable + bias) + offset
return self.convert_output_type(result)

@@ -1022,8 +1020,6 @@ def _function(self,
offset = self._get_current_parameter_value(OFFSET, context)
scale = self._get_current_parameter_value(SCALE, context)

# The following doesn't work with autograd (https://github.com/HIPS/autograd/issues/416)
# result = 1. / (1 + np.exp(-gain * (variable - bias) + offset))
result = scale * (1. / (1 + e**(-gain * (variable + bias - x_0) + offset)))

return self.convert_output_type(result)
@@ -1346,9 +1342,6 @@ def _function(self,
offset = self._get_current_parameter_value(OFFSET, context)
scale = self._get_current_parameter_value(SCALE, context)

# The following probably doesn't work with autograd (https://github.com/HIPS/autograd/issues/416)
# (since np.exp doesn't work)
# result = 1. / (1 + np.tanh(-gain * (variable - bias) + offset))
exponent = -2 * (gain * (variable + bias - x_0) + offset)
result = scale * (1 - e**exponent)/ (1 + e**exponent)

@@ -2437,7 +2430,6 @@ def _function(self,
offset = self._get_current_parameter_value(OFFSET, context)
random_state = self._get_current_parameter_value('random_state', context)

# The following doesn't work with autograd (https://github.com/HIPS/autograd/issues/416)
result = scale * random_state.normal(variable + bias, variance) + offset

return self.convert_output_type(result)
@@ -2846,7 +2838,6 @@ def _function(self,
result = variable

else:
# ??Not sure whether the following works with autograd (https://github.com/HIPS/autograd/issues/416)
p = p or self.defaults.p
self.binomial_distort.parameters.p.set(p, context)
result = self.binomial_distort(variable) * (1 / (1 - p))
35 changes: 22 additions & 13 deletions psyneulink/core/globals/utilities.py
@@ -123,6 +123,12 @@

import numpy as np

# Conditionally import torch
try:
import torch
except ImportError:
torch = None

from psyneulink.core.globals.keywords import \
comparison_operators, DISTANCE_METRICS, EXPONENTIAL, GAUSSIAN, LINEAR, MATRIX_KEYWORD_VALUES, NAME, SINUSOID, VALUE

@@ -619,17 +625,7 @@ def recursively_check_elements_for_numeric(value):
else:
return True
else:
if not is_number(value):
try:
# True for autograd ArrayBox (and maybe other types?)
# if isinstance(value._value, Number):
from autograd.numpy.numpy_boxes import ArrayBox
if isinstance(value, ArrayBox):
return True
except:
return False
else:
return True
return is_number(value)
# Test copy since may need to convert matrix to array (see above)
if not recursively_check_elements_for_numeric(candidate.copy()):
return False
@@ -1021,6 +1017,11 @@ def convert_to_np_array(value, dimension=None):
"""
def safe_create_np_array(value):
with warnings.catch_warnings():

# If we have a torch tensor, allow it to pass through unchanged
if torch and torch.is_tensor(value):
return value

warnings.filterwarnings('error', category=np.VisibleDeprecationWarning)
# NOTE: this will raise a ValueError in the future.
# See https://numpy.org/neps/nep-0034-infer-dtype-is-object.html
@@ -1053,7 +1054,11 @@ def safe_create_np_array(value):
value = safe_create_np_array(value)

if dimension == 1:
value = np.atleast_1d(value)
if torch and torch.is_tensor(value):
value = torch.atleast_1d(value)
else:
value = np.atleast_1d(value)

elif dimension == 2:
# Array is made up of non-uniform elements, so treat as 2d array and pass
if (
@@ -1063,7 +1068,11 @@ def safe_create_np_array(value):
):
pass
else:
value = np.atleast_2d(value)
if torch and torch.is_tensor(value):
value = torch.atleast_2d(value)
else:
value = np.atleast_2d(value)

elif dimension is not None:
raise UtilitiesError("dimension param ({0}) must be None, 1, or 2".format(dimension))
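
For reference, a simplified sketch of the tensor pass-through behavior added to convert_to_np_array (hypothetical helper name, not the library function itself; torch is optional and NumPy remains the fallback):

import numpy as np

try:
    import torch
except ImportError:
    torch = None

def as_at_least_2d(value):
    # Keep torch tensors as tensors; promote everything else via NumPy
    if torch is not None and torch.is_tensor(value):
        return torch.atleast_2d(value)
    return np.atleast_2d(value)

print(as_at_least_2d([1.0, 2.0]).shape)                    # (1, 2) ndarray
if torch is not None:
    print(as_at_least_2d(torch.tensor([1.0, 2.0])).shape)  # torch.Size([1, 2])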

3 changes: 2 additions & 1 deletion psyneulink/library/compositions/gymforagercfa.py
@@ -177,7 +177,8 @@ def evaluate(self, feature_values, control_allocation, num_estimates, num_trials
.. note::
If this method is assigned as the `objective_function` of a `GradientOptimization` `Function`,
it is differentiated using `autograd <https://github.com/HIPS/autograd>`_\\.grad().
it is differentiated using
`PyTorch autograd's <https://pytorch.org/docs/stable/generated/torch.func.grad.html>`_ `grad <torch.func.grad>`.
"""
predicted_outcome=0

12 changes: 7 additions & 5 deletions psyneulink/library/compositions/regressioncfa.py
@@ -346,7 +346,9 @@ def evaluate(self, feature_values, control_allocation, num_estimates, num_trials
.. note::
If this method is assigned as the `objective_function` of a `GradientOptimization` `Function`,
it is differentiated using `autograd <https://github.com/HIPS/autograd>`_\\.grad().
it is differentiated using
`PyTorch autograd's <https://pytorch.org/docs/stable/generated/torch.func.grad.html>`_
`grad <torch.func.grad>`.
"""

predicted_outcome=0
@@ -633,23 +635,23 @@ def compute_terms(self, control_allocation, context=None):
computed_terms[PV.F] = f = self.terms[PV.F.value]

# Compute value of each control_signal from its variable
c = [None] * len(control_allocation)
c = np.zeros((len(control_allocation), ))
for i, var in enumerate(control_allocation):
c[i] = self.control_signal_functions[i](var, context=context)
computed_terms[PV.C] = c = np.array(c)
computed_terms[PV.C] = c

# Compute costs for new control_signal values
if PV.COST in terms:
# computed_terms[PV.COST] = -(np.exp(0.25*c-3))
# computed_terms[PV.COST] = -(np.exp(0.25*c-3) + (np.exp(0.25*np.abs(c-self.control_signal_change)-3)))
costs = [None] * len(c)
costs = np.zeros((len(control_allocation),))
for i, val in enumerate(c):
# MODIFIED 11/9/18 OLD:
costs[i] = -(self._compute_costs[i](val, context=context))
# # MODIFIED 11/9/18 NEW: [JDC]
# costs[i] = -(self._compute_costs[i](val, ref_variables[i]))
# MODIFIED 11/9/18 END
computed_terms[PV.COST] = np.array(costs)
computed_terms[PV.COST] = costs

# Compute terms interaction that are used
if any(term in terms for term in [PV.FF, PV.FFC, PV.FFCC]):
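For reference, a minimal illustration of the preallocation pattern used in compute_terms above (hypothetical stand-ins for control_signal_functions and control_allocation):

import numpy as np

# Hypothetical stand-ins for self.control_signal_functions and control_allocation
control_signal_functions = [lambda v: 2.0 * v, lambda v: v + 1.0]
control_allocation = [0.5, 1.5]

# Preallocate a float ndarray and fill it in place, rather than building a
# list of None and converting with np.array afterwards
c = np.zeros((len(control_allocation),))
for i, var in enumerate(control_allocation):
    c[i] = control_signal_functions[i](var)

print(c)  # [1.  2.5]
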
1 change: 0 additions & 1 deletion requirements.txt
@@ -1,4 +1,3 @@
autograd<1.7
beartype<0.18.0
dill<0.3.9
fastkde>=1.0.24, <1.0.31
38 changes: 25 additions & 13 deletions tests/composition/test_control.py
@@ -1840,29 +1840,41 @@ def test_lvoc_both_predictors_specs(self):

assert len(lvoc.input_ports) == 5

@pytest.mark.pytorch
def test_lvoc_features_function(self):
m1 = pnl.TransferMechanism(input_ports=["InputPort A", "InputPort B"])
m2 = pnl.TransferMechanism()
c = pnl.Composition()
c.add_node(m1, required_roles=pnl.NodeRole.INPUT)
c.add_node(m2, required_roles=pnl.NodeRole.INPUT)
c._analyze_graph()
lvoc = pnl.OptimizationControlMechanism(agent_rep=pnl.RegressionCFA,
state_features=[m1.input_ports[0], m1.input_ports[1], m2.input_port, m2],
state_feature_function=pnl.LinearCombination(offset=10.0),
objective_mechanism=pnl.ObjectiveMechanism(
monitor=[m1, m2]),
function=pnl.GradientOptimization(max_iterations=1),
control_signals=[(pnl.SLOPE, m1), (pnl.SLOPE, m2)])
c.add_node(lvoc)
input_dict = {m1: [[1], [1]], m2: [1]}

c.run(inputs=input_dict)
ocm_kwargs = dict(agent_rep=pnl.RegressionCFA,
state_features=[m1.input_ports[0], m1.input_ports[1], m2.input_port, m2],
state_feature_function=pnl.LinearCombination(offset=10.0),
objective_mechanism=pnl.ObjectiveMechanism(
monitor=[m1, m2]),
function=pnl.GradientOptimization(max_iterations=1),
control_signals=[(pnl.SLOPE, m1), (pnl.SLOPE, m2)])

assert len(lvoc.input_ports) == 5
import torch
if 'func' in dir(torch):
lvoc = pnl.OptimizationControlMechanism(**ocm_kwargs)

c.add_node(lvoc)
input_dict = {m1: [[1], [1]], m2: [1]}

c.run(inputs=input_dict)

assert len(lvoc.input_ports) == 5

for i in range(1, 5):
assert lvoc.input_ports[i].function.offset == 10.0

else:
with pytest.raises(ValueError):
pnl.OptimizationControlMechanism(**ocm_kwargs)

for i in range(1,5):
assert lvoc.input_ports[i].function.offset == 10.0

@pytest.mark.control
@pytest.mark.composition
