Merge pull request #700 from PrincetonUniversity/devel
Devel
KristenManning authored Mar 5, 2018
2 parents 90c9ab4 + 4e7bf32 commit 261f368
Showing 35 changed files with 1,627 additions and 663 deletions.
21 changes: 0 additions & 21 deletions .idea/runConfigurations/Nieuwenhuis_documented.xml

This file was deleted.

21 changes: 0 additions & 21 deletions .idea/runConfigurations/scratch.xml

This file was deleted.

21 changes: 0 additions & 21 deletions .idea/runConfigurations/scratch_1.xml

This file was deleted.

4 changes: 3 additions & 1 deletion Scripts/Examples/EVC-Gratton.py
@@ -75,7 +75,9 @@
{
pnl.NAME: 'OFFSET RT',
pnl.INDEX: 2,
pnl.ASSIGN: pnl.Linear(0, slope=0.3, intercept=1).function
pnl.ASSIGN: pnl.Linear(0, slope=0.3, intercept=1)
# pnl.VARIABLE:[(pnl.OWNER_VALUE,2)],
# pnl.FUNCTION: pnl.Linear(0, slope=0.3, intercept=1)
}
],
)
2 changes: 1 addition & 1 deletion Scripts/Examples/EVC_MARKUS_control.py
@@ -47,7 +47,7 @@
{
pnl.NAME: 'OFFSET RT',
pnl.INDEX: 2,
pnl.ASSIGN: pnl.Linear(0, slope=1.0, intercept=1).function
pnl.ASSIGN: pnl.Linear(0, slope=1.0, intercept=1)
}
],) #drift_rate=(1.0),threshold=(0.2645),noise=(0.5),starting_point=(0), t0=0.15
Decision.set_log_conditions('DECISION_VARIABLE')
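
Both EVC example scripts above make the same change to the 'OFFSET RT' OutputState spec: the pnl.ASSIGN entry now receives the configured pnl.Linear Function object itself rather than its bound .function method (the commented-out pnl.VARIABLE / pnl.FUNCTION lines in EVC-Gratton.py show an equivalent alternative form). The snippet below is only a generic Python sketch of that distinction, using a toy LinearToy class as a stand-in; it is not PsyNeuLink's actual Function machinery.

# Generic illustration only -- LinearToy is a stand-in, not pnl.Linear.
class LinearToy:
    """Toy linear transform: f(x) = slope * x + intercept."""
    def __init__(self, default_input=0, slope=1.0, intercept=0.0):
        self.default_input = default_input
        self.slope = slope
        self.intercept = intercept
    def function(self, x):
        return self.slope * x + self.intercept

lin = LinearToy(0, slope=0.3, intercept=1)
assign_old = lin.function        # old style: hand over only the bound method
assign_new = lin                 # new style: hand over the configured object
print(assign_old(2.0))                          # 1.6
print(assign_new.function(2.0))                 # 1.6
print(assign_new.slope, assign_new.intercept)   # 0.3 1
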
2 changes: 1 addition & 1 deletion Scripts/Examples/Multilayer-Learning.py
@@ -109,7 +109,7 @@ def show_target(system):

mySystem.reportOutputPref = True
# Shows graph will full information:
mySystem.show_graph(show_learning=pnl.ALL, show_dimensions=pnl.ALL)
# mySystem.show_graph(show_learning=pnl.ALL, show_dimensions=pnl.ALL)
# Shows minimal graph:
# mySystem.show_graph()

73 changes: 49 additions & 24 deletions Scripts/Examples/RL-DDM.py
@@ -16,14 +16,6 @@
name='Input Layer'
)

# def decision_variable_to_one_hot(x):
# """Generate "one-hot" 1d array designating selected action from DDM's scalar decision variable
# (used to generate value of OutputState for action_selection Mechanism"""
# if x > 0:
# return [1,0]
# else:
# return [0,-1]

# Takes sum of input layer elements as external component of drift rate
# Notes:
# - drift_rate parameter in constructor for DDM is the "internally modulated" component of the drift_rate;
@@ -40,7 +32,7 @@
starting_point=pnl.CONTROL,
noise=pnl.CONTROL,
),
output_states=[pnl.DECISION_VARIABLE_ARRAY],
output_states=[pnl.SELECTED_INPUT_ARRAY],
name='DDM'
)

@@ -53,8 +45,8 @@
p = pnl.Process(
default_variable=[0, 0],
# pathway=[input_layer, np.array([[1],[-1]]), action_selection],
pathway=[input_layer, action_selection],
learning=pnl.LearningProjection(learning_function=pnl.Reinforcement(learning_rate=0.05)),
pathway=[input_layer, pnl.IDENTITY_MATRIX, action_selection],
learning=pnl.LearningProjection(learning_function=pnl.Reinforcement(learning_rate=0.5)),
target=0
)

@@ -73,23 +65,56 @@
def print_header(system):
print("\n\n**** Time: ", system.scheduler_processing.clock.simple_time)
def show_weights():
print('Reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix)
print(
'\nAction selected: {}; predicted reward: {}'.format(
np.nonzero(action_selection.output_state.value)[0][0],
action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0]
)
)
print('\nReward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix)
# print(
# '\nAction selected: {}; predicted reward: {}'.format(
# np.nonzero(action_selection.output_state.value)[0][0],
# action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0]
# )
# )
comparator = action_selection.output_state.efferents[0].receiver.owner
learn_mech = action_selection.output_state.efferents[1].receiver.owner
print('\nact_sel_in_state variable: {} '
'\nact_sel_in_state value: {} '
'\naction_selection variable: {} '
'\naction_selection output: {} '
'\ncomparator sample: {} '
'\ncomparator target: {} '
'\nlearning mech act in: {} '
'\nlearning mech act out: {} '
'\nlearning mech error in: {} '
'\nlearning mech error out: {} '
'\nlearning mech learning_sig: {} '
# '\npredicted reward: {} '
.format(
action_selection.input_states[0].variable,
action_selection.input_states[0].value,
action_selection.variable,
action_selection.output_state.value,
comparator.input_states[pnl.SAMPLE].value,
comparator.input_states[pnl.TARGET].value,
learn_mech.input_states[pnl.ACTIVATION_INPUT].value,
learn_mech.input_states[pnl.ACTIVATION_OUTPUT].value,
learn_mech.input_states[pnl.ERROR_SIGNAL].value,
learn_mech.output_states[pnl.ERROR_SIGNAL].value,
learn_mech.output_states[pnl.LEARNING_SIGNAL].value,
# action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0]
))


# Specify reward values associated with each action (corresponding to elements of esaction_selection.output_state.value)
# reward_values = [10, 0]
reward_values = [0, 10]

# Used by System to generate a reward on each trial based on the outcome of the action_selection (DDM) Mechanism
def reward():
return [reward_values[int(np.nonzero(action_selection.output_state.value)[0])]]
"""Return the reward associated with the selected action"""
selected_action = action_selection.output_state.value
if not any(selected_action):
# Deal with initialization, during which action_selection.output_state.value may == [0,0]
selected_action = np.array([1,0])
return [reward_values[int(np.nonzero(selected_action)[0])]]

# Specify reward values for the (one-hot coded) actions returned by action_selection.output_state.value
# Note: the first (uncommented) reward array favors the left action,
# and the second (commented) one favors the right action.
reward_values = [10, 0]
# reward_values = [0, 10]

# Input stimuli for run of the System.
# Notes:
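
The rewritten reward() helper above indexes a one-hot action vector into reward_values and falls back to the left action while the DDM's output is still all zeros during initialization. Below is a standalone NumPy-only sketch of that lookup logic, outside of PsyNeuLink, with hypothetical sample vectors:

import numpy as np

reward_values = [10, 0]   # left action rewarded, as in the updated script

def lookup_reward(selected_action):
    """Return the reward for a one-hot action vector; default to the
    left action if the vector is still all zeros (pre-initialization)."""
    selected_action = np.asarray(selected_action)
    if not selected_action.any():
        selected_action = np.array([1, 0])
    return [reward_values[int(np.nonzero(selected_action)[0][0])]]

print(lookup_reward([0, 0]))   # [10] -> initialization fallback
print(lookup_reward([1, 0]))   # [10] -> left action
print(lookup_reward([0, 1]))   # [0]  -> right action
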
37 changes: 30 additions & 7 deletions Scripts/Examples/Reinforcement-Learning.py
@@ -36,6 +36,7 @@


def reward():
"""Return the reward associated with the selected action"""
return [reward_values[int(np.nonzero(action_selection.output_state.value)[0])]]


@@ -44,15 +45,37 @@ def print_header(system):


def show_weights():
print('Reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix)
print(
'\nAction selected: {}; predicted reward: {}'.format(
np.nonzero(action_selection.output_state.value)[0][0],
action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0]
)
# print('Reward prediction weights: \n', action_selection.input_state.path_afferents[0].matrix)
# print(
# '\nAction selected: {}; predicted reward: {}'.format(
# np.nonzero(action_selection.output_state.value)[0][0],
# action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0]
# )
assert True
comparator = action_selection.output_state.efferents[0].receiver.owner
learn_mech = action_selection.output_state.efferents[1].receiver.owner
print('\n'
'\naction_selection output: {} '
'\ncomparator sample: {} '
'\ncomparator target: {} '
'\nlearning mech act in: {} '
'\nlearning mech act out: {} '
'\nlearning mech error in: {} '
'\nlearning mech error out: {} '
'\nlearning mech learning_sig: {} '
'\npredicted reward: {} '.
format(
action_selection.output_state.value,
comparator.input_states[pnl.SAMPLE].value,
comparator.input_states[pnl.TARGET].value,
learn_mech.input_states[pnl.ACTIVATION_INPUT].value,
learn_mech.input_states[pnl.ACTIVATION_OUTPUT].value,
learn_mech.input_states[pnl.ERROR_SIGNAL].value,
learn_mech.output_states[pnl.ERROR_SIGNAL].value,
learn_mech.output_states[pnl.LEARNING_SIGNAL].value,
action_selection.output_state.value[np.nonzero(action_selection.output_state.value)][0])
)


p.run(
num_trials=10,
inputs=[[[1, 1, 1]]],
37 changes: 35 additions & 2 deletions Scripts/Examples/Rumelhart Semantic Network.py
@@ -9,7 +9,9 @@
# At present, it implements only the structure of the network, as shown below:

# Semantic Network:
# _
# _
# R_STEP P_STEP Q_STEP A_STEP | Readout Processes
# | | / / _______|
# REP PROP QUAL ACT |
# \___\__/____/ |
# | _ | Output Processes
@@ -21,6 +23,14 @@

# It does not yet implement learning or testing.


def step(variable):
if np.sum(variable)<.5:
out=0
else:
out=1
return(out)

#Processing Units:
rep_in = pnl.TransferMechanism(size=10, name='REP_IN')
rel_in = pnl.TransferMechanism(size=11, name='REL_IN')
@@ -30,6 +40,10 @@
prop_out = pnl.TransferMechanism(size=12, function=pnl.Logistic, name='PROP_OUT')
qual_out = pnl.TransferMechanism(size=13, function=pnl.Logistic, name='QUAL_OUT')
act_out = pnl.TransferMechanism(size=14, function=pnl.Logistic, name='ACT_OUT')
r_step = pnl.ProcessingMechanism(size=10, function=step, name='REP_STEP')
p_step = pnl.ProcessingMechanism(size=12, function=step, name='PROP_STEP')
q_step = pnl.ProcessingMechanism(size=13, function=step, name='QUAL_STEP')
a_step = pnl.ProcessingMechanism(size=14, function=step, name='ACT_STEP')

#Processes that comprise the System:
# NOTE: this is one of several configuration of processes that can be used to construct the full network
@@ -53,8 +67,27 @@
learning=pnl.LEARNING,
name='REL_ACT_PROC')

rep_step_proc = pnl.Process(pathway=[rep_out, r_step],
name='REP_STEP_PROC')
act_step_proc = pnl.Process(pathway=[act_out, a_step],
name='ACT_STEP_PROC')
qual_step_proc = pnl.Process(pathway=[qual_out, q_step],
name='QUAL_STEP_PROC')
prop_step_proc = pnl.Process(pathway=[prop_out, p_step],
name='PROP_STEP_PROC')


# The System:
S = pnl.System(processes=[rep_hidden_proc, rel_hidden_proc, rel_rep_proc, rel_prop_proc, rel_qual_proc, rel_act_proc])
S = pnl.System(processes=[rep_hidden_proc,
rel_hidden_proc,
rel_rep_proc,
rel_prop_proc,
rel_qual_proc,
rel_act_proc,
rep_step_proc,
act_step_proc,
qual_step_proc,
prop_step_proc])

# Shows just the processing network:
# S.show_graph(show_dimensions=True)
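
The four new *_STEP mechanisms above wrap a shared step() readout (defined near the top of the script) that thresholds the summed activity of a logistic output layer at 0.5. In isolation the readout behaves as in this standalone sketch; the array sizes and activity levels are arbitrary, chosen only for illustration:

import numpy as np

def step(variable):
    """Threshold readout: 0 if the summed input is below 0.5, else 1."""
    if np.sum(variable) < .5:
        out = 0
    else:
        out = 1
    return out

weak   = np.full(12, 0.02)   # summed activity 0.24 -> 0
strong = np.full(12, 0.10)   # summed activity 1.2  -> 1
print(step(weak), step(strong))   # 0 1
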
5 changes: 1 addition & 4 deletions Scripts/Models/GilzenratModel.py
@@ -155,7 +155,7 @@
task = pnl.System(processes=[decision_process])

# This displays a diagram of the System
task.show_graph(show_dimensions=True)
task.show_graph()


# Create Stimulus -----------------------------------------------------------------------------------------------------
@@ -263,6 +263,3 @@ def record_trial():
plt.show()

task.show()

# This displays a diagram of the System
task.show_graph()
4 changes: 2 additions & 2 deletions Scripts/Models/Nieuwenhuis2005Model.py
@@ -209,10 +209,10 @@
LC_results = LC.log.nparray() # get logged results
LC_results_w = np.zeros([trials]) # get LC_results_w
for i in range(trials):
LC_results_w[i] = LC_results[3][i+1][3][0]
LC_results_w[i] = LC_results[4][i+1][3][0]
LC_results_v = np.zeros([trials]) # get LC_results_v
for i in range(trials):
LC_results_v[i] = LC_results[3][i+1][2][0]
LC_results_v[i] = LC_results[4][i+1][2][0]

def h_v(v, C, d): # Compute h(v)
return C * v + (1 - C) * d