diff --git a/Scripts/Models (Under Development)/N-back MODULARIZED.py b/Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py similarity index 99% rename from Scripts/Models (Under Development)/N-back MODULARIZED.py rename to Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py index 87ee56c5221..74dcf24e51d 100644 --- a/Scripts/Models (Under Development)/N-back MODULARIZED.py +++ b/Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py @@ -151,7 +151,7 @@ def construct_model(num_tasks, stim_size, context_size, hidden_size, display=Fal hidden, decision], name="WORKING MEMORY (fnn)") comp = Composition(nodes=[stim, context, task, em, ffn, control], - name="N-Back Model") + name="N-back Model") comp.add_projection(MappingProjection(), stim, input_current_stim) comp.add_projection(MappingProjection(), context, input_current_context) comp.add_projection(MappingProjection(), task, input_task) diff --git a/Scripts/Models (Under Development)/N-back/N-back.py b/Scripts/Models (Under Development)/N-back/N-back.py new file mode 100644 index 00000000000..6504493494a --- /dev/null +++ b/Scripts/Models (Under Development)/N-back/N-back.py @@ -0,0 +1,537 @@ +""" +This implements a model of the `N-back task `_ +described in `Beukers et al. (2022) `_. The model uses a simple implementation of episodic +(content-addressable) memory to store previous stimuli and the temporal context in which they occured, +and a feedforward neural network to evaluate whether the current stimulus is a match to the n'th preceding stimulus +(n-back level). This model is an example of proposed interactions between working memory (e.g., in neocortex) and +episodic memory e.g., in hippocampus and/or cerebellum) in the performance of tasks demanding of sequential processing +and control, and along the lines of models emerging machine learning that augment the use of recurrent neural networks +(e.g., long short-term memory mechanisms; LSTMs) for active memory and control with an external memory capable of +rapid storage and content-based retrieval, such as the Neural Turing Machine (NTN; `Graves et al., 2016 +`_), Episodic Planning Networks (EPN; `Ritter et al., 2020 +`_), and Emergent Symbols through Binding Networks (ESBN; `Webb et al., 2021 +`_). + +There are three primary methods in the script: + +* construct_model(args): + takes as arguments parameters used to construct the model; for convenience, defaults are defined below, + (under "Construction parameters") + +* train_network(args) + takes as arguments the feedforward neural network Composition (FFN_COMPOSITION) and number of epochs to train. + Note: learning_rate is set at construction (can specify using LEARNING_RATE under "Training parameters" below). + +* run_model() + takes the context drift rate to be applied on each trial and the number of trials to execute as args, as well as + reporting and animation specifications (see "Execution parameters" below). + +See "Settings for running the script" to specify whether the model is trained and/or executed when the script is run, +and whether a graphic display of the network is generated when it is constructed. + +TODO: + - from Andre + - network architecture; in particular, size of hidden layer and projection patterns to and from it + - the stim+context input vector (length 90) projects to a hidden layer (length 80); + - the task input vector (length 2) projects to a different hidden layer (length 80); + - those two hidden layers project (over fixed, nonlearnable, one-one-projections?) 
to a third hidden layer (length 80) that simply sums them; + - the third hidden layer projects to the length 2 output layer; + - a softmax is taken over the output layer to determine the response. + - fix: were biases trained? + - training: + - learning rate: 0.001; epoch: 1 trial per epoch of training + - fix: state_dict with weights (still needed) + - get empirical stimulus sequences (still needed) + - put N-back script (with pointer to latest version on PNL) in nback-paper repo + - fix: get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) + - fix: warnings on run + - complete documentation in BeukersNbackModel.rst + - validate against nback-paper results + - after validation: + - try with STIM_SIZE = NUM_STIMS rather than 20 (as in nback-paper) + - refactor generate_stim_sequence() to use actual empirical stimulus sequences + - replace get_input_sequence and get_training_inputs with generators passed to nback_model.run() and ffn.learn + +""" + +from graph_scheduler import * + +from psyneulink import * +import numpy as np + +# Settings for running script: +TRAIN = True +RUN = True +DISPLAY_MODEL = False # show visual graphic of model + +# PARAMETERS ------------------------------------------------------------------------------------------------------- + +# Fixed (structural) parameters: +MAX_NBACK_LEVELS = 3 +NUM_STIM = 8 # number of different stimuli in stimulus set - QUESTION: WHY ISN"T THIS EQUAL TO STIM_SIZE OR VICE VERSA? +FFN_TRANSFER_FUNCTION = ReLU + +# Constructor parameters: (values are from nback-paper) +STIM_SIZE=8 # length of stimulus vector +CONTEXT_SIZE=25 # length of context vector +HIDDEN_SIZE=STIM_SIZE*4 # dimension of hidden units in ff +NBACK_LEVELS = [2,3] # Currently restricted to these +NUM_NBACK_LEVELS = len(NBACK_LEVELS) +CONTEXT_DRIFT_NOISE=0.0 # noise used by DriftOnASphereIntegrator (function of Context mech) +RANDOM_WEIGHTS_INITIALIZATION=RandomMatrix(center=0.0, range=0.1) # Matrix spec used to initialize all Projections +RETRIEVAL_SOFTMAX_TEMP=1/8 # express as gain # precision of retrieval process +RETRIEVAL_HAZARD_RATE=0.04 # rate of re=sampling of em following non-match determination in a pass through ffn +RETRIEVAL_STIM_WEIGHT=.05 # weighting of stimulus field in retrieval from em +RETRIEVAL_CONTEXT_WEIGHT = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em +DECISION_SOFTMAX_TEMP=1 + +# Training parameters: +NUM_EPOCHS= 6250 # nback-paper: 400,000 @ one trial per epoch = 6,250 @ 64 trials per epoch +LEARNING_RATE=0.01 # nback-paper: .001 + +# Execution parameters: +CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial +NUM_TRIALS = 48 # number of stimuli presented in a trial sequence +REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run +REPORT_PROGRESS = ReportProgress.OFF # Sets console progress bar during run +REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training +ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution + +# Names of Compositions and Mechanisms: +NBACK_MODEL = "N-back Model" +FFN_COMPOSITION = "WORKING MEMORY (fnn)" +FFN_STIMULUS_INPUT = "CURRENT STIMULUS" +FFN_CONTEXT_INPUT = "CURRENT CONTEXT" +FFN_STIMULUS_RETRIEVED = "RETRIEVED STIMULUS" +FFN_CONTEXT_RETRIEVED = "RETRIEVED CONTEXT" +FFN_TASK = "CURRENT TASK" +FFN_HIDDEN = "HIDDEN LAYER" +FFN_OUTPUT = "DECISION LAYER" +MODEL_STIMULUS_INPUT ='STIM' +MODEL_CONTEXT_INPUT = 'CONTEXT' 
+MODEL_TASK_INPUT = "TASK" +EM = "EPISODIC MEMORY (dict)" +CONTROLLER = "READ/WRITE CONTROLLER" + +# ======================================== MODEL CONSTRUCTION ========================================================= + +def construct_model(stim_size = STIM_SIZE, + context_size = CONTEXT_SIZE, + hidden_size = HIDDEN_SIZE, + num_nback_levels = NUM_NBACK_LEVELS, + context_drift_noise = CONTEXT_DRIFT_NOISE, + retrievel_softmax_temp = RETRIEVAL_SOFTMAX_TEMP, + retrieval_hazard_rate = RETRIEVAL_HAZARD_RATE, + retrieval_stimulus_weight = RETRIEVAL_STIM_WEIGHT, + retrieval_context_weight = RETRIEVAL_CONTEXT_WEIGHT, + decision_softmax_temp = DECISION_SOFTMAX_TEMP): + """Construct nback_model""" + + print(f"constructing '{FFN_COMPOSITION}'...") + + # FEED FORWARD NETWORK ----------------------------------------- + + # inputs: encoding of current stimulus and context, retrieved stimulus and retrieved context, + # output: decision: match [1,0] or non-match [0,1] + # Must be trained to detect match for specified task (1-back, 2-back, etc.) + input_current_stim = TransferMechanism(name=FFN_STIMULUS_INPUT, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_current_context = TransferMechanism(name=FFN_CONTEXT_INPUT, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_stim = TransferMechanism(name=FFN_STIMULUS_RETRIEVED, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_context = TransferMechanism(name=FFN_CONTEXT_RETRIEVED, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_task = TransferMechanism(name=FFN_TASK, + size=num_nback_levels, + function=FFN_TRANSFER_FUNCTION) + hidden = TransferMechanism(name=FFN_HIDDEN, + size=hidden_size, + function=FFN_TRANSFER_FUNCTION) + decision = ProcessingMechanism(name=FFN_OUTPUT, + size=2, function=SoftMax(output=MAX_INDICATOR, + gain=decision_softmax_temp)) + ffn = AutodiffComposition(([{input_current_stim, + input_current_context, + input_retrieved_stim, + input_retrieved_context, + input_task}, + hidden, decision], + RANDOM_WEIGHTS_INITIALIZATION, + ), + name=FFN_COMPOSITION, + learning_rate=LEARNING_RATE + ) + + # FULL MODEL (Outer Composition, including input, EM and control Mechanisms) ------------------------ + + print(f"'constructing {NBACK_MODEL}'...") + + # Stimulus Encoding: takes STIM_SIZE vector as input + stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=stim_size) + + # Context Encoding: takes scalar as drift step for current trial + context = ProcessingMechanism(name=MODEL_CONTEXT_INPUT, + function=DriftOnASphereIntegrator( + initializer=np.random.random(context_size-1), + noise=context_drift_noise, + dimension=context_size)) + + # Task: task one-hot indicating n-back (1, 2, 3 etc.) 
- must correspond to what ffn has been trained to do + task = ProcessingMechanism(name=MODEL_TASK_INPUT, + size=num_nback_levels) + + # Episodic Memory: + # - entries: stimulus (field[0]) and context (field[1]); randomly initialized + # - uses Softmax to retrieve best matching input, subject to weighting of stimulus and context by STIM_WEIGHT + em = EpisodicMemoryMechanism(name=EM, + input_ports=[{NAME:"STIMULUS_FIELD", + SIZE:stim_size}, + {NAME:"CONTEXT_FIELD", + SIZE:context_size}], + function=ContentAddressableMemory( + initializer=[[[0]*stim_size, [0]*context_size]], + distance_field_weights=[retrieval_stimulus_weight, + retrieval_context_weight], + # equidistant_entries_select=NEWEST, + selection_function=SoftMax(output=MAX_INDICATOR, + gain=retrievel_softmax_temp)), + ) + + # Control Mechanism + # Ensures current stimulus and context are only encoded in EM once (at beginning of trial) + # by controlling the storage_prob parameter of em: + # - if outcome of decision signifies a match or hazard rate is realized: + # - set EM[store_prob]=1 (as prep encoding stimulus in EM on next trial) + # - this also serves to terminate trial (see nback_model.termination_processing condition) + # - if outcome of decision signifies a non-match + # - set EM[store_prob]=0 (as prep for another retrieval from EM without storage) + # - continue trial + control = ControlMechanism(name=CONTROLLER, + default_variable=[[1]], # Ensure EM[store_prob]=1 at beginning of first trial + # --------- + # VERSION *WITH* ObjectiveMechanism: + objective_mechanism=ObjectiveMechanism(name="OBJECTIVE MECHANISM", + monitor=decision, + # Outcome=1 if match, else 0 + function=lambda x: int(x[0][1]>x[0][0])), + # Set ControlSignal for EM[store_prob] + function=lambda outcome: int(bool(outcome) + or (np.random.random() > retrieval_hazard_rate)), + # --------- + # # VERSION *WITHOUT* ObjectiveMechanism: + # monitor_for_control=decision, + # # Set Evaluate outcome and set ControlSignal for EM[store_prob] + # # - outcome is received from decision as one hot in the form: [[match, no-match]] + # function=lambda outcome: int(int(outcome[0][1]>outcome[0][0]) + # or (np.random.random() > retrieval_hazard_rate)), + # --------- + control=(STORAGE_PROB, em)) + + nback_model = Composition(name=NBACK_MODEL, + nodes=[stim, context, task, ffn, em, control], + # Terminate trial if value of control is still 1 after first pass through execution + termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), + AfterPass(0, TimeScale.TRIAL))}, + ) + # # Terminate trial if value of control is still 1 after first pass through execution + # # FIX: ALL OF THE FOLLOWING STOP AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 + # nback_model.scheduler.add_condition(nback_model, And(Condition(lambda: control.value), AfterPass(0, TimeScale.TRIAL))) + # nback_model.scheduler.termination_conds = ({TimeScale.TRIAL: And(Condition(lambda: control.value), + # AfterPass(0, TimeScale.TRIAL))}) + # nback_model.scheduler.termination_conds.update({TimeScale.TRIAL: And(Condition(lambda: control.value), + # AfterPass(0, TimeScale.TRIAL))}) + nback_model.add_projection(MappingProjection(), stim, input_current_stim) + nback_model.add_projection(MappingProjection(), context, input_current_context) + nback_model.add_projection(MappingProjection(), task, input_task) + nback_model.add_projection(MappingProjection(), em.output_ports["RETRIEVED_STIMULUS_FIELD"], input_retrieved_stim) + nback_model.add_projection(MappingProjection(), 
em.output_ports["RETRIEVED_CONTEXT_FIELD"], input_retrieved_context) + nback_model.add_projection(MappingProjection(), stim, em.input_ports["STIMULUS_FIELD"]) + nback_model.add_projection(MappingProjection(), context, em.input_ports["CONTEXT_FIELD"]) + + if DISPLAY_MODEL: + nback_model.show_graph( + # show_cim=True, + # show_node_structure=ALL, + # show_dimensions=True + ) + + print(f'full model constructed') + return nback_model + +# ==========================================STIMULUS GENERATION ======================================================= +# Based on nback-paper + +def get_stim_set(num_stim=STIM_SIZE): + """Construct an array of stimuli for use an experiment""" + # For now, use one-hots + return np.eye(num_stim) + +def get_task_input(nback_level): + """Construct input to task Mechanism for a given nback_level, used by run_model() and train_network()""" + task_input = list(np.zeros_like(NBACK_LEVELS)) + task_input[nback_level-NBACK_LEVELS[0]] = 1 + return task_input + +def get_run_inputs(model, nback_level, context_drift_rate, num_trials): + """Construct set of stimulus inputs for run_model()""" + + def generate_stim_sequence(nback_level, trial_num, trial_type=0, num_stim=NUM_STIM, num_trials=NUM_TRIALS): + assert nback_level in {2,3} # At present, only 2- and 3-back levels are supported + + def gen_subseq_stim(): + A = np.random.randint(0,num_stim) + B = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A]) + ) + C = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A,B]) + ) + X = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A,B]) + ) + return A,B,C,X + + def generate_match_no_foils_sequence(nback_level,trial_num): + # AXA (2-back) or ABXA (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,X,A] + elif nback_level==3: + subseq = [A,B,X,A] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_non_match_no_foils_sequence(nback_level,trial_num): + # AXB (2-back) or ABXC (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,X,B] + elif nback_level==3: + subseq = [A,B,X,C] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_match_with_foil_sequence(nback_level,trial_num): + # AAA (2-back) or AAXA (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,A,A] + elif nback_level==3: + subseq = [A,A,X,A] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_non_match_with_foil_sequence(nback_level,trial_num): + # XAA (2-back) or ABXB (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [X,A,A] + elif nback_level==3: + subseq = [A,B,X,B] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + trial_types = [generate_match_no_foils_sequence, + generate_match_with_foil_sequence, + generate_non_match_no_foils_sequence, + generate_non_match_with_foil_sequence] + stim_seq = trial_types[trial_type](nback_level,trial_num) + # ytarget = [1,1,0,0][trial_type] + # ctxt = spherical_drift(trial_num) + # return stim,ctxt,ytarget + return stim_seq + + # def stim_set_generation(nback_level, num_trials): + # stim_sequence = [] + # # for seq_int, trial in itertools.product(range(4),np.arange(5,trials)): # This generates all length sequences + # for 
trial_type, trial_num in itertools.product(range(4),[num_trials]): # This generates only longest seq ( + # # num_trials) + # return stim_sequence.append(generate_stim_sequence(nback_level, trial_num, trial_type=trial_type, trials=num_trials)) + + def get_input_sequence(nback_level, num_trials=NUM_TRIALS): + """Get sequence of inputs for a run""" + input_set = get_stim_set() + # Construct sequence of stimulus indices + trial_seq = generate_stim_sequence(nback_level, num_trials) + # Return list of corresponding stimulus input vectors + return [input_set[trial_seq[i]] for i in range(num_trials)] + + return {model.nodes[MODEL_STIMULUS_INPUT]: get_input_sequence(nback_level, num_trials), + model.nodes[MODEL_CONTEXT_INPUT]: [[context_drift_rate]]*num_trials, + model.nodes[MODEL_TASK_INPUT]: [get_task_input(nback_level)]*num_trials} + +def get_training_inputs(network, num_epochs, nback_levels): + """Construct set of training stimuli used by ffn.learn() in train_network() + Construct one example of each condition: + match: stim_current = stim_retrieved and context_current = context_retrieved + stim_lure: stim_current = stim_retrieved and context_current != context_retrieved + context_lure: stim_current != stim_retrieved and context_current == context_retrieved + non_lure: stim_current != stim_retrieved and context_current != context_retrieved + """ + assert is_iterable(nback_levels) and all([0", + "image/svg+xml": "\n\n\n\n\n\nN-back Model\n\nN-back Model\n\ncluster_WORKING MEMORY (fnn)\n\nWORKING MEMORY (fnn)\n\n\n\nTASK\n\nTASK\n\n\n\nCURRENT TASK\n\nCURRENT TASK\n\n\n\nTASK->CURRENT TASK\n\n\n\n\n\nCONTEXT\n\nCONTEXT\n\n\n\nCURRENT CONTEXT\n\nCURRENT CONTEXT\n\n\n\nCONTEXT->CURRENT CONTEXT\n\n\n\n\n\nEPISODIC MEMORY (dict)\n\nEPISODIC MEMORY (dict)\n\n\n\nCONTEXT->EPISODIC MEMORY (dict)\n\n\n\n\n\nSTIM\n\nSTIM\n\n\n\nCURRENT STIMULUS\n\nCURRENT STIMULUS\n\n\n\nSTIM->CURRENT STIMULUS\n\n\n\n\n\nSTIM->EPISODIC MEMORY (dict)\n\n\n\n\n\nHIDDEN LAYER\n\nHIDDEN LAYER\n\n\n\nCURRENT TASK->HIDDEN LAYER\n\n\n\n\n\nCURRENT STIMULUS->HIDDEN LAYER\n\n\n\n\n\nCURRENT CONTEXT->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED CONTEXT\n\nRETRIEVED CONTEXT\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED CONTEXT\n\n\n\n\n\nRETRIEVED STIMULUS\n\nRETRIEVED STIMULUS\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED STIMULUS\n\n\n\n\n\nRETRIEVED CONTEXT->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED STIMULUS->HIDDEN LAYER\n\n\n\n\n\nREAD/WRITE CONTROLLER\n\nREAD/WRITE CONTROLLER\n\n\n\nREAD/WRITE CONTROLLER->EPISODIC MEMORY (dict)\n\n\n\n\n\n\nOBJECTIVE MECHANISM\n\nOBJECTIVE MECHANISM\n\n\n\nOBJECTIVE MECHANISM->READ/WRITE CONTROLLER\n\n\n\n\n\nDECISION LAYER\n\nDECISION LAYER\n\n\n\nDECISION LAYER->OBJECTIVE MECHANISM\n\n\n\n\n\nHIDDEN LAYER->DECISION LAYER\n\n\n\n\n\n" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nback_model.show_graph(output_fmt='jupyter')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Train the model:" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ffn = nback_model.nodes['WORKING MEMORY (fnn)']\n", + "train_network(ffn, num_epochs=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + 
"source": [ + "run_model(nback_model)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/Scripts/Models (Under Development)/N-back.py b/Scripts/Models (Under Development)/N-back/Nback.py similarity index 84% rename from Scripts/Models (Under Development)/N-back.py rename to Scripts/Models (Under Development)/N-back/Nback.py index a7546baa6e9..abd8173c02a 100644 --- a/Scripts/Models (Under Development)/N-back.py +++ b/Scripts/Models (Under Development)/N-back/Nback.py @@ -35,19 +35,17 @@ - the stim+context input vector (length 90) projects to a hidden layer (length 80); - the task input vector (length 2) projects to a different hidden layer (length 80); - those two hidden layers project (over fixed, nonlearnable, one-one-projections?) to a third hidden layer (length 80) that simply sums them; - - the third hidden layer projections to the length 2 output layer; + - the third hidden layer projects to the length 2 output layer; - a softmax is taken over the output layer to determine the response. - - softmax temp on output/decision layer: 1 - - confirm that ReLUs all use 0 thresholds and unit slope + - fix: were biases trained? - training: - learning rate: 0.001; epoch: 1 trial per epoch of training - - state_dict with weights (still needed) + - fix: state_dict with weights (still needed) - get empirical stimulus sequences (still needed) - put N-back script (with pointer to latest version on PNL) in nback-paper repo - - get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) (fix bug) - - make termination processing part of the Composition definition (fix bug) - - pass learning_rate as parameter to train_network() (add feature) - - fix warnings on run + - fix: get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) 
+ - fix: warnings on run + - complete documentation in BeukersNbackModel.rst - validate against nback-paper results - after validation: - try with STIM_SIZE = NUM_STIMS rather than 20 (as in nback-paper) @@ -62,8 +60,6 @@ import numpy as np # Settings for running script: -TRAIN = True -RUN = False DISPLAY_MODEL = False # show visual graphic of model # PARAMETERS ------------------------------------------------------------------------------------------------------- @@ -85,22 +81,22 @@ RETRIEVAL_HAZARD_RATE=0.04 # rate of re=sampling of em following non-match determination in a pass through ffn RETRIEVAL_STIM_WEIGHT=.05 # weighting of stimulus field in retrieval from em RETRIEVAL_CONTEXT_WEIGHT = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em -DECISION_SOFTMAX_TEMP=1/8 # express as gain # binarity of decision process +DECISION_SOFTMAX_TEMP=1 # Training parameters: -NUM_EPOCHS=10 # nback-paper: 400,000, one trial per epoch -LEARNING_RATE=0.1 # nback-paper: .001 +NUM_EPOCHS=3 # nback-paper: 400,000 @ one trial per epoch = 2,500 @ 160 trials per epoch +LEARNING_RATE=0.01 # nback-paper: .001 # Execution parameters: CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial -NUM_TRIALS = 48 # number of stimuli presented in a trial sequence +NUM_TRIALS = 48 # number of stimuli presented in a trial sequence for a given nback_level during run REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run -REPORT_PROGRESS = ReportProgress.ON # Sets console progress bar during run -REPORT_LEARNING = ReportLearning.ON # Sets console progress bar during training -ANIMATE = True # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution +REPORT_PROGRESS = ReportProgress.OFF # Sets console progress bar during run +REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training +ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution # Names of Compositions and Mechanisms: -NBACK_MODEL = "N-Back Model" +NBACK_MODEL = "N-back Model" FFN_COMPOSITION = "WORKING MEMORY (fnn)" FFN_STIMULUS_INPUT = "CURRENT STIMULUS" FFN_CONTEXT_INPUT = "CURRENT CONTEXT" @@ -129,6 +125,8 @@ def construct_model(stim_size = STIM_SIZE, decision_softmax_temp = DECISION_SOFTMAX_TEMP): """Construct nback_model""" + print(f'constructing {FFN_COMPOSITION}...') + # FEED FORWARD NETWORK ----------------------------------------- # inputs: encoding of current stimulus and context, retrieved stimulus and retrieved context, @@ -161,7 +159,7 @@ def construct_model(stim_size = STIM_SIZE, input_retrieved_context, input_task}, hidden, decision], - RANDOM_WEIGHTS_INITIALIZATION, + RANDOM_WEIGHTS_INITIALIZATION, ), name=FFN_COMPOSITION, learning_rate=LEARNING_RATE @@ -169,15 +167,17 @@ def construct_model(stim_size = STIM_SIZE, # FULL MODEL (Outer Composition, including input, EM and control Mechanisms) ------------------------ + print(f'constructing {NBACK_MODEL}...') + # Stimulus Encoding: takes STIM_SIZE vector as input - stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=STIM_SIZE) + stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=stim_size) # Context Encoding: takes scalar as drift step for current trial context = ProcessingMechanism(name=MODEL_CONTEXT_INPUT, function=DriftOnASphereIntegrator( - initializer=np.random.random(CONTEXT_SIZE-1), + initializer=np.random.random(context_size-1), noise=context_drift_noise, - dimension=CONTEXT_SIZE)) + dimension=context_size)) # Task: 
task one-hot indicating n-back (1, 2, 3 etc.) - must correspond to what ffn has been trained to do task = ProcessingMechanism(name=MODEL_TASK_INPUT, @@ -188,11 +188,11 @@ def construct_model(stim_size = STIM_SIZE, # - uses Softmax to retrieve best matching input, subject to weighting of stimulus and context by STIM_WEIGHT em = EpisodicMemoryMechanism(name=EM, input_ports=[{NAME:"STIMULUS_FIELD", - SIZE:STIM_SIZE}, + SIZE:stim_size}, {NAME:"CONTEXT_FIELD", - SIZE:CONTEXT_SIZE}], + SIZE:context_size}], function=ContentAddressableMemory( - initializer=[[[0]*STIM_SIZE, [0]*CONTEXT_SIZE]], + initializer=[[[0]*stim_size, [0]*context_size]], distance_field_weights=[retrieval_stimulus_weight, retrieval_context_weight], # equidistant_entries_select=NEWEST, @@ -211,7 +211,8 @@ def construct_model(stim_size = STIM_SIZE, # - continue trial control = ControlMechanism(name=CONTROLLER, default_variable=[[1]], # Ensure EM[store_prob]=1 at beginning of first trial - # # VERSION *WITH* ObjectiveMechanism: + # --------- + # VERSION *WITH* ObjectiveMechanism: objective_mechanism=ObjectiveMechanism(name="OBJECTIVE MECHANISM", monitor=decision, # Outcome=1 if match, else 0 @@ -219,20 +220,21 @@ def construct_model(stim_size = STIM_SIZE, # Set ControlSignal for EM[store_prob] function=lambda outcome: int(bool(outcome) or (np.random.random() > retrieval_hazard_rate)), + # --------- # # VERSION *WITHOUT* ObjectiveMechanism: # monitor_for_control=decision, # # Set Evaluate outcome and set ControlSignal for EM[store_prob] # # - outcome is received from decision as one hot in the form: [[match, no-match]] # function=lambda outcome: int(int(outcome[0][1]>outcome[0][0]) - # or (np.random.random() > HAZARD_RATE)), + # or (np.random.random() > retrieval_hazard_rate)), + # --------- control=(STORAGE_PROB, em)) nback_model = Composition(name=NBACK_MODEL, nodes=[stim, context, task, ffn, em, control], - # # # Terminate trial if value of control is still 1 after first pass through execution - # # FIX: STOPS AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 - # termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), - # AfterPass(0, TimeScale.TRIAL))}, + # Terminate trial if value of control is still 1 after first pass through execution + termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), + AfterPass(0, TimeScale.TRIAL))}, ) # # Terminate trial if value of control is still 1 after first pass through execution # # FIX: ALL OF THE FOLLOWING STOP AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 @@ -256,6 +258,7 @@ def construct_model(stim_size = STIM_SIZE, # show_dimensions=True ) + print(f'full model constructed') return nback_model # ==========================================STIMULUS GENERATION ======================================================= @@ -445,18 +448,41 @@ def get_training_inputs(network, num_epochs, nback_levels): TARGETS: {network.nodes[FFN_OUTPUT]: target}, EPOCHS: num_epochs} - return training_set + batch_size = len(target) + print(f'num trials (batch_size): {len(target)}') + return training_set, batch_size # ======================================== MODEL EXECUTION ============================================================ def train_network(network, learning_rate=LEARNING_RATE, num_epochs=NUM_EPOCHS): - training_set = get_training_inputs(network=network, num_epochs=num_epochs, nback_levels=NBACK_LEVELS) + print(f"constructing training_set for '{network.name}'...") + training_set, batch_size = 
get_training_inputs(network=network, + num_epochs=num_epochs, + nback_levels=NBACK_LEVELS) + print(f'training_set constructed: {len(training_set)}') + print(f"\ntraining '{network.name}'...") + import timeit + start_time = timeit.default_timer() network.learn(inputs=training_set, - minibatch_size=NUM_TRIALS, + minibatch_size=batch_size, + report_progress=REPORT_PROGRESS, # report_learning=REPORT_LEARNING, + learning_rate=learning_rate, execution_mode=ExecutionMode.LLVMRun) + stop_time = timeit.default_timer() + print(f"'{network.name}' trained") + training_time = stop_time-start_time + if training_time <= 60: + training_time_str = f'{int(training_time)} seconds' + else: + training_time_str = f'{int(training_time/60)} minutes' + print(f'training time: {training_time_str} for {num_epochs} epochs') + # path = network.save() + # print(f'saved weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') + # network.load(path) + # print(f'loaded weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') def run_model(model, context_drift_rate=CONTEXT_DRIFT_RATE, @@ -465,12 +491,9 @@ def run_model(model, report_progress=REPORT_PROGRESS, animate=ANIMATE ): + print('nback_model executing...') for nback_level in NBACK_LEVELS: model.run(inputs=get_run_inputs(model, nback_level, context_drift_rate, num_trials), - # FIX: MOVE THIS TO MODEL CONSTRUCTION ONCE THAT WORKS - # Terminate trial if value of control is still 1 after first pass through execution - termination_processing={TimeScale.TRIAL: And(Condition(lambda: model.nodes[CONTROLLER].value), - AfterPass(0, TimeScale.TRIAL))}, # function arg report_output=report_output, report_progress=report_progress, animate=animate @@ -478,52 +501,7 @@ def run_model(model, # FIX: RESET MEMORY HERE? 
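[Editor's note] With the module-level driver removed from Nback.py (deleted further below), construction, training, and execution are now invoked explicitly, as in the accompanying notebook. A minimal sketch, assuming the definitions in the script are in scope:

    nback_model = construct_model()
    train_network(nback_model.nodes[FFN_COMPOSITION])   # train the feedforward ("WORKING MEMORY (fnn)") network
    run_model(nback_model)                              # run NUM_TRIALS stimuli at each level in NBACK_LEVELS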
# print("Number of entries in EM: ", len(model.nodes[EM].memory)) assert len(model.nodes[EM].memory) == NUM_TRIALS*NUM_NBACK_LEVELS + 1 - - -nback_model = construct_model() -print('nback_model constructed') -if TRAIN: - print('nback_model training...') - train_network(nback_model.nodes[FFN_COMPOSITION]) - print('nback_model trained') -if RUN: - print('nback_model executing...') - run_model(nback_model) if REPORT_PROGRESS == ReportProgress.ON: print('\n') print(f'nback_model done: {len(nback_model.results)} trials executed') - -# =========================================================================== - -# TEST OF SPHERICAL DRIFT: -# stims = np.array([x[0] for x in em.memory]) -# contexts = np.array([x[1] for x in em.memory]) -# cos = Distance(metric=COSINE) -# dist = Distance(metric=EUCLIDEAN) -# diffs = [np.sum([contexts[i+1] - contexts[1]]) for i in range(NUM_TRIALS)] -# diffs_1 = [np.sum([contexts[i+1] - contexts[i]]) for i in range(NUM_TRIALS)] -# diffs_2 = [np.sum([contexts[i+2] - contexts[i]]) for i in range(NUM_TRIALS-1)] -# dots = [[contexts[i+1] @ contexts[1]] for i in range(NUM_TRIALS)] -# dot_diffs_1 = [[contexts[i+1] @ contexts[i]] for i in range(NUM_TRIALS)] -# dot_diffs_2 = [[contexts[i+2] @ contexts[i]] for i in range(NUM_TRIALS-1)] -# angle = [cos([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] -# angle_1 = [cos([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] -# angle_2 = [cos([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] -# euclidean = [dist([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] -# euclidean_1 = [dist([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] -# euclidean_2 = [dist([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] -# print("STIMS:", stims, "\n") -# print("DIFFS:", diffs, "\n") -# print("DIFFS 1:", diffs_1, "\n") -# print("DIFFS 2:", diffs_2, "\n") -# print("DOT PRODUCTS:", dots, "\n") -# print("DOT DIFFS 1:", dot_diffs_1, "\n") -# print("DOT DIFFS 2:", dot_diffs_2, "\n") -# print("ANGLE: ", angle, "\n") -# print("ANGLE_1: ", angle_1, "\n") -# print("ANGLE_2: ", angle_2, "\n") -# print("EUCILDEAN: ", euclidean, "\n") -# print("EUCILDEAN 1: ", euclidean_1, "\n") -# print("EUCILDEAN 2: ", euclidean_2, "\n") - -# n_back_model() + print(f'results: \n{model.results}') diff --git a/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py b/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py new file mode 100644 index 00000000000..3fb2cbed191 --- /dev/null +++ b/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py @@ -0,0 +1,34 @@ +import numpy as np +from psyneulink import * + +NUM_TRIALS = 48 + +stims = np.array([x[0] for x in em.memory]) +contexts = np.array([x[1] for x in em.memory]) +cos = Distance(metric=COSINE) +dist = Distance(metric=EUCLIDEAN) +diffs = [np.sum([contexts[i+1] - contexts[1]]) for i in range(NUM_TRIALS)] +diffs_1 = [np.sum([contexts[i+1] - contexts[i]]) for i in range(NUM_TRIALS)] +diffs_2 = [np.sum([contexts[i+2] - contexts[i]]) for i in range(NUM_TRIALS-1)] +dots = [[contexts[i+1] @ contexts[1]] for i in range(NUM_TRIALS)] +dot_diffs_1 = [[contexts[i+1] @ contexts[i]] for i in range(NUM_TRIALS)] +dot_diffs_2 = [[contexts[i+2] @ contexts[i]] for i in range(NUM_TRIALS-1)] +angle = [cos([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] +angle_1 = [cos([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +angle_2 = [cos([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +euclidean = [dist([contexts[i+1], contexts[1]]) for 
i in range(NUM_TRIALS)] +euclidean_1 = [dist([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +euclidean_2 = [dist([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +print("STIMS:", stims, "\n") +print("DIFFS:", diffs, "\n") +print("DIFFS 1:", diffs_1, "\n") +print("DIFFS 2:", diffs_2, "\n") +print("DOT PRODUCTS:", dots, "\n") +print("DOT DIFFS 1:", dot_diffs_1, "\n") +print("DOT DIFFS 2:", dot_diffs_2, "\n") +print("ANGLE: ", angle, "\n") +print("ANGLE_1: ", angle_1, "\n") +print("ANGLE_2: ", angle_2, "\n") +print("EUCILDEAN: ", euclidean, "\n") +print("EUCILDEAN 1: ", euclidean_1, "\n") +print("EUCILDEAN 2: ", euclidean_2, "\n") diff --git a/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl b/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl new file mode 100644 index 00000000000..2a46665f63c Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/__init__.py b/Scripts/Models (Under Development)/N-back/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..4903636b03d Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl new file mode 100644 index 00000000000..cb1e3a49a5e Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy b/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy new file mode 100644 index 00000000000..dc1b2a21074 Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy b/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy new file mode 100644 index 00000000000..f0a8a235271 Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl new file mode 100644 index 00000000000..f94b91cb028 Binary files /dev/null and b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts.pnl b/Scripts/Models (Under Development)/ffn.wts.pnl new file mode 100644 index 00000000000..072920a24fe Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_01.pnl b/Scripts/Models (Under Development)/ffn.wts_01.pnl new file mode 100644 index 00000000000..20016bdf831 Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts_01.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..f5f9c4d160b Binary files /dev/null and b/Scripts/Models (Under 
Development)/ffn.wts_nep_1_lr_01.pnl differ diff --git a/autodiff_composition_matrix_wts.pnl b/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/autodiff_composition_matrix_wts.pnl differ diff --git a/psyneulink/core/components/functions/nonstateful/learningfunctions.py b/psyneulink/core/components/functions/nonstateful/learningfunctions.py index e1d9b5ab1a7..49f15cf4f8d 100644 --- a/psyneulink/core/components/functions/nonstateful/learningfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/learningfunctions.py @@ -1123,7 +1123,6 @@ def _function(self, # then need to assign it to the default value # If learning_rate was not specified for instance or composition, use default value learning_rate = self._get_current_parameter_value(LEARNING_RATE, context) - # learning_rate = self.learning_rate if learning_rate is None: learning_rate = self.defaults.learning_rate # diff --git a/psyneulink/core/components/functions/nonstateful/transferfunctions.py b/psyneulink/core/components/functions/nonstateful/transferfunctions.py index 9e2d9fb3939..774da8a96ba 100644 --- a/psyneulink/core/components/functions/nonstateful/transferfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/transferfunctions.py @@ -1620,8 +1620,8 @@ def derivative(self, input, output=None, context=None): # # MODIFIED 11/5/22 NEW: # bias = self._get_current_parameter_value(BIAS, context) # input = np.asarray(input).copy() - # input[(input-bias)>0] = gain - # input[(input-bias)<=0] = gain * leak + # input[(input - bias) > 0] = gain + # input[(input - bias) <= 0] = gain * leak # MODIFIED 11/5/22 END return input diff --git a/psyneulink/core/components/functions/stateful/memoryfunctions.py b/psyneulink/core/components/functions/stateful/memoryfunctions.py index c6fb7d67731..5c13c251278 100644 --- a/psyneulink/core/components/functions/stateful/memoryfunctions.py +++ b/psyneulink/core/components/functions/stateful/memoryfunctions.py @@ -466,7 +466,7 @@ class ContentAddressableMemory(MemoryFunction): # ------------------------------ An entry is stored and retrieved as an array containing a set of `fields ` each of which is a 1d array. An array containing such entries can be used to initialize the contents of `memory ` by providing it in the **initializer** argument of the ContentAddressableMemory's - constructor, or in a call to its `reset ` method. The current contents of `memory + constructor, or in a call to its `reset ` method. The current contents of `memory ` can be inspected using the `memory ` attribute, which returns a list containing the current entries, each as a list containing all fields for that entry. 
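[Editor's note] The ContentAddressableMemory docstring above describes initializing memory via the **initializer** argument and inspecting it via the `memory` attribute. A minimal, illustrative sketch, with field sizes and parameters mirroring the EM mechanism constructed in the N-back script (stimulus field of length 8, context field of length 25):

    from psyneulink import ContentAddressableMemory, SoftMax, MAX_INDICATOR

    # One zero-filled entry with a stimulus field and a context field, weighted 0.05 / 0.95 in retrieval
    cam = ContentAddressableMemory(initializer=[[[0]*8, [0]*25]],
                                   distance_field_weights=[0.05, 0.95],
                                   selection_function=SoftMax(output=MAX_INDICATOR, gain=1/8))
    print(cam.memory)   # list of current entries, each a list of that entry's fields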
The `memory_num_fields ` contains the number of fields expected for each diff --git a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py index 2ae1da4c11b..e8cfca7b532 100644 --- a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py @@ -1313,7 +1313,7 @@ def _execute( # Get error_signals (from ERROR_SIGNAL InputPorts) and error_matrices relevant for the current execution: error_signal_indices = self.error_signal_indices error_signal_inputs = variable[error_signal_indices] - # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITALIZATION AND UPDTATED AS NECESSARY + # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITALIZATION AND UPDATED AS NECESSARY if self.error_matrices is None: # KAM 6/28/19 Hack to get the correct shape and contents for initial error matrix in backprop if self.function is BackPropagation or isinstance(self.function, BackPropagation): @@ -1354,7 +1354,6 @@ def _execute( ] ) learning_signal, error_signal = super()._execute(variable=function_variable, - # MODIFIED CROSS_PATHWAYS 7/22/19 END context=context, error_matrix=error_matrix, runtime_params=runtime_params, @@ -1368,7 +1367,7 @@ def _execute( and self.initialization_status != ContextFlags.INITIALIZING): print("\n{} weight change matrix: \n{}\n".format(self.name, summed_learning_signal)) - # Durning initialization return zeros so that the first "real" trial for Backprop does not start + # During initialization return zeros so that the first "real" trial for Backprop does not start # with the error computed during initialization if (self.in_composition and isinstance(self.function, BackPropagation) and diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 724e3f2f403..9dcfc218ed7 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -3813,6 +3813,7 @@ def __init__( self._partially_added_nodes = [] self.disable_learning = disable_learning + self._runtime_learning_rate = None # graph and scheduler status attributes self.graph_consistent = True # Tracks if Composition is in runnable state (no dangling projections (what else?) @@ -10178,6 +10179,7 @@ def learn( targets: tc.optional(dict) = None, num_trials: tc.optional(int) = None, epochs: int = 1, + learning_rate = None, minibatch_size: int = 1, patience: tc.optional(int) = None, min_delta: int = 0, @@ -10226,6 +10228,12 @@ def learn( epochs : int (default=1) specifies the number of training epochs (that is, repetitions of the batched input set) to run with + learning_rate : float : default None + specifies the learning_rate used by all `learning pathways ` + when the Composition's learn method is called. This overrides the `learning_rate specified + for any individual Pathways at construction, but only applies for the current execution of + the learn method. + minibatch_size : int (default=1) specifies the size of the minibatches to use. 
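[Editor's note] The learning_rate argument documented above can now be passed directly to learn() to override, for that call only, the learning_rate set at construction. A minimal sketch using the names from train_network() in the N-back script (ffn, training_set, and batch_size are defined there):

    ffn.learn(inputs=training_set,
              minibatch_size=batch_size,
              learning_rate=0.01,                     # overrides the construction-time learning_rate for this call only
              execution_mode=ExecutionMode.LLVMRun)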
The input trials will be batched and run, after which learning mechanisms with learning mode TRIAL will update weights @@ -10315,6 +10323,7 @@ def learn( targets=targets, num_trials=num_trials, epochs=epochs, + learning_rate=learning_rate, minibatch_size=minibatch_size, patience=patience, min_delta=min_delta, @@ -11210,7 +11219,7 @@ def execute( return self.get_output_values(context) def __call__(self, *args, **kwargs): - """Execute Composition of any args are provided; else simply return results of last execution. + """Execute Composition if any args are provided; else simply return results of last execution. This allows Composition, after it has been constructed, to be run simply by calling it directly. """ if not args and not kwargs: diff --git a/psyneulink/core/globals/utilities.py b/psyneulink/core/globals/utilities.py index 0adb9969835..61c987cb823 100644 --- a/psyneulink/core/globals/utilities.py +++ b/psyneulink/core/globals/utilities.py @@ -442,6 +442,16 @@ def iscompatible(candidate, reference=None, **kargs): warnings.simplefilter(action='ignore', category=FutureWarning) if reference is not None and (candidate == reference): return True + # if reference is not None: + # if (isinstance(reference, (bool, int, float)) + # and isinstance(candidate, (bool, int, float)) + # and candidate == reference): + # return True + # elif (isinstance(reference, (list, np.ndarray)) + # and isinstance(candidate, (list, np.ndarray)) and (candidate == reference).all()): + # return True + # elif is_iterable(reference) and is_iterable(candidate) and (candidate == reference): + # return True except ValueError: # raise UtilitiesError("Could not compare {0} and {1}".format(candidate, reference)) # IMPLEMENTATION NOTE: np.array generates the following error: diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 7a001ae3b75..9c3518e2237 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -94,9 +94,10 @@ Logging ~~~~~~~ -Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `. However, since an AutodiffComposition internally converts all of its mechanisms to an equivalent PyTorch model, -then its inner components are not actually executed. This means that there is limited support for logging parameters of components inside an AutodiffComposition; -Currently, the only supported parameters are: +Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `. +However, since an AutodiffComposition internally converts all of its mechanisms to an equivalent PyTorch model, +then its inner components are not actually executed. 
This means that there is limited support for +logging parameters of components inside an AutodiffComposition; Currently, the only supported parameters are: 1) the `matrix` parameter of Projections @@ -132,8 +133,9 @@ """ import logging - +import os import numpy as np +from pathlib import Path, PosixPath try: import torch @@ -146,6 +148,9 @@ from psyneulink.library.compositions.pytorchmodelcreator import PytorchModelCreator from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import ComparatorMechanism +from psyneulink.core.components.mechanisms.processing.compositioninterfacemechanism import CompositionInterfaceMechanism +from psyneulink.core.components.mechanisms.modulatory.modulatorymechanism import ModulatoryMechanism_Base +from psyneulink.core.components.projections.modulatory.modulatoryprojection import ModulatoryProjection_Base from psyneulink.core.compositions.composition import Composition, NodeRole from psyneulink.core.compositions.composition import CompositionError from psyneulink.core.compositions.report \ @@ -159,6 +164,7 @@ from psyneulink.core import llvm as pnlvm + logger = logging.getLogger(__name__) @@ -185,7 +191,7 @@ class AutodiffComposition(Composition): --------- learning_rate : float : default 0.001 - the learning rate, which is passed to the optimizer. + the learning rate passed to the optimizer if none is specified in the learn method of the AutodiffComposition. disable_learning : bool: default False specifies whether the AutodiffComposition should disable learning when run in `learning mode @@ -259,6 +265,7 @@ def __init__(self, self.force_no_retain_graph = force_no_retain_graph self.loss = None self.disable_learning = disable_learning + self._runtime_learning_rate = None # keeps track of average loss per epoch self.losses = [] @@ -276,10 +283,10 @@ def __init__(self, # CLEANUP: move some of what's done in the methods below to a "validate_params" type of method @handle_external_context() - def _build_pytorch_representation(self, context=None): + def _build_pytorch_representation(self, context=None, refresh=False): if self.scheduler is None: self.scheduler = Scheduler(graph=self.graph_processing) - if self.parameters.pytorch_representation._get(context=context) is None: + if self.parameters.pytorch_representation._get(context=context) is None or refresh: model = PytorchModelCreator(composition=self, device=self.device, context=context) @@ -288,8 +295,9 @@ def _build_pytorch_representation(self, context=None): # Set up optimizer function old_opt = self.parameters.optimizer._get(context) - if old_opt is None: - opt = self._make_optimizer(self.optimizer_type, self.learning_rate, self.weight_decay, context) + learning_rate = self._runtime_learning_rate or self.learning_rate + if old_opt is None or refresh: + opt = self._make_optimizer(self.optimizer_type, learning_rate, self.weight_decay, context) self.parameters.optimizer._set(opt, context, skip_history=True, skip_log=True) # Set up loss function @@ -355,7 +363,10 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): # compute total loss across output neurons for current trial tracked_loss = self.parameters.tracked_loss._get(context) if tracked_loss is None: - self.parameters.tracked_loss._set(torch.zeros(1, device=self.device).double(), context=context, skip_history=True, skip_log=True) + self.parameters.tracked_loss._set(torch.zeros(1, device=self.device).double(), + context=context, + skip_history=True, + skip_log=True) tracked_loss = 
self.parameters.tracked_loss._get(context) curr_tensor_inputs = {} @@ -368,10 +379,9 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): curr_tensor_targets[component] = torch.tensor(target, device=self.device).double() # do forward computation on current inputs - curr_tensor_outputs = self.parameters.pytorch_representation._get(context).forward( - curr_tensor_inputs, - context, - ) + curr_tensor_outputs = self.parameters.pytorch_representation._get(context).forward(curr_tensor_inputs, + context, + ) for component in curr_tensor_outputs.keys(): # possibly add custom loss option, which is a loss function that takes many args @@ -385,7 +395,10 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): component = input_port.all_afferents[0].sender.owner outputs.append(curr_tensor_outputs[component].detach().cpu().numpy().copy()) - self.parameters.tracked_loss_count._set(self.parameters.tracked_loss_count._get(context=context) + 1, context=context, skip_history=True, skip_log=True) + self.parameters.tracked_loss_count._set(self.parameters.tracked_loss_count._get(context=context) + 1, + context=context, + skip_history=True, + skip_log=True) return outputs def clear_losses(self, context=None): @@ -394,7 +407,7 @@ def clear_losses(self, context=None): def _update_learning_parameters(self, context): """ - Updates parameters based on trials ran since last update. + Updates parameters based on trials run since last update. """ optimizer = self.parameters.optimizer._get(context=context) optimizer.zero_grad() @@ -563,6 +576,120 @@ def execute(self, report_num=report_num ) + @handle_external_context(fallback_most_recent=True) + def save(self, path:PosixPath=None, directory:str=None, filename:str=None, context=None): + """Saves all weight matrices for all MappingProjections in the AutodiffComposition + + Arguments + --------- + path: Path, PosixPath or str : default None + path specification; must be a legal path specification in the filesystem. + directory: str : default ``current working directory`` + directory where `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + filename: str : default ``_matrix_wts.pnl`` + filename in which `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + .. note:: + Matrices are saved in + `PyTorch state_dict `_ format. 
+ + Return + ------ + Path + + """ + if path: + try: + path = Path(path) + except: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + else: + try: + if directory: + path = Path(directory) + else: + path = Path(os.getcwd()) + if filename: + # path = Path(path / filename) + path = Path(os.path.join(path / filename)) + else: + path = Path(os.path.join(path / f'{self.name}_matrix_wts.pnl')) + except IsADirectoryError: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + proj_state = { + # p.name: p.parameters.matrix.get(context=context) + p.name: p.matrix.base + for p in self.projections + if not (isinstance(p, ModulatoryProjection_Base) + or isinstance(p.sender.owner, CompositionInterfaceMechanism) + or isinstance(p.receiver.owner, CompositionInterfaceMechanism) + or isinstance(p.sender.owner, ModulatoryMechanism_Base) + or isinstance(p.receiver.owner, ModulatoryMechanism_Base) + or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING) + or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING) + )} + torch.save(proj_state, path) + return path + + @handle_external_context(fallback_most_recent=True) + def load(self, path:PosixPath=None, directory:str=None, filename:str=None, context=None): + """Loads all weights matrices for all MappingProjections in the AutodiffComposition from file + Arguments + --------- + path: Path : default None + Path for file in which `MappingProjection` `matrices ` are stored. + This must be a legal PosixPath object; if it is specified **directory** and **filename** are ignored. + directory: str : default ``current working directory`` + directory where `MappingProjection` `matrices ` are stored. + filename: str : default ``_matrix_wts.pnl`` + name of file in which `MappingProjection` `matrices ` are stored. + .. note:: + Matrices must be stored in + `PyTorch state_dict `_ format. 
+ """ + if path: + if not isinstance(path,Path): + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + else: + try: + if directory: + path = Path(directory) + else: + path = Path(os.getcwd()) + if filename: + # path = Path(path / filename) + path = Path(os.path.join(path / filename)) + else: + # path = Path(path / f'{self.name}_matrix_wts.pnl') + path = Path(os.path.join(path , f'{self.name}_matrix_wts.pnl')) + except IsADirectoryError: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + state = torch.load(path) + for projection in [p for p in self.projections + if not (isinstance(p, ModulatoryProjection_Base) + or isinstance(p.sender.owner, CompositionInterfaceMechanism) + or isinstance(p.receiver.owner, CompositionInterfaceMechanism) + or isinstance(p.sender.owner, ModulatoryMechanism_Base) + or isinstance(p.receiver.owner, ModulatoryMechanism_Base) + or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING) + or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING) + )]: + matrix = state[projection.name] + if np.array(matrix).shape != projection.matrix.base.shape: + raise AutodiffCompositionError(f"Shape of matrix loaded for '{projection.name}' " + f"({np.array(matrix).shape}) " + f"does not match its shape ({projection.matrix.base.shape})") + projection.matrix.base = matrix + projection.parameters.matrix.set(matrix, context=context, override=True) + projection.parameter_ports['matrix'].parameters.value.set(matrix, context=context, override=True) + self._build_pytorch_representation(context=context, refresh=True) + # MODIFIED 11/8/22 END + def _get_state_ids(self): return super()._get_state_ids() + ["optimizer"] diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index d7039a1902e..8e7a757a353 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -129,6 +129,7 @@ def run_learning(self, targets: dict = None, num_trials: int = None, epochs: int = 1, + learning_rate = None, minibatch_size: int = 1, patience: int = None, min_delta: int = 0, @@ -139,7 +140,7 @@ def run_learning(self, execution_mode:pnlvm.ExecutionMode = pnlvm.ExecutionMode.Python, **kwargs): """ - Runs the composition repeatedly with the specified parameters + Runs the composition repeatedly with the specified parameters. Returns --------- @@ -150,6 +151,9 @@ def run_learning(self, else: self._is_llvm_mode = True + # This is used by local learning-related methods to override the default learning_rate set at construction. 
+ self._composition._runtime_learning_rate = learning_rate + # Handle function and generator inputs if isgeneratorfunction(inputs): inputs = inputs() diff --git a/psyneulink/library/compositions/pytorchcomponents.py b/psyneulink/library/compositions/pytorchcomponents.py index 43122730437..e106272d91a 100644 --- a/psyneulink/library/compositions/pytorchcomponents.py +++ b/psyneulink/library/compositions/pytorchcomponents.py @@ -1,4 +1,4 @@ -from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU +from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU, SoftMax from psyneulink.library.compositions.pytorchllvmhelper import * from psyneulink.core.globals.log import LogCondition from psyneulink.core import llvm as pnlvm @@ -10,7 +10,8 @@ def pytorch_function_creator(function, device, context=None): """ Converts a PsyNeuLink function into an equivalent PyTorch lambda function. - NOTE: This is needed due to PyTorch limitations (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990) + NOTE: This is needed due to PyTorch limitations + (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990) """ def get_fct_param_value(param_name): val = function._get_current_parameter_value( @@ -38,6 +39,10 @@ def get_fct_param_value(param_name): return lambda x: (torch.max(input=(x - bias), other=torch.tensor([0], device=device).double()) * gain + torch.min(input=(x - bias), other=torch.tensor([0], device=device).double()) * leak) + elif isinstance(function, SoftMax): + gain = get_fct_param_value('gain') + return lambda x: (torch.softmax(x, len(x), other=torch.tensor([0], device=device).double())) + else: raise Exception(f"Function {function} is not currently supported in AutodiffCompositions!") diff --git a/psyneulink/library/compositions/pytorchmodelcreator.py b/psyneulink/library/compositions/pytorchmodelcreator.py index af809613bf4..916dfca438f 100644 --- a/psyneulink/library/compositions/pytorchmodelcreator.py +++ b/psyneulink/library/compositions/pytorchmodelcreator.py @@ -60,7 +60,8 @@ def __init__(self, composition, device, context=None): proj_recv.add_afferent(new_proj) self.projection_map[projection] = new_proj self.projections.append(new_proj) - self.params.append(new_proj.matrix) + + self._regenerate_paramlist() c = Context() try: @@ -81,6 +82,11 @@ def __init__(self, composition, device, context=None): __deepcopy__ = get_deepcopy_with_shared(shared_types=(Component, ComponentsMeta)) + def _regenerate_paramlist(self): + self.params = nn.ParameterList() + for proj in self.projections: + self.params.append(proj.matrix) + # generates llvm function for self.forward def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): args = [ctx.get_state_struct_type(self._composition).as_pointer(), diff --git a/tests/composition/autodiff_composition_matrix_wts.pnl b/tests/composition/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/tests/composition/autodiff_composition_matrix_wts.pnl differ diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 2bc81653862..d04ffe6f210 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -1,6 +1,6 @@ import logging import timeit as timeit - +import os import numpy as np import pytest @@ -56,6 +56,61 @@ def 
test_autodiff_forward(autodiff_mode): outputs = xor.run(inputs=[0,0], execution_mode=autodiff_mode) assert np.allclose(outputs, [[0.9479085241082691]]) +@pytest.mark.pytorch +def test_autodiff_saveload(tmp_path): + def create_xor(): + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) + + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + hid_map = MappingProjection(matrix=np.random.rand(2,10), name='hid_map') + out_map = MappingProjection(matrix=np.random.rand(10,1), name='out_map') + + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition() + + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) + + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + return xor + + np.random.seed(0) + xor1 = create_xor() + xor1_outputs = xor1.run(inputs=[0,0]) + + # save + # path = xor1.save() + path = xor1.save(os.path.join(tmp_path, 'xor_1.pnl')) + + # del xor1 + pnl.clear_registry() + + # load + np.random.seed(1) + xor2 = create_xor() + xor2_outputs_pre = xor2.run(inputs=[0,0]) + # xor2.load(os.path.join(tmp_path, 'xor_1.pnl')) + xor2.load(path) + xor2_outputs_post = xor2.run(inputs=[0,0]) + + + # sanity check - make sure xor2 weights differ + assert not np.allclose(xor2_outputs_pre, xor2_outputs_post, atol=1e-9) + + # make sure loaded model is identical, and used during run + assert np.allclose(xor1_outputs, xor2_outputs_post, atol=1e-9) + @pytest.mark.pytorch @pytest.mark.acconstructor class TestACConstructor:
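[Editor's note] The same save/load round trip can be applied to the N-back feedforward network; a sketch based on the commented-out lines in train_network() and on the test above (the filename here is illustrative):

    ffn = nback_model.nodes[FFN_COMPOSITION]
    path = ffn.save(filename='ffn.wts_nep_6250_lr_01.pnl')   # writes all MappingProjection matrices (PyTorch state_dict format)
    ffn.load(path)                                            # restores the saved matrices and rebuilds the PyTorch representation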