diff --git a/Scripts/Models (Under Development)/N-back MODULARIZED.py b/Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py similarity index 99% rename from Scripts/Models (Under Development)/N-back MODULARIZED.py rename to Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py index 87ee56c5221..74dcf24e51d 100644 --- a/Scripts/Models (Under Development)/N-back MODULARIZED.py +++ b/Scripts/Models (Under Development)/N-back/N-back MODULARIZED.py @@ -151,7 +151,7 @@ def construct_model(num_tasks, stim_size, context_size, hidden_size, display=Fal hidden, decision], name="WORKING MEMORY (fnn)") comp = Composition(nodes=[stim, context, task, em, ffn, control], - name="N-Back Model") + name="N-back Model") comp.add_projection(MappingProjection(), stim, input_current_stim) comp.add_projection(MappingProjection(), context, input_current_context) comp.add_projection(MappingProjection(), task, input_task) diff --git a/Scripts/Models (Under Development)/N-back/N-back.py b/Scripts/Models (Under Development)/N-back/N-back.py new file mode 100644 index 00000000000..6504493494a --- /dev/null +++ b/Scripts/Models (Under Development)/N-back/N-back.py @@ -0,0 +1,537 @@ +""" +This implements a model of the `N-back task `_ +described in `Beukers et al. (2022) `_. The model uses a simple implementation of episodic +(content-addressable) memory to store previous stimuli and the temporal context in which they occured, +and a feedforward neural network to evaluate whether the current stimulus is a match to the n'th preceding stimulus +(n-back level). This model is an example of proposed interactions between working memory (e.g., in neocortex) and +episodic memory e.g., in hippocampus and/or cerebellum) in the performance of tasks demanding of sequential processing +and control, and along the lines of models emerging machine learning that augment the use of recurrent neural networks +(e.g., long short-term memory mechanisms; LSTMs) for active memory and control with an external memory capable of +rapid storage and content-based retrieval, such as the Neural Turing Machine (NTN; `Graves et al., 2016 +`_), Episodic Planning Networks (EPN; `Ritter et al., 2020 +`_), and Emergent Symbols through Binding Networks (ESBN; `Webb et al., 2021 +`_). + +There are three primary methods in the script: + +* construct_model(args): + takes as arguments parameters used to construct the model; for convenience, defaults are defined below, + (under "Construction parameters") + +* train_network(args) + takes as arguments the feedforward neural network Composition (FFN_COMPOSITION) and number of epochs to train. + Note: learning_rate is set at construction (can specify using LEARNING_RATE under "Training parameters" below). + +* run_model() + takes the context drift rate to be applied on each trial and the number of trials to execute as args, as well as + reporting and animation specifications (see "Execution parameters" below). + +See "Settings for running the script" to specify whether the model is trained and/or executed when the script is run, +and whether a graphic display of the network is generated when it is constructed. + +TODO: + - from Andre + - network architecture; in particular, size of hidden layer and projection patterns to and from it + - the stim+context input vector (length 90) projects to a hidden layer (length 80); + - the task input vector (length 2) projects to a different hidden layer (length 80); + - those two hidden layers project (over fixed, nonlearnable, one-one-projections?) 
to a third hidden layer (length 80) that simply sums them; + - the third hidden layer projects to the length 2 output layer; + - a softmax is taken over the output layer to determine the response. + - fix: were biases trained? + - training: + - learning rate: 0.001; epoch: 1 trial per epoch of training + - fix: state_dict with weights (still needed) + - get empirical stimulus sequences (still needed) + - put N-back script (with pointer to latest version on PNL) in nback-paper repo + - fix: get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) + - fix: warnings on run + - complete documentation in BeukersNbackModel.rst + - validate against nback-paper results + - after validation: + - try with STIM_SIZE = NUM_STIMS rather than 20 (as in nback-paper) + - refactor generate_stim_sequence() to use actual empirical stimulus sequences + - replace get_input_sequence and get_training_inputs with generators passed to nback_model.run() and ffn.learn + +""" + +from graph_scheduler import * + +from psyneulink import * +import numpy as np + +# Settings for running script: +TRAIN = True +RUN = True +DISPLAY_MODEL = False # show visual graphic of model + +# PARAMETERS ------------------------------------------------------------------------------------------------------- + +# Fixed (structural) parameters: +MAX_NBACK_LEVELS = 3 +NUM_STIM = 8 # number of different stimuli in stimulus set - QUESTION: WHY ISN"T THIS EQUAL TO STIM_SIZE OR VICE VERSA? +FFN_TRANSFER_FUNCTION = ReLU + +# Constructor parameters: (values are from nback-paper) +STIM_SIZE=8 # length of stimulus vector +CONTEXT_SIZE=25 # length of context vector +HIDDEN_SIZE=STIM_SIZE*4 # dimension of hidden units in ff +NBACK_LEVELS = [2,3] # Currently restricted to these +NUM_NBACK_LEVELS = len(NBACK_LEVELS) +CONTEXT_DRIFT_NOISE=0.0 # noise used by DriftOnASphereIntegrator (function of Context mech) +RANDOM_WEIGHTS_INITIALIZATION=RandomMatrix(center=0.0, range=0.1) # Matrix spec used to initialize all Projections +RETRIEVAL_SOFTMAX_TEMP=1/8 # express as gain # precision of retrieval process +RETRIEVAL_HAZARD_RATE=0.04 # rate of re=sampling of em following non-match determination in a pass through ffn +RETRIEVAL_STIM_WEIGHT=.05 # weighting of stimulus field in retrieval from em +RETRIEVAL_CONTEXT_WEIGHT = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em +DECISION_SOFTMAX_TEMP=1 + +# Training parameters: +NUM_EPOCHS= 6250 # nback-paper: 400,000 @ one trial per epoch = 6,250 @ 64 trials per epoch +LEARNING_RATE=0.01 # nback-paper: .001 + +# Execution parameters: +CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial +NUM_TRIALS = 48 # number of stimuli presented in a trial sequence +REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run +REPORT_PROGRESS = ReportProgress.OFF # Sets console progress bar during run +REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training +ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution + +# Names of Compositions and Mechanisms: +NBACK_MODEL = "N-back Model" +FFN_COMPOSITION = "WORKING MEMORY (fnn)" +FFN_STIMULUS_INPUT = "CURRENT STIMULUS" +FFN_CONTEXT_INPUT = "CURRENT CONTEXT" +FFN_STIMULUS_RETRIEVED = "RETRIEVED STIMULUS" +FFN_CONTEXT_RETRIEVED = "RETRIEVED CONTEXT" +FFN_TASK = "CURRENT TASK" +FFN_HIDDEN = "HIDDEN LAYER" +FFN_OUTPUT = "DECISION LAYER" +MODEL_STIMULUS_INPUT ='STIM' +MODEL_CONTEXT_INPUT = 'CONTEXT' 
+MODEL_TASK_INPUT = "TASK" +EM = "EPISODIC MEMORY (dict)" +CONTROLLER = "READ/WRITE CONTROLLER" + +# ======================================== MODEL CONSTRUCTION ========================================================= + +def construct_model(stim_size = STIM_SIZE, + context_size = CONTEXT_SIZE, + hidden_size = HIDDEN_SIZE, + num_nback_levels = NUM_NBACK_LEVELS, + context_drift_noise = CONTEXT_DRIFT_NOISE, + retrievel_softmax_temp = RETRIEVAL_SOFTMAX_TEMP, + retrieval_hazard_rate = RETRIEVAL_HAZARD_RATE, + retrieval_stimulus_weight = RETRIEVAL_STIM_WEIGHT, + retrieval_context_weight = RETRIEVAL_CONTEXT_WEIGHT, + decision_softmax_temp = DECISION_SOFTMAX_TEMP): + """Construct nback_model""" + + print(f"constructing '{FFN_COMPOSITION}'...") + + # FEED FORWARD NETWORK ----------------------------------------- + + # inputs: encoding of current stimulus and context, retrieved stimulus and retrieved context, + # output: decision: match [1,0] or non-match [0,1] + # Must be trained to detect match for specified task (1-back, 2-back, etc.) + input_current_stim = TransferMechanism(name=FFN_STIMULUS_INPUT, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_current_context = TransferMechanism(name=FFN_CONTEXT_INPUT, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_stim = TransferMechanism(name=FFN_STIMULUS_RETRIEVED, + size=stim_size, + function=FFN_TRANSFER_FUNCTION) + input_retrieved_context = TransferMechanism(name=FFN_CONTEXT_RETRIEVED, + size=context_size, + function=FFN_TRANSFER_FUNCTION) + input_task = TransferMechanism(name=FFN_TASK, + size=num_nback_levels, + function=FFN_TRANSFER_FUNCTION) + hidden = TransferMechanism(name=FFN_HIDDEN, + size=hidden_size, + function=FFN_TRANSFER_FUNCTION) + decision = ProcessingMechanism(name=FFN_OUTPUT, + size=2, function=SoftMax(output=MAX_INDICATOR, + gain=decision_softmax_temp)) + ffn = AutodiffComposition(([{input_current_stim, + input_current_context, + input_retrieved_stim, + input_retrieved_context, + input_task}, + hidden, decision], + RANDOM_WEIGHTS_INITIALIZATION, + ), + name=FFN_COMPOSITION, + learning_rate=LEARNING_RATE + ) + + # FULL MODEL (Outer Composition, including input, EM and control Mechanisms) ------------------------ + + print(f"'constructing {NBACK_MODEL}'...") + + # Stimulus Encoding: takes STIM_SIZE vector as input + stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=stim_size) + + # Context Encoding: takes scalar as drift step for current trial + context = ProcessingMechanism(name=MODEL_CONTEXT_INPUT, + function=DriftOnASphereIntegrator( + initializer=np.random.random(context_size-1), + noise=context_drift_noise, + dimension=context_size)) + + # Task: task one-hot indicating n-back (1, 2, 3 etc.) 
- must correspond to what ffn has been trained to do + task = ProcessingMechanism(name=MODEL_TASK_INPUT, + size=num_nback_levels) + + # Episodic Memory: + # - entries: stimulus (field[0]) and context (field[1]); randomly initialized + # - uses Softmax to retrieve best matching input, subject to weighting of stimulus and context by STIM_WEIGHT + em = EpisodicMemoryMechanism(name=EM, + input_ports=[{NAME:"STIMULUS_FIELD", + SIZE:stim_size}, + {NAME:"CONTEXT_FIELD", + SIZE:context_size}], + function=ContentAddressableMemory( + initializer=[[[0]*stim_size, [0]*context_size]], + distance_field_weights=[retrieval_stimulus_weight, + retrieval_context_weight], + # equidistant_entries_select=NEWEST, + selection_function=SoftMax(output=MAX_INDICATOR, + gain=retrievel_softmax_temp)), + ) + + # Control Mechanism + # Ensures current stimulus and context are only encoded in EM once (at beginning of trial) + # by controlling the storage_prob parameter of em: + # - if outcome of decision signifies a match or hazard rate is realized: + # - set EM[store_prob]=1 (as prep encoding stimulus in EM on next trial) + # - this also serves to terminate trial (see nback_model.termination_processing condition) + # - if outcome of decision signifies a non-match + # - set EM[store_prob]=0 (as prep for another retrieval from EM without storage) + # - continue trial + control = ControlMechanism(name=CONTROLLER, + default_variable=[[1]], # Ensure EM[store_prob]=1 at beginning of first trial + # --------- + # VERSION *WITH* ObjectiveMechanism: + objective_mechanism=ObjectiveMechanism(name="OBJECTIVE MECHANISM", + monitor=decision, + # Outcome=1 if match, else 0 + function=lambda x: int(x[0][1]>x[0][0])), + # Set ControlSignal for EM[store_prob] + function=lambda outcome: int(bool(outcome) + or (np.random.random() > retrieval_hazard_rate)), + # --------- + # # VERSION *WITHOUT* ObjectiveMechanism: + # monitor_for_control=decision, + # # Set Evaluate outcome and set ControlSignal for EM[store_prob] + # # - outcome is received from decision as one hot in the form: [[match, no-match]] + # function=lambda outcome: int(int(outcome[0][1]>outcome[0][0]) + # or (np.random.random() > retrieval_hazard_rate)), + # --------- + control=(STORAGE_PROB, em)) + + nback_model = Composition(name=NBACK_MODEL, + nodes=[stim, context, task, ffn, em, control], + # Terminate trial if value of control is still 1 after first pass through execution + termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), + AfterPass(0, TimeScale.TRIAL))}, + ) + # # Terminate trial if value of control is still 1 after first pass through execution + # # FIX: ALL OF THE FOLLOWING STOP AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 + # nback_model.scheduler.add_condition(nback_model, And(Condition(lambda: control.value), AfterPass(0, TimeScale.TRIAL))) + # nback_model.scheduler.termination_conds = ({TimeScale.TRIAL: And(Condition(lambda: control.value), + # AfterPass(0, TimeScale.TRIAL))}) + # nback_model.scheduler.termination_conds.update({TimeScale.TRIAL: And(Condition(lambda: control.value), + # AfterPass(0, TimeScale.TRIAL))}) + nback_model.add_projection(MappingProjection(), stim, input_current_stim) + nback_model.add_projection(MappingProjection(), context, input_current_context) + nback_model.add_projection(MappingProjection(), task, input_task) + nback_model.add_projection(MappingProjection(), em.output_ports["RETRIEVED_STIMULUS_FIELD"], input_retrieved_stim) + nback_model.add_projection(MappingProjection(), 
em.output_ports["RETRIEVED_CONTEXT_FIELD"], input_retrieved_context) + nback_model.add_projection(MappingProjection(), stim, em.input_ports["STIMULUS_FIELD"]) + nback_model.add_projection(MappingProjection(), context, em.input_ports["CONTEXT_FIELD"]) + + if DISPLAY_MODEL: + nback_model.show_graph( + # show_cim=True, + # show_node_structure=ALL, + # show_dimensions=True + ) + + print(f'full model constructed') + return nback_model + +# ==========================================STIMULUS GENERATION ======================================================= +# Based on nback-paper + +def get_stim_set(num_stim=STIM_SIZE): + """Construct an array of stimuli for use an experiment""" + # For now, use one-hots + return np.eye(num_stim) + +def get_task_input(nback_level): + """Construct input to task Mechanism for a given nback_level, used by run_model() and train_network()""" + task_input = list(np.zeros_like(NBACK_LEVELS)) + task_input[nback_level-NBACK_LEVELS[0]] = 1 + return task_input + +def get_run_inputs(model, nback_level, context_drift_rate, num_trials): + """Construct set of stimulus inputs for run_model()""" + + def generate_stim_sequence(nback_level, trial_num, trial_type=0, num_stim=NUM_STIM, num_trials=NUM_TRIALS): + assert nback_level in {2,3} # At present, only 2- and 3-back levels are supported + + def gen_subseq_stim(): + A = np.random.randint(0,num_stim) + B = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A]) + ) + C = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A,B]) + ) + X = np.random.choice( + np.setdiff1d(np.arange(num_stim),[A,B]) + ) + return A,B,C,X + + def generate_match_no_foils_sequence(nback_level,trial_num): + # AXA (2-back) or ABXA (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,X,A] + elif nback_level==3: + subseq = [A,B,X,A] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_non_match_no_foils_sequence(nback_level,trial_num): + # AXB (2-back) or ABXC (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,X,B] + elif nback_level==3: + subseq = [A,B,X,C] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_match_with_foil_sequence(nback_level,trial_num): + # AAA (2-back) or AAXA (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [A,A,A] + elif nback_level==3: + subseq = [A,A,X,A] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + def generate_non_match_with_foil_sequence(nback_level,trial_num): + # XAA (2-back) or ABXB (3-back) + seq = np.random.randint(0,num_stim,num_trials) + A,B,C,X = gen_subseq_stim() + # + if nback_level==2: + subseq = [X,A,A] + elif nback_level==3: + subseq = [A,B,X,B] + seq[trial_num-(nback_level+1):trial_num] = subseq + return seq[:trial_num] + + trial_types = [generate_match_no_foils_sequence, + generate_match_with_foil_sequence, + generate_non_match_no_foils_sequence, + generate_non_match_with_foil_sequence] + stim_seq = trial_types[trial_type](nback_level,trial_num) + # ytarget = [1,1,0,0][trial_type] + # ctxt = spherical_drift(trial_num) + # return stim,ctxt,ytarget + return stim_seq + + # def stim_set_generation(nback_level, num_trials): + # stim_sequence = [] + # # for seq_int, trial in itertools.product(range(4),np.arange(5,trials)): # This generates all length sequences + # for 
trial_type, trial_num in itertools.product(range(4),[num_trials]): # This generates only longest seq ( + # # num_trials) + # return stim_sequence.append(generate_stim_sequence(nback_level, trial_num, trial_type=trial_type, trials=num_trials)) + + def get_input_sequence(nback_level, num_trials=NUM_TRIALS): + """Get sequence of inputs for a run""" + input_set = get_stim_set() + # Construct sequence of stimulus indices + trial_seq = generate_stim_sequence(nback_level, num_trials) + # Return list of corresponding stimulus input vectors + return [input_set[trial_seq[i]] for i in range(num_trials)] + + return {model.nodes[MODEL_STIMULUS_INPUT]: get_input_sequence(nback_level, num_trials), + model.nodes[MODEL_CONTEXT_INPUT]: [[context_drift_rate]]*num_trials, + model.nodes[MODEL_TASK_INPUT]: [get_task_input(nback_level)]*num_trials} + +def get_training_inputs(network, num_epochs, nback_levels): + """Construct set of training stimuli used by ffn.learn() in train_network() + Construct one example of each condition: + match: stim_current = stim_retrieved and context_current = context_retrieved + stim_lure: stim_current = stim_retrieved and context_current != context_retrieved + context_lure: stim_current != stim_retrieved and context_current == context_retrieved + non_lure: stim_current != stim_retrieved and context_current != context_retrieved + """ + assert is_iterable(nback_levels) and all([0", + "image/svg+xml": "\n\n\n\n\n\nN-back Model\n\nN-back Model\n\ncluster_WORKING MEMORY (fnn)\n\nWORKING MEMORY (fnn)\n\n\n\nTASK\n\nTASK\n\n\n\nCURRENT TASK\n\nCURRENT TASK\n\n\n\nTASK->CURRENT TASK\n\n\n\n\n\nCONTEXT\n\nCONTEXT\n\n\n\nCURRENT CONTEXT\n\nCURRENT CONTEXT\n\n\n\nCONTEXT->CURRENT CONTEXT\n\n\n\n\n\nEPISODIC MEMORY (dict)\n\nEPISODIC MEMORY (dict)\n\n\n\nCONTEXT->EPISODIC MEMORY (dict)\n\n\n\n\n\nSTIM\n\nSTIM\n\n\n\nCURRENT STIMULUS\n\nCURRENT STIMULUS\n\n\n\nSTIM->CURRENT STIMULUS\n\n\n\n\n\nSTIM->EPISODIC MEMORY (dict)\n\n\n\n\n\nHIDDEN LAYER\n\nHIDDEN LAYER\n\n\n\nCURRENT TASK->HIDDEN LAYER\n\n\n\n\n\nCURRENT STIMULUS->HIDDEN LAYER\n\n\n\n\n\nCURRENT CONTEXT->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED CONTEXT\n\nRETRIEVED CONTEXT\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED CONTEXT\n\n\n\n\n\nRETRIEVED STIMULUS\n\nRETRIEVED STIMULUS\n\n\n\nEPISODIC MEMORY (dict)->RETRIEVED STIMULUS\n\n\n\n\n\nRETRIEVED CONTEXT->HIDDEN LAYER\n\n\n\n\n\nRETRIEVED STIMULUS->HIDDEN LAYER\n\n\n\n\n\nREAD/WRITE CONTROLLER\n\nREAD/WRITE CONTROLLER\n\n\n\nREAD/WRITE CONTROLLER->EPISODIC MEMORY (dict)\n\n\n\n\n\n\nOBJECTIVE MECHANISM\n\nOBJECTIVE MECHANISM\n\n\n\nOBJECTIVE MECHANISM->READ/WRITE CONTROLLER\n\n\n\n\n\nDECISION LAYER\n\nDECISION LAYER\n\n\n\nDECISION LAYER->OBJECTIVE MECHANISM\n\n\n\n\n\nHIDDEN LAYER->DECISION LAYER\n\n\n\n\n\n" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nback_model.show_graph(output_fmt='jupyter')" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Train the model:" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "ffn = nback_model.nodes['WORKING MEMORY (fnn)']\n", + "train_network(ffn, num_epochs=100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + 
"source": [ + "run_model(nback_model)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/Scripts/Models (Under Development)/N-back.py b/Scripts/Models (Under Development)/N-back/Nback.py similarity index 84% rename from Scripts/Models (Under Development)/N-back.py rename to Scripts/Models (Under Development)/N-back/Nback.py index a7546baa6e9..abd8173c02a 100644 --- a/Scripts/Models (Under Development)/N-back.py +++ b/Scripts/Models (Under Development)/N-back/Nback.py @@ -35,19 +35,17 @@ - the stim+context input vector (length 90) projects to a hidden layer (length 80); - the task input vector (length 2) projects to a different hidden layer (length 80); - those two hidden layers project (over fixed, nonlearnable, one-one-projections?) to a third hidden layer (length 80) that simply sums them; - - the third hidden layer projections to the length 2 output layer; + - the third hidden layer projects to the length 2 output layer; - a softmax is taken over the output layer to determine the response. - - softmax temp on output/decision layer: 1 - - confirm that ReLUs all use 0 thresholds and unit slope + - fix: were biases trained? - training: - learning rate: 0.001; epoch: 1 trial per epoch of training - - state_dict with weights (still needed) + - fix: state_dict with weights (still needed) - get empirical stimulus sequences (still needed) - put N-back script (with pointer to latest version on PNL) in nback-paper repo - - get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) (fix bug) - - make termination processing part of the Composition definition (fix bug) - - pass learning_rate as parameter to train_network() (add feature) - - fix warnings on run + - fix: get rid of objective_mechanism (see "VERSION *WITHOUT* ObjectiveMechanism" under control(...) 
+ - fix: warnings on run + - complete documentation in BeukersNbackModel.rst - validate against nback-paper results - after validation: - try with STIM_SIZE = NUM_STIMS rather than 20 (as in nback-paper) @@ -62,8 +60,6 @@ import numpy as np # Settings for running script: -TRAIN = True -RUN = False DISPLAY_MODEL = False # show visual graphic of model # PARAMETERS ------------------------------------------------------------------------------------------------------- @@ -85,22 +81,22 @@ RETRIEVAL_HAZARD_RATE=0.04 # rate of re=sampling of em following non-match determination in a pass through ffn RETRIEVAL_STIM_WEIGHT=.05 # weighting of stimulus field in retrieval from em RETRIEVAL_CONTEXT_WEIGHT = 1-RETRIEVAL_STIM_WEIGHT # weighting of context field in retrieval from em -DECISION_SOFTMAX_TEMP=1/8 # express as gain # binarity of decision process +DECISION_SOFTMAX_TEMP=1 # Training parameters: -NUM_EPOCHS=10 # nback-paper: 400,000, one trial per epoch -LEARNING_RATE=0.1 # nback-paper: .001 +NUM_EPOCHS=3 # nback-paper: 400,000 @ one trial per epoch = 2,500 @ 160 trials per epoch +LEARNING_RATE=0.01 # nback-paper: .001 # Execution parameters: CONTEXT_DRIFT_RATE=.1 # drift rate used for DriftOnASphereIntegrator (function of Context mech) on each trial -NUM_TRIALS = 48 # number of stimuli presented in a trial sequence +NUM_TRIALS = 48 # number of stimuli presented in a trial sequence for a given nback_level during run REPORT_OUTPUT = ReportOutput.OFF # Sets console output during run -REPORT_PROGRESS = ReportProgress.ON # Sets console progress bar during run -REPORT_LEARNING = ReportLearning.ON # Sets console progress bar during training -ANIMATE = True # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution +REPORT_PROGRESS = ReportProgress.OFF # Sets console progress bar during run +REPORT_LEARNING = ReportLearning.OFF # Sets console progress bar during training +ANIMATE = False # {UNIT:EXECUTION_SET} # Specifies whether to generate animation of execution # Names of Compositions and Mechanisms: -NBACK_MODEL = "N-Back Model" +NBACK_MODEL = "N-back Model" FFN_COMPOSITION = "WORKING MEMORY (fnn)" FFN_STIMULUS_INPUT = "CURRENT STIMULUS" FFN_CONTEXT_INPUT = "CURRENT CONTEXT" @@ -129,6 +125,8 @@ def construct_model(stim_size = STIM_SIZE, decision_softmax_temp = DECISION_SOFTMAX_TEMP): """Construct nback_model""" + print(f'constructing {FFN_COMPOSITION}...') + # FEED FORWARD NETWORK ----------------------------------------- # inputs: encoding of current stimulus and context, retrieved stimulus and retrieved context, @@ -161,7 +159,7 @@ def construct_model(stim_size = STIM_SIZE, input_retrieved_context, input_task}, hidden, decision], - RANDOM_WEIGHTS_INITIALIZATION, + RANDOM_WEIGHTS_INITIALIZATION, ), name=FFN_COMPOSITION, learning_rate=LEARNING_RATE @@ -169,15 +167,17 @@ def construct_model(stim_size = STIM_SIZE, # FULL MODEL (Outer Composition, including input, EM and control Mechanisms) ------------------------ + print(f'constructing {NBACK_MODEL}...') + # Stimulus Encoding: takes STIM_SIZE vector as input - stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=STIM_SIZE) + stim = TransferMechanism(name=MODEL_STIMULUS_INPUT, size=stim_size) # Context Encoding: takes scalar as drift step for current trial context = ProcessingMechanism(name=MODEL_CONTEXT_INPUT, function=DriftOnASphereIntegrator( - initializer=np.random.random(CONTEXT_SIZE-1), + initializer=np.random.random(context_size-1), noise=context_drift_noise, - dimension=CONTEXT_SIZE)) + dimension=context_size)) # Task: 
task one-hot indicating n-back (1, 2, 3 etc.) - must correspond to what ffn has been trained to do task = ProcessingMechanism(name=MODEL_TASK_INPUT, @@ -188,11 +188,11 @@ def construct_model(stim_size = STIM_SIZE, # - uses Softmax to retrieve best matching input, subject to weighting of stimulus and context by STIM_WEIGHT em = EpisodicMemoryMechanism(name=EM, input_ports=[{NAME:"STIMULUS_FIELD", - SIZE:STIM_SIZE}, + SIZE:stim_size}, {NAME:"CONTEXT_FIELD", - SIZE:CONTEXT_SIZE}], + SIZE:context_size}], function=ContentAddressableMemory( - initializer=[[[0]*STIM_SIZE, [0]*CONTEXT_SIZE]], + initializer=[[[0]*stim_size, [0]*context_size]], distance_field_weights=[retrieval_stimulus_weight, retrieval_context_weight], # equidistant_entries_select=NEWEST, @@ -211,7 +211,8 @@ def construct_model(stim_size = STIM_SIZE, # - continue trial control = ControlMechanism(name=CONTROLLER, default_variable=[[1]], # Ensure EM[store_prob]=1 at beginning of first trial - # # VERSION *WITH* ObjectiveMechanism: + # --------- + # VERSION *WITH* ObjectiveMechanism: objective_mechanism=ObjectiveMechanism(name="OBJECTIVE MECHANISM", monitor=decision, # Outcome=1 if match, else 0 @@ -219,20 +220,21 @@ def construct_model(stim_size = STIM_SIZE, # Set ControlSignal for EM[store_prob] function=lambda outcome: int(bool(outcome) or (np.random.random() > retrieval_hazard_rate)), + # --------- # # VERSION *WITHOUT* ObjectiveMechanism: # monitor_for_control=decision, # # Set Evaluate outcome and set ControlSignal for EM[store_prob] # # - outcome is received from decision as one hot in the form: [[match, no-match]] # function=lambda outcome: int(int(outcome[0][1]>outcome[0][0]) - # or (np.random.random() > HAZARD_RATE)), + # or (np.random.random() > retrieval_hazard_rate)), + # --------- control=(STORAGE_PROB, em)) nback_model = Composition(name=NBACK_MODEL, nodes=[stim, context, task, ffn, em, control], - # # # Terminate trial if value of control is still 1 after first pass through execution - # # FIX: STOPS AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 - # termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), - # AfterPass(0, TimeScale.TRIAL))}, + # Terminate trial if value of control is still 1 after first pass through execution + termination_processing={TimeScale.TRIAL: And(Condition(lambda: control.value), + AfterPass(0, TimeScale.TRIAL))}, ) # # Terminate trial if value of control is still 1 after first pass through execution # # FIX: ALL OF THE FOLLOWING STOP AFTER ~ NUMBER OF TRIALS (?90+); SHOULD BE: NUM_TRIALS*NUM_NBACK_LEVELS + 1 @@ -256,6 +258,7 @@ def construct_model(stim_size = STIM_SIZE, # show_dimensions=True ) + print(f'full model constructed') return nback_model # ==========================================STIMULUS GENERATION ======================================================= @@ -445,18 +448,41 @@ def get_training_inputs(network, num_epochs, nback_levels): TARGETS: {network.nodes[FFN_OUTPUT]: target}, EPOCHS: num_epochs} - return training_set + batch_size = len(target) + print(f'num trials (batch_size): {len(target)}') + return training_set, batch_size # ======================================== MODEL EXECUTION ============================================================ def train_network(network, learning_rate=LEARNING_RATE, num_epochs=NUM_EPOCHS): - training_set = get_training_inputs(network=network, num_epochs=num_epochs, nback_levels=NBACK_LEVELS) + print(f"constructing training_set for '{network.name}'...") + training_set, batch_size = 
get_training_inputs(network=network, + num_epochs=num_epochs, + nback_levels=NBACK_LEVELS) + print(f'training_set constructed: {len(training_set)}') + print(f"\ntraining '{network.name}'...") + import timeit + start_time = timeit.default_timer() network.learn(inputs=training_set, - minibatch_size=NUM_TRIALS, + minibatch_size=batch_size, + report_progress=REPORT_PROGRESS, # report_learning=REPORT_LEARNING, + learning_rate=learning_rate, execution_mode=ExecutionMode.LLVMRun) + stop_time = timeit.default_timer() + print(f"'{network.name}' trained") + training_time = stop_time-start_time + if training_time <= 60: + training_time_str = f'{int(training_time)} seconds' + else: + training_time_str = f'{int(training_time/60)} minutes' + print(f'training time: {training_time_str} for {num_epochs} epochs') + # path = network.save() + # print(f'saved weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') + # network.load(path) + # print(f'loaded weights sample: {network.nodes[FFN_HIDDEN].path_afferents[0].matrix.base[0][:3]}...') def run_model(model, context_drift_rate=CONTEXT_DRIFT_RATE, @@ -465,12 +491,9 @@ def run_model(model, report_progress=REPORT_PROGRESS, animate=ANIMATE ): + print('nback_model executing...') for nback_level in NBACK_LEVELS: model.run(inputs=get_run_inputs(model, nback_level, context_drift_rate, num_trials), - # FIX: MOVE THIS TO MODEL CONSTRUCTION ONCE THAT WORKS - # Terminate trial if value of control is still 1 after first pass through execution - termination_processing={TimeScale.TRIAL: And(Condition(lambda: model.nodes[CONTROLLER].value), - AfterPass(0, TimeScale.TRIAL))}, # function arg report_output=report_output, report_progress=report_progress, animate=animate @@ -478,52 +501,7 @@ def run_model(model, # FIX: RESET MEMORY HERE? 
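[Editor's note] With the module-level driver removed from Nback.py (deleted further below), construction, training, and execution are now invoked explicitly, as in the accompanying notebook. A minimal sketch, assuming the definitions in the script are in scope:

    nback_model = construct_model()
    train_network(nback_model.nodes[FFN_COMPOSITION])   # train the feedforward ("WORKING MEMORY (fnn)") network
    run_model(nback_model)                              # run NUM_TRIALS stimuli at each level in NBACK_LEVELS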
# print("Number of entries in EM: ", len(model.nodes[EM].memory)) assert len(model.nodes[EM].memory) == NUM_TRIALS*NUM_NBACK_LEVELS + 1 - - -nback_model = construct_model() -print('nback_model constructed') -if TRAIN: - print('nback_model training...') - train_network(nback_model.nodes[FFN_COMPOSITION]) - print('nback_model trained') -if RUN: - print('nback_model executing...') - run_model(nback_model) if REPORT_PROGRESS == ReportProgress.ON: print('\n') print(f'nback_model done: {len(nback_model.results)} trials executed') - -# =========================================================================== - -# TEST OF SPHERICAL DRIFT: -# stims = np.array([x[0] for x in em.memory]) -# contexts = np.array([x[1] for x in em.memory]) -# cos = Distance(metric=COSINE) -# dist = Distance(metric=EUCLIDEAN) -# diffs = [np.sum([contexts[i+1] - contexts[1]]) for i in range(NUM_TRIALS)] -# diffs_1 = [np.sum([contexts[i+1] - contexts[i]]) for i in range(NUM_TRIALS)] -# diffs_2 = [np.sum([contexts[i+2] - contexts[i]]) for i in range(NUM_TRIALS-1)] -# dots = [[contexts[i+1] @ contexts[1]] for i in range(NUM_TRIALS)] -# dot_diffs_1 = [[contexts[i+1] @ contexts[i]] for i in range(NUM_TRIALS)] -# dot_diffs_2 = [[contexts[i+2] @ contexts[i]] for i in range(NUM_TRIALS-1)] -# angle = [cos([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] -# angle_1 = [cos([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] -# angle_2 = [cos([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] -# euclidean = [dist([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] -# euclidean_1 = [dist([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] -# euclidean_2 = [dist([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] -# print("STIMS:", stims, "\n") -# print("DIFFS:", diffs, "\n") -# print("DIFFS 1:", diffs_1, "\n") -# print("DIFFS 2:", diffs_2, "\n") -# print("DOT PRODUCTS:", dots, "\n") -# print("DOT DIFFS 1:", dot_diffs_1, "\n") -# print("DOT DIFFS 2:", dot_diffs_2, "\n") -# print("ANGLE: ", angle, "\n") -# print("ANGLE_1: ", angle_1, "\n") -# print("ANGLE_2: ", angle_2, "\n") -# print("EUCILDEAN: ", euclidean, "\n") -# print("EUCILDEAN 1: ", euclidean_1, "\n") -# print("EUCILDEAN 2: ", euclidean_2, "\n") - -# n_back_model() + print(f'results: \n{model.results}') diff --git a/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py b/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py new file mode 100644 index 00000000000..3fb2cbed191 --- /dev/null +++ b/Scripts/Models (Under Development)/N-back/SphericalDrift Tests.py @@ -0,0 +1,34 @@ +import numpy as np +from psyneulink import * + +NUM_TRIALS = 48 + +stims = np.array([x[0] for x in em.memory]) +contexts = np.array([x[1] for x in em.memory]) +cos = Distance(metric=COSINE) +dist = Distance(metric=EUCLIDEAN) +diffs = [np.sum([contexts[i+1] - contexts[1]]) for i in range(NUM_TRIALS)] +diffs_1 = [np.sum([contexts[i+1] - contexts[i]]) for i in range(NUM_TRIALS)] +diffs_2 = [np.sum([contexts[i+2] - contexts[i]]) for i in range(NUM_TRIALS-1)] +dots = [[contexts[i+1] @ contexts[1]] for i in range(NUM_TRIALS)] +dot_diffs_1 = [[contexts[i+1] @ contexts[i]] for i in range(NUM_TRIALS)] +dot_diffs_2 = [[contexts[i+2] @ contexts[i]] for i in range(NUM_TRIALS-1)] +angle = [cos([contexts[i+1], contexts[1]]) for i in range(NUM_TRIALS)] +angle_1 = [cos([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +angle_2 = [cos([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +euclidean = [dist([contexts[i+1], contexts[1]]) for 
i in range(NUM_TRIALS)] +euclidean_1 = [dist([contexts[i+1], contexts[i]]) for i in range(NUM_TRIALS)] +euclidean_2 = [dist([contexts[i+2], contexts[i]]) for i in range(NUM_TRIALS-1)] +print("STIMS:", stims, "\n") +print("DIFFS:", diffs, "\n") +print("DIFFS 1:", diffs_1, "\n") +print("DIFFS 2:", diffs_2, "\n") +print("DOT PRODUCTS:", dots, "\n") +print("DOT DIFFS 1:", dot_diffs_1, "\n") +print("DOT DIFFS 2:", dot_diffs_2, "\n") +print("ANGLE: ", angle, "\n") +print("ANGLE_1: ", angle_1, "\n") +print("ANGLE_2: ", angle_2, "\n") +print("EUCILDEAN: ", euclidean, "\n") +print("EUCILDEAN 1: ", euclidean_1, "\n") +print("EUCILDEAN 2: ", euclidean_2, "\n") diff --git a/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl b/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl new file mode 100644 index 00000000000..2a46665f63c Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/WORKING MEMORY (fnn)_matrix_wts_20stim_2500ep.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/__init__.py b/Scripts/Models (Under Development)/N-back/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..4903636b03d Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_1_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl new file mode 100644 index 00000000000..cb1e3a49a5e Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/ffn.wts_nep_6250_lr_01.pnl differ diff --git a/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy b/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy new file mode 100644 index 00000000000..dc1b2a21074 Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/nback.results_nep_1_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy b/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy new file mode 100644 index 00000000000..f0a8a235271 Binary files /dev/null and b/Scripts/Models (Under Development)/N-back/nback.results_nep_6250_lr_01.pnl.npy differ diff --git a/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl new file mode 100644 index 00000000000..f94b91cb028 Binary files /dev/null and b/Scripts/Models (Under Development)/WORKING MEMORY (fnn)_matrix_wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts.pnl b/Scripts/Models (Under Development)/ffn.wts.pnl new file mode 100644 index 00000000000..072920a24fe Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_01.pnl b/Scripts/Models (Under Development)/ffn.wts_01.pnl new file mode 100644 index 00000000000..20016bdf831 Binary files /dev/null and b/Scripts/Models (Under Development)/ffn.wts_01.pnl differ diff --git a/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl b/Scripts/Models (Under Development)/ffn.wts_nep_1_lr_01.pnl new file mode 100644 index 00000000000..f5f9c4d160b Binary files /dev/null and b/Scripts/Models (Under 
Development)/ffn.wts_nep_1_lr_01.pnl differ diff --git a/autodiff_composition_matrix_wts.pnl b/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/autodiff_composition_matrix_wts.pnl differ diff --git a/psyneulink/core/components/functions/nonstateful/learningfunctions.py b/psyneulink/core/components/functions/nonstateful/learningfunctions.py index e1d9b5ab1a7..49f15cf4f8d 100644 --- a/psyneulink/core/components/functions/nonstateful/learningfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/learningfunctions.py @@ -1123,7 +1123,6 @@ def _function(self, # then need to assign it to the default value # If learning_rate was not specified for instance or composition, use default value learning_rate = self._get_current_parameter_value(LEARNING_RATE, context) - # learning_rate = self.learning_rate if learning_rate is None: learning_rate = self.defaults.learning_rate # diff --git a/psyneulink/core/components/functions/nonstateful/transferfunctions.py b/psyneulink/core/components/functions/nonstateful/transferfunctions.py index 9e2d9fb3939..774da8a96ba 100644 --- a/psyneulink/core/components/functions/nonstateful/transferfunctions.py +++ b/psyneulink/core/components/functions/nonstateful/transferfunctions.py @@ -1620,8 +1620,8 @@ def derivative(self, input, output=None, context=None): # # MODIFIED 11/5/22 NEW: # bias = self._get_current_parameter_value(BIAS, context) # input = np.asarray(input).copy() - # input[(input-bias)>0] = gain - # input[(input-bias)<=0] = gain * leak + # input[(input - bias) > 0] = gain + # input[(input - bias) <= 0] = gain * leak # MODIFIED 11/5/22 END return input diff --git a/psyneulink/core/components/functions/stateful/memoryfunctions.py b/psyneulink/core/components/functions/stateful/memoryfunctions.py index c6fb7d67731..5c13c251278 100644 --- a/psyneulink/core/components/functions/stateful/memoryfunctions.py +++ b/psyneulink/core/components/functions/stateful/memoryfunctions.py @@ -466,7 +466,7 @@ class ContentAddressableMemory(MemoryFunction): # ------------------------------ An entry is stored and retrieved as an array containing a set of `fields ` each of which is a 1d array. An array containing such entries can be used to initialize the contents of `memory ` by providing it in the **initializer** argument of the ContentAddressableMemory's - constructor, or in a call to its `reset ` method. The current contents of `memory + constructor, or in a call to its `reset ` method. The current contents of `memory ` can be inspected using the `memory ` attribute, which returns a list containing the current entries, each as a list containing all fields for that entry. 
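[Editor's note] The ContentAddressableMemory docstring above describes initializing memory via the **initializer** argument and inspecting it via the `memory` attribute. A minimal, illustrative sketch, with field sizes and parameters mirroring the EM mechanism constructed in the N-back script (stimulus field of length 8, context field of length 25):

    from psyneulink import ContentAddressableMemory, SoftMax, MAX_INDICATOR

    # One zero-filled entry with a stimulus field and a context field, weighted 0.05 / 0.95 in retrieval
    cam = ContentAddressableMemory(initializer=[[[0]*8, [0]*25]],
                                   distance_field_weights=[0.05, 0.95],
                                   selection_function=SoftMax(output=MAX_INDICATOR, gain=1/8))
    print(cam.memory)   # list of current entries, each a list of that entry's fields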
The `memory_num_fields ` contains the number of fields expected for each diff --git a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py index 2ae1da4c11b..e8cfca7b532 100644 --- a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py @@ -1313,7 +1313,7 @@ def _execute( # Get error_signals (from ERROR_SIGNAL InputPorts) and error_matrices relevant for the current execution: error_signal_indices = self.error_signal_indices error_signal_inputs = variable[error_signal_indices] - # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITALIZATION AND UPDTATED AS NECESSARY + # FIX 7/22/19 [JDC]: MOVE THIS TO ITS OWN METHOD CALLED ON INITALIZATION AND UPDATED AS NECESSARY if self.error_matrices is None: # KAM 6/28/19 Hack to get the correct shape and contents for initial error matrix in backprop if self.function is BackPropagation or isinstance(self.function, BackPropagation): @@ -1354,7 +1354,6 @@ def _execute( ] ) learning_signal, error_signal = super()._execute(variable=function_variable, - # MODIFIED CROSS_PATHWAYS 7/22/19 END context=context, error_matrix=error_matrix, runtime_params=runtime_params, @@ -1368,7 +1367,7 @@ def _execute( and self.initialization_status != ContextFlags.INITIALIZING): print("\n{} weight change matrix: \n{}\n".format(self.name, summed_learning_signal)) - # Durning initialization return zeros so that the first "real" trial for Backprop does not start + # During initialization return zeros so that the first "real" trial for Backprop does not start # with the error computed during initialization if (self.in_composition and isinstance(self.function, BackPropagation) and diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 724e3f2f403..9dcfc218ed7 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -3813,6 +3813,7 @@ def __init__( self._partially_added_nodes = [] self.disable_learning = disable_learning + self._runtime_learning_rate = None # graph and scheduler status attributes self.graph_consistent = True # Tracks if Composition is in runnable state (no dangling projections (what else?) @@ -10178,6 +10179,7 @@ def learn( targets: tc.optional(dict) = None, num_trials: tc.optional(int) = None, epochs: int = 1, + learning_rate = None, minibatch_size: int = 1, patience: tc.optional(int) = None, min_delta: int = 0, @@ -10226,6 +10228,12 @@ def learn( epochs : int (default=1) specifies the number of training epochs (that is, repetitions of the batched input set) to run with + learning_rate : float : default None + specifies the learning_rate used by all `learning pathways ` + when the Composition's learn method is called. This overrides the `learning_rate specified + for any individual Pathways at construction, but only applies for the current execution of + the learn method. + minibatch_size : int (default=1) specifies the size of the minibatches to use. 
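[Editor's note] The learning_rate argument documented above can now be passed directly to learn() to override, for that call only, the learning_rate set at construction. A minimal sketch using the names from train_network() in the N-back script (ffn, training_set, and batch_size are defined there):

    ffn.learn(inputs=training_set,
              minibatch_size=batch_size,
              learning_rate=0.01,                     # overrides the construction-time learning_rate for this call only
              execution_mode=ExecutionMode.LLVMRun)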
The input trials will be batched and run, after which learning mechanisms with learning mode TRIAL will update weights @@ -10315,6 +10323,7 @@ def learn( targets=targets, num_trials=num_trials, epochs=epochs, + learning_rate=learning_rate, minibatch_size=minibatch_size, patience=patience, min_delta=min_delta, @@ -11210,7 +11219,7 @@ def execute( return self.get_output_values(context) def __call__(self, *args, **kwargs): - """Execute Composition of any args are provided; else simply return results of last execution. + """Execute Composition if any args are provided; else simply return results of last execution. This allows Composition, after it has been constructed, to be run simply by calling it directly. """ if not args and not kwargs: diff --git a/psyneulink/core/globals/utilities.py b/psyneulink/core/globals/utilities.py index 0adb9969835..61c987cb823 100644 --- a/psyneulink/core/globals/utilities.py +++ b/psyneulink/core/globals/utilities.py @@ -442,6 +442,16 @@ def iscompatible(candidate, reference=None, **kargs): warnings.simplefilter(action='ignore', category=FutureWarning) if reference is not None and (candidate == reference): return True + # if reference is not None: + # if (isinstance(reference, (bool, int, float)) + # and isinstance(candidate, (bool, int, float)) + # and candidate == reference): + # return True + # elif (isinstance(reference, (list, np.ndarray)) + # and isinstance(candidate, (list, np.ndarray)) and (candidate == reference).all()): + # return True + # elif is_iterable(reference) and is_iterable(candidate) and (candidate == reference): + # return True except ValueError: # raise UtilitiesError("Could not compare {0} and {1}".format(candidate, reference)) # IMPLEMENTATION NOTE: np.array generates the following error: diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 7a001ae3b75..9c3518e2237 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -94,9 +94,10 @@ Logging ~~~~~~~ -Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `. However, since an AutodiffComposition internally converts all of its mechanisms to an equivalent PyTorch model, -then its inner components are not actually executed. This means that there is limited support for logging parameters of components inside an AutodiffComposition; -Currently, the only supported parameters are: +Logging in AutodiffCompositions follows the same procedure as `logging in a Composition `. +However, since an AutodiffComposition internally converts all of its mechanisms to an equivalent PyTorch model, +then its inner components are not actually executed. 
This means that there is limited support for +logging parameters of components inside an AutodiffComposition; Currently, the only supported parameters are: 1) the `matrix` parameter of Projections @@ -132,8 +133,9 @@ """ import logging - +import os import numpy as np +from pathlib import Path, PosixPath try: import torch @@ -146,6 +148,9 @@ from psyneulink.library.compositions.pytorchmodelcreator import PytorchModelCreator from psyneulink.library.components.mechanisms.processing.objective.comparatormechanism import ComparatorMechanism +from psyneulink.core.components.mechanisms.processing.compositioninterfacemechanism import CompositionInterfaceMechanism +from psyneulink.core.components.mechanisms.modulatory.modulatorymechanism import ModulatoryMechanism_Base +from psyneulink.core.components.projections.modulatory.modulatoryprojection import ModulatoryProjection_Base from psyneulink.core.compositions.composition import Composition, NodeRole from psyneulink.core.compositions.composition import CompositionError from psyneulink.core.compositions.report \ @@ -159,6 +164,7 @@ from psyneulink.core import llvm as pnlvm + logger = logging.getLogger(__name__) @@ -185,7 +191,7 @@ class AutodiffComposition(Composition): --------- learning_rate : float : default 0.001 - the learning rate, which is passed to the optimizer. + the learning rate passed to the optimizer if none is specified in the learn method of the AutodiffComposition. disable_learning : bool: default False specifies whether the AutodiffComposition should disable learning when run in `learning mode @@ -259,6 +265,7 @@ def __init__(self, self.force_no_retain_graph = force_no_retain_graph self.loss = None self.disable_learning = disable_learning + self._runtime_learning_rate = None # keeps track of average loss per epoch self.losses = [] @@ -276,10 +283,10 @@ def __init__(self, # CLEANUP: move some of what's done in the methods below to a "validate_params" type of method @handle_external_context() - def _build_pytorch_representation(self, context=None): + def _build_pytorch_representation(self, context=None, refresh=False): if self.scheduler is None: self.scheduler = Scheduler(graph=self.graph_processing) - if self.parameters.pytorch_representation._get(context=context) is None: + if self.parameters.pytorch_representation._get(context=context) is None or refresh: model = PytorchModelCreator(composition=self, device=self.device, context=context) @@ -288,8 +295,9 @@ def _build_pytorch_representation(self, context=None): # Set up optimizer function old_opt = self.parameters.optimizer._get(context) - if old_opt is None: - opt = self._make_optimizer(self.optimizer_type, self.learning_rate, self.weight_decay, context) + learning_rate = self._runtime_learning_rate or self.learning_rate + if old_opt is None or refresh: + opt = self._make_optimizer(self.optimizer_type, learning_rate, self.weight_decay, context) self.parameters.optimizer._set(opt, context, skip_history=True, skip_log=True) # Set up loss function @@ -355,7 +363,10 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): # compute total loss across output neurons for current trial tracked_loss = self.parameters.tracked_loss._get(context) if tracked_loss is None: - self.parameters.tracked_loss._set(torch.zeros(1, device=self.device).double(), context=context, skip_history=True, skip_log=True) + self.parameters.tracked_loss._set(torch.zeros(1, device=self.device).double(), + context=context, + skip_history=True, + skip_log=True) tracked_loss = 
self.parameters.tracked_loss._get(context) curr_tensor_inputs = {} @@ -368,10 +379,9 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): curr_tensor_targets[component] = torch.tensor(target, device=self.device).double() # do forward computation on current inputs - curr_tensor_outputs = self.parameters.pytorch_representation._get(context).forward( - curr_tensor_inputs, - context, - ) + curr_tensor_outputs = self.parameters.pytorch_representation._get(context).forward(curr_tensor_inputs, + context, + ) for component in curr_tensor_outputs.keys(): # possibly add custom loss option, which is a loss function that takes many args @@ -385,7 +395,10 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): component = input_port.all_afferents[0].sender.owner outputs.append(curr_tensor_outputs[component].detach().cpu().numpy().copy()) - self.parameters.tracked_loss_count._set(self.parameters.tracked_loss_count._get(context=context) + 1, context=context, skip_history=True, skip_log=True) + self.parameters.tracked_loss_count._set(self.parameters.tracked_loss_count._get(context=context) + 1, + context=context, + skip_history=True, + skip_log=True) return outputs def clear_losses(self, context=None): @@ -394,7 +407,7 @@ def clear_losses(self, context=None): def _update_learning_parameters(self, context): """ - Updates parameters based on trials ran since last update. + Updates parameters based on trials run since last update. """ optimizer = self.parameters.optimizer._get(context=context) optimizer.zero_grad() @@ -563,6 +576,120 @@ def execute(self, report_num=report_num ) + @handle_external_context(fallback_most_recent=True) + def save(self, path:PosixPath=None, directory:str=None, filename:str=None, context=None): + """Saves all weight matrices for all MappingProjections in the AutodiffComposition + + Arguments + --------- + path: Path, PosixPath or str : default None + path specification; must be a legal path specification in the filesystem. + directory: str : default ``current working directory`` + directory where `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + filename: str : default ``_matrix_wts.pnl`` + filename in which `matrices ` for all MappingProjections + in the AutodiffComposition are saved. + .. note:: + Matrices are saved in + `PyTorch state_dict `_ format. 
+ + Return + ------ + Path + + """ + if path: + try: + path = Path(path) + except: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + else: + try: + if directory: + path = Path(directory) + else: + path = Path(os.getcwd()) + if filename: + # path = Path(path / filename) + path = Path(os.path.join(path / filename)) + else: + path = Path(os.path.join(path / f'{self.name}_matrix_wts.pnl')) + except IsADirectoryError: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + proj_state = { + # p.name: p.parameters.matrix.get(context=context) + p.name: p.matrix.base + for p in self.projections + if not (isinstance(p, ModulatoryProjection_Base) + or isinstance(p.sender.owner, CompositionInterfaceMechanism) + or isinstance(p.receiver.owner, CompositionInterfaceMechanism) + or isinstance(p.sender.owner, ModulatoryMechanism_Base) + or isinstance(p.receiver.owner, ModulatoryMechanism_Base) + or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING) + or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING) + )} + torch.save(proj_state, path) + return path + + @handle_external_context(fallback_most_recent=True) + def load(self, path:PosixPath=None, directory:str=None, filename:str=None, context=None): + """Loads all weights matrices for all MappingProjections in the AutodiffComposition from file + Arguments + --------- + path: Path : default None + Path for file in which `MappingProjection` `matrices ` are stored. + This must be a legal PosixPath object; if it is specified **directory** and **filename** are ignored. + directory: str : default ``current working directory`` + directory where `MappingProjection` `matrices ` are stored. + filename: str : default ``_matrix_wts.pnl`` + name of file in which `MappingProjection` `matrices ` are stored. + .. note:: + Matrices must be stored in + `PyTorch state_dict `_ format. 
+ """ + if path: + if not isinstance(path,Path): + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + else: + try: + if directory: + path = Path(directory) + else: + path = Path(os.getcwd()) + if filename: + # path = Path(path / filename) + path = Path(os.path.join(path / filename)) + else: + # path = Path(path / f'{self.name}_matrix_wts.pnl') + path = Path(os.path.join(path , f'{self.name}_matrix_wts.pnl')) + except IsADirectoryError: + raise AutodiffCompositionError(f"'{path}' (for saving weight matrices of ({self.name}) " + f"is not a legal path.") + state = torch.load(path) + for projection in [p for p in self.projections + if not (isinstance(p, ModulatoryProjection_Base) + or isinstance(p.sender.owner, CompositionInterfaceMechanism) + or isinstance(p.receiver.owner, CompositionInterfaceMechanism) + or isinstance(p.sender.owner, ModulatoryMechanism_Base) + or isinstance(p.receiver.owner, ModulatoryMechanism_Base) + or p.sender.owner in self.get_nodes_by_role(NodeRole.LEARNING) + or p.receiver.owner in self.get_nodes_by_role(NodeRole.LEARNING) + )]: + matrix = state[projection.name] + if np.array(matrix).shape != projection.matrix.base.shape: + raise AutodiffCompositionError(f"Shape of matrix loaded for '{projection.name}' " + f"({np.array(matrix).shape}) " + f"does not match its shape ({projection.matrix.base.shape})") + projection.matrix.base = matrix + projection.parameters.matrix.set(matrix, context=context, override=True) + projection.parameter_ports['matrix'].parameters.value.set(matrix, context=context, override=True) + self._build_pytorch_representation(context=context, refresh=True) + # MODIFIED 11/8/22 END + def _get_state_ids(self): return super()._get_state_ids() + ["optimizer"] diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index d7039a1902e..8e7a757a353 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -129,6 +129,7 @@ def run_learning(self, targets: dict = None, num_trials: int = None, epochs: int = 1, + learning_rate = None, minibatch_size: int = 1, patience: int = None, min_delta: int = 0, @@ -139,7 +140,7 @@ def run_learning(self, execution_mode:pnlvm.ExecutionMode = pnlvm.ExecutionMode.Python, **kwargs): """ - Runs the composition repeatedly with the specified parameters + Runs the composition repeatedly with the specified parameters. Returns --------- @@ -150,6 +151,9 @@ def run_learning(self, else: self._is_llvm_mode = True + # This is used by local learning-related methods to override the default learning_rate set at construction. 
+ self._composition._runtime_learning_rate = learning_rate + # Handle function and generator inputs if isgeneratorfunction(inputs): inputs = inputs() diff --git a/psyneulink/library/compositions/pytorchcomponents.py b/psyneulink/library/compositions/pytorchcomponents.py index 43122730437..e106272d91a 100644 --- a/psyneulink/library/compositions/pytorchcomponents.py +++ b/psyneulink/library/compositions/pytorchcomponents.py @@ -1,4 +1,4 @@ -from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU +from psyneulink.core.components.functions.nonstateful.transferfunctions import Linear, Logistic, ReLU, SoftMax from psyneulink.library.compositions.pytorchllvmhelper import * from psyneulink.core.globals.log import LogCondition from psyneulink.core import llvm as pnlvm @@ -10,7 +10,8 @@ def pytorch_function_creator(function, device, context=None): """ Converts a PsyNeuLink function into an equivalent PyTorch lambda function. - NOTE: This is needed due to PyTorch limitations (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990) + NOTE: This is needed due to PyTorch limitations + (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990) """ def get_fct_param_value(param_name): val = function._get_current_parameter_value( @@ -38,6 +39,10 @@ def get_fct_param_value(param_name): return lambda x: (torch.max(input=(x - bias), other=torch.tensor([0], device=device).double()) * gain + torch.min(input=(x - bias), other=torch.tensor([0], device=device).double()) * leak) + elif isinstance(function, SoftMax): + gain = get_fct_param_value('gain') + return lambda x: (torch.softmax(x, len(x), other=torch.tensor([0], device=device).double())) + else: raise Exception(f"Function {function} is not currently supported in AutodiffCompositions!") diff --git a/psyneulink/library/compositions/pytorchmodelcreator.py b/psyneulink/library/compositions/pytorchmodelcreator.py index af809613bf4..916dfca438f 100644 --- a/psyneulink/library/compositions/pytorchmodelcreator.py +++ b/psyneulink/library/compositions/pytorchmodelcreator.py @@ -60,7 +60,8 @@ def __init__(self, composition, device, context=None): proj_recv.add_afferent(new_proj) self.projection_map[projection] = new_proj self.projections.append(new_proj) - self.params.append(new_proj.matrix) + + self._regenerate_paramlist() c = Context() try: @@ -81,6 +82,11 @@ def __init__(self, composition, device, context=None): __deepcopy__ = get_deepcopy_with_shared(shared_types=(Component, ComponentsMeta)) + def _regenerate_paramlist(self): + self.params = nn.ParameterList() + for proj in self.projections: + self.params.append(proj.matrix) + # generates llvm function for self.forward def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): args = [ctx.get_state_struct_type(self._composition).as_pointer(), diff --git a/tests/composition/autodiff_composition_matrix_wts.pnl b/tests/composition/autodiff_composition_matrix_wts.pnl new file mode 100644 index 00000000000..4053d03da1d Binary files /dev/null and b/tests/composition/autodiff_composition_matrix_wts.pnl differ diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 2bc81653862..d04ffe6f210 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -1,6 +1,6 @@ import logging import timeit as timeit - +import os import numpy as np import pytest @@ -56,6 +56,61 @@ def 
test_autodiff_forward(autodiff_mode): outputs = xor.run(inputs=[0,0], execution_mode=autodiff_mode) assert np.allclose(outputs, [[0.9479085241082691]]) +@pytest.mark.pytorch +def test_autodiff_saveload(tmp_path): + def create_xor(): + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) + + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + hid_map = MappingProjection(matrix=np.random.rand(2,10), name='hid_map') + out_map = MappingProjection(matrix=np.random.rand(10,1), name='out_map') + + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition() + + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) + + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + return xor + + np.random.seed(0) + xor1 = create_xor() + xor1_outputs = xor1.run(inputs=[0,0]) + + # save + # path = xor1.save() + path = xor1.save(os.path.join(tmp_path, 'xor_1.pnl')) + + # del xor1 + pnl.clear_registry() + + # load + np.random.seed(1) + xor2 = create_xor() + xor2_outputs_pre = xor2.run(inputs=[0,0]) + # xor2.load(os.path.join(tmp_path, 'xor_1.pnl')) + xor2.load(path) + xor2_outputs_post = xor2.run(inputs=[0,0]) + + + # sanity check - make sure xor2 weights differ + assert not np.allclose(xor2_outputs_pre, xor2_outputs_post, atol=1e-9) + + # make sure loaded model is identical, and used during run + assert np.allclose(xor1_outputs, xor2_outputs_post, atol=1e-9) + @pytest.mark.pytorch @pytest.mark.acconstructor class TestACConstructor:
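[Editor's note] The same save/load round trip can be applied to the N-back feedforward network; a sketch based on the commented-out lines in train_network() and on the test above (the filename here is illustrative):

    ffn = nback_model.nodes[FFN_COMPOSITION]
    path = ffn.save(filename='ffn.wts_nep_6250_lr_01.pnl')   # writes all MappingProjection matrices (PyTorch state_dict format)
    ffn.load(path)                                            # restores the saved matrices and rebuilds the PyTorch representation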