Replace deprecated Flux.ADAM with Flux.Adam #203

Merged (2 commits, Jul 25, 2022)
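The change is mechanical: every `ADAM(...)` constructor call becomes `Adam(...)`, and the Flux compat lower bound is raised to a release that ships the new name. A minimal sketch of the before/after usage, assuming Flux ≥ 0.13.4 (illustrative only, not taken from the diff):

```julia
# Minimal sketch of the rename this PR applies throughout the repo.
using Flux

opt_old = ADAM(1e-3)   # old spelling; deprecated on recent Flux 0.13.x releases
opt_new = Adam(1e-3)   # new spelling used throughout this PR
```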
2 changes: 1 addition & 1 deletion Project.toml
@@ -29,7 +29,7 @@ Adapt = "3"
CUDA = "3.3"
ChainRulesCore = "1"
DataStructures = "0.18"
Flux = "0.13"
Flux = "0.13.4"
Functors = "0.2, 0.3"
Graphs = "1.4"
KrylovKit = "0.5"
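The compat bump from `0.13` to `0.13.4` pins the first Flux release that (as this PR implies) ships the renamed `Adam` constructor, so older 0.13.x versions can no longer be resolved. A quick check in an activated project environment might look like this (a hedged sketch, not part of the diff):

```julia
# Confirm that the resolved Flux version satisfies the new lower bound
# and that the renamed constructor is available (illustrative check).
using Pkg
Pkg.status("Flux")       # should report Flux ≥ 0.13.4 after the compat bump

using Flux
opt = Flux.Adam(1e-3)    # only defined on releases covered by the new compat entry
```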
2 changes: 1 addition & 1 deletion docs/src/index.md
@@ -54,7 +54,7 @@ model = GNNChain(GCNConv(16 => 64),
Dense(64, 1)) |> device

ps = Flux.params(model)
opt = ADAM(1f-4)
opt = Adam(1f-4)
```

### Training
4 changes: 2 additions & 2 deletions docs/src/tutorials/gnn_intro_pluto.jl
@@ -266,7 +266,7 @@ Since everything in our model is differentiable and parameterized, we can add so
Here, we make use of a semi-supervised or transductive learning procedure: We simply train against one node per class, but are allowed to make use of the complete input graph data.

Training our model is very similar to any other Flux model.
In addition to defining our network architecture, we define a loss criterion (here, `logitcrossentropy`) and initialize a stochastic gradient optimizer (here, `ADAM`).
In addition to defining our network architecture, we define a loss criterion (here, `logitcrossentropy`) and initialize a stochastic gradient optimizer (here, `Adam`).
After that, we perform multiple rounds of optimization, where each round consists of a forward and backward pass to compute the gradients of our model parameters w.r.t. the loss derived from the forward pass.
If you are not new to Flux, this scheme should appear familiar to you.

@@ -285,7 +285,7 @@ Let us now start training and see how our node embeddings evolve over time (best
begin
model = GCN(num_features, num_classes)
ps = Flux.params(model)
opt = ADAM(1e-2)
opt = Adam(1e-2)
epochs = 2000

emb = h
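The tutorial hunk above spells the procedure out in prose: pick `logitcrossentropy` as the loss, create an `Adam` optimizer, then alternate forward and backward passes. A minimal sketch of such a loop with the renamed optimizer, where `model`, `g`, `x`, and `y` stand in for the tutorial's graph, features, and labels (illustrative, not part of this diff):

```julia
# Implicit-parameter training loop in the style the tutorial describes.
using Flux

ps  = Flux.params(model)
opt = Adam(1e-2)
epochs = 2000

for epoch in 1:epochs
    gs = Flux.gradient(ps) do
        Flux.logitcrossentropy(model(g, x), y)   # forward pass and loss
    end
    Flux.Optimise.update!(opt, ps, gs)           # apply gradients to the parameters
end
```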
2 changes: 1 addition & 1 deletion docs/src/tutorials/graph_classification_pluto.jl
@@ -202,7 +202,7 @@ function train!(model; epochs=200, η=1e-2, infotime=10)
device = Flux.cpu
model = model |> device
ps = Flux.params(model)
opt = ADAM(1e-3)
opt = Adam(1e-3)


function report(epoch)
2 changes: 1 addition & 1 deletion examples/graph_classification_tudataset.jl
@@ -82,7 +82,7 @@ function train(; kws...)
Dense(nhidden, 1)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

# LOGGING FUNCTION

2 changes: 1 addition & 1 deletion examples/link_prediction_pubmed.jl
@@ -77,7 +77,7 @@ function train(; kws...)
pred = DotPredictor()

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

### LOSS FUNCTION ############

2 changes: 1 addition & 1 deletion examples/neural_ode_cora.jl
@@ -48,7 +48,7 @@ model = GNNChain(GCNConv(nin => nhidden, relu),
ps = Flux.params(model);

# ## Optimizer
opt = ADAM(0.01)
opt = Adam(0.01)


function eval_loss_accuracy(X, y, mask)
2 changes: 1 addition & 1 deletion examples/node_classification_cora.jl
@@ -57,7 +57,7 @@ function train(; kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

display(g)

2 changes: 1 addition & 1 deletion perf/neural_ode_mnist.jl
@@ -40,7 +40,7 @@ model = Chain(Flux.flatten,
ps = Flux.params(model);

# ## Optimizer
opt = ADAM(0.01)
opt = Adam(0.01)

function eval_loss_accuracy(X, y)
ŷ = model(X)
2 changes: 1 addition & 1 deletion perf/node_classification_cora_geometricflux.jl
@@ -59,7 +59,7 @@ function train(; kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)

@info g

2 changes: 1 addition & 1 deletion test/examples/node_classification_cora.jl
@@ -53,7 +53,7 @@ function train(Layer; verbose=false, kws...)
Dense(nhidden, nout)) |> device

ps = Flux.params(model)
opt = ADAM(args.η)
opt = Adam(args.η)


## TRAINING