From 252212f36b7a8f4b40ece48b842b36977673dbb8 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliahub.com>
Date: Wed, 5 Jun 2024 00:39:33 +0000
Subject: [PATCH] Don't let setglobal! implicitly create bindings

PR #44231 (part of Julia 1.9) introduced the ability to modify globals with `Mod.sym = val` syntax.
However, the intention of this syntax was always to modify *existing* globals in other modules.
Unfortunately, as implemented, it also implicitly creates new bindings in the other module, even
if the binding was not previously declared. This was not intended, but it's a bit of a syntax corner
case, so nobody caught it at the time.

After some extensive discussions and taking into account the near future direction we want to go
with bindings (#54654 for both), the consensus was reached that we should try to undo the implicit
creation of bindings (but not the ability to assign the *value* of globals in other modules).
Note that this was always an error until Julia 1.9, so hopefully it hasn't crept into too many
packages yet. We'll see what pkgeval says. If use is extensive, we may want to consider a softer
removal strategy.

Across base and stdlib, there's two cases affected by this change:
1. A left over debug statement in `precompile` that wanted to assign a new variable in Base for debugging. Removed in this PR.
2. Distributed wanting to create new bindings. This is a legimitate use case for wanting to
   create bindings in other modules. This is fixed in https://github.com/JuliaLang/Distributed.jl/pull/102.

As noted in that PR, the recommended replacement where implicit binding creation is desired is:
```
Core.eval(mod, Expr(:global, sym))
invokelatest(setglobal!, mod, sym, val)
```

The `invokelatest` is not presently required, but may be needed by #54654, so it's included
in the recommendation now.

Fixes #54607
---
 doc/src/manual/embedding.md |  2 +-
 src/builtins.c              | 12 ++++++------
 src/codegen.cpp             | 36 ++++++++++++++++++++++++------------
 src/interpreter.c           |  6 +++++-
 src/julia-syntax.scm        |  1 +
 src/julia.h                 |  2 +-
 src/module.c                |  9 ++++++---
 src/toplevel.c              |  8 ++++----
 test/core.jl                |  1 +
 test/precompile.jl          |  1 -
 test/syntax.jl              | 37 +++++++++++++++++++++++++++++++++++++
 11 files changed, 86 insertions(+), 29 deletions(-)

diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index 9df9a6c1980038..f578e10764101f 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -412,7 +412,7 @@ per pointer using
 ```c
 jl_module_t *mod = jl_main_module;
 jl_sym_t *var = jl_symbol("var");
-jl_binding_t *bp = jl_get_binding_wr(mod, var);
+jl_binding_t *bp = jl_get_binding_wr(mod, var, 1);
 jl_checked_assignment(bp, mod, var, val);
 ```
 
diff --git a/src/builtins.c b/src/builtins.c
index f285a2e5c03867..b3a372cf74acd8 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -1355,7 +1355,7 @@ JL_CALLABLE(jl_f_setglobal)
         jl_atomic_error("setglobal!: module binding cannot be written non-atomically");
     else if (order >= jl_memory_order_seq_cst)
         jl_fence();
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     jl_checked_assignment(b, mod, var, args[2]); // release store
     if (order >= jl_memory_order_seq_cst)
         jl_fence();
@@ -1393,7 +1393,7 @@ JL_CALLABLE(jl_f_set_binding_type)
     JL_TYPECHK(set_binding_type!, symbol, (jl_value_t*)s);
     jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2];
     JL_TYPECHK(set_binding_type!, type, ty);
-    jl_binding_t *b = jl_get_binding_wr(m, s);
+    jl_binding_t *b = jl_get_binding_wr(m, s, 0);
     jl_value_t *old_ty = NULL;
     if (jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty)) {
         jl_gc_wb(b, ty);
@@ -1420,7 +1420,7 @@ JL_CALLABLE(jl_f_swapglobal)
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("swapglobal!: module binding cannot be written non-atomically");
     // is seq_cst already, no fence needed
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     return jl_checked_swap(b, mod, var, args[2]);
 }
 
@@ -1438,7 +1438,7 @@ JL_CALLABLE(jl_f_modifyglobal)
     JL_TYPECHK(modifyglobal!, symbol, (jl_value_t*)var);
     if (order == jl_memory_order_notatomic)
         jl_atomic_error("modifyglobal!: module binding cannot be written non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     return jl_checked_modify(b, mod, var, args[2], args[3]);
 }
@@ -1467,7 +1467,7 @@ JL_CALLABLE(jl_f_replaceglobal)
         jl_atomic_error("replaceglobal!: module binding cannot be written non-atomically");
     if (failure_order == jl_memory_order_notatomic)
         jl_atomic_error("replaceglobal!: module binding cannot be accessed non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     return jl_checked_replace(b, mod, var, args[2], args[3]);
 }
@@ -1496,7 +1496,7 @@ JL_CALLABLE(jl_f_setglobalonce)
         jl_atomic_error("setglobalonce!: module binding cannot be written non-atomically");
     if (failure_order == jl_memory_order_notatomic)
         jl_atomic_error("setglobalonce!: module binding cannot be accessed non-atomically");
-    jl_binding_t *b = jl_get_binding_wr(mod, var);
+    jl_binding_t *b = jl_get_binding_wr(mod, var, 0);
     // is seq_cst already, no fence needed
     jl_value_t *old = jl_checked_assignonce(b, mod, var, args[2]);
     return old == NULL ? jl_true : jl_false;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index dc9bccb7b4fa0b..6ebe386a4b179a 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -946,7 +946,7 @@ static const auto jlgetbindingwrorerror_func = new JuliaFunction<>{
     [](LLVMContext &C) {
         auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
         return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false);
+                {T_pjlvalue, T_pjlvalue, getInt32Ty(C)}, false);
     },
     nullptr,
 };
@@ -2099,7 +2099,7 @@ static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed
 static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value *fval, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure, bool gcstack_arg, BitVector *used_arguments=nullptr, size_t *args_begin=nullptr);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign);
+                                     jl_binding_t **pbnd, bool assign, bool alloc);
 static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, jl_value_t *scope, bool isvol, MDNode *tbaa);
 static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
 static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const Twine &msg);
@@ -3189,7 +3189,7 @@ static jl_value_t *jl_ensure_rooted(jl_codectx_t &ctx, jl_value_t *val)
 static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
 {
     jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false);
+    Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false, false);
     if (bp == NULL)
         return jl_cgval_t();
     bp = julia_binding_pvalue(ctx, bp);
@@ -3208,10 +3208,10 @@ static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *
 static jl_cgval_t emit_globalop(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *sym, jl_cgval_t rval, const jl_cgval_t &cmp,
                                 AtomicOrdering Order, AtomicOrdering FailOrder,
                                 bool issetglobal, bool isreplaceglobal, bool isswapglobal, bool ismodifyglobal, bool issetglobalonce,
-                                const jl_cgval_t *modifyop)
+                                const jl_cgval_t *modifyop, bool alloc)
 {
     jl_binding_t *bnd = NULL;
-    Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
+    Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, alloc);
     if (bp == NULL)
         return jl_cgval_t();
     if (bnd && !bnd->constp) {
@@ -3760,7 +3760,8 @@ static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                                  isswapglobal,
                                  ismodifyglobal,
                                  issetglobalonce,
-                                 modifyop);
+                                 modifyop,
+                                 false);
             return true;
         }
     }
@@ -5448,7 +5449,7 @@ static void emit_hasnofield_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *
 // if the reference currently bound or assign == true,
 //   pbnd will also be assigned with the binding address
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
-                                     jl_binding_t **pbnd, bool assign)
+                                     jl_binding_t **pbnd, bool assign, bool alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, s, 1);
     if (assign) {
@@ -5478,9 +5479,17 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
         ctx.builder.CreateCondBr(iscached, have_val, not_found);
         not_found->insertInto(ctx.f);
         ctx.builder.SetInsertPoint(not_found);
-        Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func),
-                { literal_pointer_val(ctx, (jl_value_t*)m),
-                  literal_pointer_val(ctx, (jl_value_t*)s) });
+        Value *bval = nullptr;
+        if (assign) {
+            bval = ctx.builder.CreateCall(prepare_call(jlgetbindingwrorerror_func),
+                    { literal_pointer_val(ctx, (jl_value_t*)m),
+                    literal_pointer_val(ctx, (jl_value_t*)s),
+                    ConstantInt::get(getInt32Ty(ctx.builder.getContext()), alloc)});
+        } else {
+            bval = ctx.builder.CreateCall(prepare_call(jlgetbindingorerror_func),
+                    { literal_pointer_val(ctx, (jl_value_t*)m),
+                    literal_pointer_val(ctx, (jl_value_t*)s)});
+        }
         setName(ctx.emission_context, bval, jl_symbol_name(m->name) + StringRef(".") + jl_symbol_name(s) + ".found");
         ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
         ctx.builder.CreateBr(have_val);
@@ -5992,17 +6001,20 @@ static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssi
 
     jl_module_t *mod;
     jl_sym_t *sym;
+    bool toplevel = jl_is_module(ctx.linfo->def.value);
+    bool alloc = toplevel;
     if (jl_is_symbol(l)) {
         mod = ctx.module;
         sym = (jl_sym_t*)l;
     }
     else {
         assert(jl_is_globalref(l));
+        alloc &= jl_globalref_mod(l) == ctx.module;
         mod = jl_globalref_mod(l);
         sym = jl_globalref_name(l);
     }
     emit_globalop(ctx, mod, sym, rval_info, jl_cgval_t(), AtomicOrdering::Release, AtomicOrdering::NotAtomic,
-                  true, false, false, false, false, nullptr);
+                  true, false, false, false, false, nullptr, alloc);
     // Global variable. Does not need debug info because the debugger knows about
     // its memory location.
 }
@@ -6456,7 +6468,7 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaidx_
         }
         if (jl_is_symbol(sym)) {
             jl_binding_t *bnd = NULL;
-            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
+            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true, true);
             if (bp)
                 ctx.builder.CreateCall(prepare_call(jldeclareconst_func),
                         { bp, literal_pointer_val(ctx, (jl_value_t*)mod), literal_pointer_val(ctx, (jl_value_t*)sym) });
diff --git a/src/interpreter.c b/src/interpreter.c
index d5d440ae289c86..c383b7f4bbf65f 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -569,9 +569,13 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 else {
                     jl_module_t *modu;
                     jl_sym_t *sym;
+                    // Plain assignment is allowed to create bindings at
+                    // toplevel and only for the current module
+                    int alloc = toplevel;
                     if (jl_is_globalref(lhs)) {
                         modu = jl_globalref_mod(lhs);
                         sym = jl_globalref_name(lhs);
+                        alloc &= modu == s->module;
                     }
                     else {
                         assert(jl_is_symbol(lhs));
@@ -579,7 +583,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                         sym = (jl_sym_t*)lhs;
                     }
                     JL_GC_PUSH1(&rhs);
-                    jl_binding_t *b = jl_get_binding_wr(modu, sym);
+                    jl_binding_t *b = jl_get_binding_wr(modu, sym, alloc);
                     jl_checked_assignment(b, modu, sym, rhs);
                     JL_GC_POP();
                 }
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 899476afc093a9..fbabad81ee55d3 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -4233,6 +4233,7 @@ f(x) = yt(x)
                            (put! globals ref #t)
                            `(block
                              (toplevel-only set_binding_type! ,(cadr e))
+                             (global ,ref)
                              (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
                          `(call (core typeassert) ,@(cdr e))))
                    fname lam namemap defined toplevel interp opaq globals locals))))
diff --git a/src/julia.h b/src/julia.h
index 0d46f15776610c..9c9538ade704eb 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -1946,7 +1946,7 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var
 JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_get_binding_type(jl_module_t *m, jl_sym_t *var);
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var);
diff --git a/src/module.c b/src/module.c
index 1be2d5c8673d9d..2d78e3a505d164 100644
--- a/src/module.c
+++ b/src/module.c
@@ -219,13 +219,16 @@ static void check_safe_newbinding(jl_module_t *m, jl_sym_t *var)
 static jl_module_t *jl_binding_dbgmodule(jl_binding_t *b, jl_module_t *m, jl_sym_t *var) JL_GLOBALLY_ROOTED;
 
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var)
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc)
 {
     jl_binding_t *b = jl_get_module_binding(m, var, 1);
     jl_binding_t *b2 = jl_atomic_load_relaxed(&b->owner);
     if (b2 != b) {
-        if (b2 == NULL)
+        if (b2 == NULL) {
             check_safe_newbinding(m, var);
+            if (!alloc)
+                jl_errorf("Global %s.%s does not exist and cannot be assigned. Declare it using `global` before attempting assignment.", jl_symbol_name(m->name), jl_symbol_name(var));
+        }
         if (b2 != NULL || (!jl_atomic_cmpswap(&b->owner, &b2, b) && b2 != b)) {
             jl_module_t *from = jl_binding_dbgmodule(b, m, var);
             if (from == m)
@@ -791,7 +794,7 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
 
 JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
 {
-    jl_binding_t *bp = jl_get_binding_wr(m, var);
+    jl_binding_t *bp = jl_get_binding_wr(m, var, 0);
     jl_checked_assignment(bp, m, var, val);
 }
 
diff --git a/src/toplevel.c b/src/toplevel.c
index 1899c9e18db304..c2fa9b72a97098 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -155,7 +155,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
     }
     else {
-        jl_binding_t *b = jl_get_binding_wr(parent_module, name);
+        jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1);
         jl_declare_constant(b, parent_module, name);
         jl_value_t *old = NULL;
         if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
@@ -325,7 +325,7 @@ void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) {
             gs = (jl_sym_t*)arg;
         }
         if (!jl_binding_resolved_p(gm, gs)) {
-            jl_binding_t *b = jl_get_binding_wr(gm, gs);
+            jl_binding_t *b = jl_get_binding_wr(gm, gs, 1);
             if (set_type) {
                 jl_value_t *old_ty = NULL;
                 // maybe set the type too, perhaps
@@ -638,7 +638,7 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym
                       jl_symbol_name(name), jl_symbol_name(m->name));
     }
     else {
-        b = jl_get_binding_wr(m, name);
+        b = jl_get_binding_wr(m, name, 1);
     }
     jl_declare_constant(b, m, name);
     jl_checked_assignment(b, m, name, (jl_value_t*)import);
@@ -897,7 +897,7 @@ JL_DLLEXPORT jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_val
             gm = m;
             gs = (jl_sym_t*)arg;
         }
-        jl_binding_t *b = jl_get_binding_wr(gm, gs);
+        jl_binding_t *b = jl_get_binding_wr(gm, gs, 1);
         jl_declare_constant(b, gm, gs);
         JL_GC_POP();
         return jl_nothing;
diff --git a/test/core.jl b/test/core.jl
index 9db32e50c69474..0d12bb36c35756 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -8145,6 +8145,7 @@ end
 @test_broken Int isa Union{Union, Type{Union{Int,T1}} where {T1}}
 
 let M = @__MODULE__
+    Core.eval(M, :(global a_typed_global))
     @test Core.set_binding_type!(M, :a_typed_global, Tuple{Union{Integer,Nothing}}) === nothing
     @test Core.get_binding_type(M, :a_typed_global) === Tuple{Union{Integer,Nothing}}
     @test Core.set_binding_type!(M, :a_typed_global, Tuple{Union{Integer,Nothing}}) === nothing
diff --git a/test/precompile.jl b/test/precompile.jl
index e164c2917d06ff..e8047858f195ad 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -1023,7 +1023,6 @@ precompile_test_harness("code caching") do dir
         @test mi.specTypes.parameters[end] === Integer ? !hv : hv
     end
 
-    setglobal!(Main, :inval, invalidations)
     idxs = findall(==("verify_methods"), invalidations)
     idxsbits = filter(idxs) do i
         mi = invalidations[i-1]
diff --git a/test/syntax.jl b/test/syntax.jl
index bfe47abffe6ffb..2cf346466f44fe 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -3683,3 +3683,40 @@ end
 # Issue #53729 - Lowering recursion into Expr(:toplevel)
 @test eval(Expr(:let, Expr(:block), Expr(:block, Expr(:toplevel, :(f53729(x) = x)), :(x=1)))) == 1
 @test f53729(2) == 2
+
+# Issue #54607 - binding creation in foreign modules should not be permitted
+module Foreign54607
+    # Syntactic, not dynamic
+    try_to_create_binding1() = (Foreign54607.foo = 2)
+    @eval try_to_create_binding2() = ($(GlobalRef(Foreign54607, :foo)) = 2)
+    function global_create_binding()
+        global bar
+        bar = 3
+    end
+    baz = 4
+    begin;
+        for i = 1:10; end
+        compiled_assign = 5
+    end
+    @eval $(GlobalRef(Foreign54607, :gr_assign)) = 6
+end
+@test_throws ErrorException (Foreign54607.foo = 1)
+@test_throws ErrorException Foreign54607.try_to_create_binding1()
+@test_throws ErrorException Foreign54607.try_to_create_binding2()
+@test_throws ErrorException begin
+    for i = 1:10; end
+    (Foreign54607.foo = 1)
+end
+@test_throws ErrorException @eval (GlobalRef(Foreign54607, :gr_assign2)) = 7
+Foreign54607.global_create_binding()
+@test isdefined(Foreign54607, :bar)
+@test isdefined(Foreign54607, :baz)
+@test isdefined(Foreign54607, :compiled_assign)
+@test isdefined(Foreign54607, :gr_assign)
+Foreign54607.bar = 8
+@test Foreign54607.bar == 8
+begin
+    for i = 1:10; end
+    Foreign54607.bar = 9
+end
+@test Foreign54607.bar == 9