From 389d1177a57a442b7814d9fdede2a088c614b69d Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Tue, 4 May 2021 18:43:31 +0200 Subject: [PATCH 1/3] stage1: Fix LLVM error in inline asm invocation Pointer types need an extra indirection layer during the generation of the function prototype for inline asm blocks. Closes #3606 --- src/stage1/codegen.cpp | 4 +++- test/stage1/behavior/asm.zig | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index ede15e439499..015a64f68a26 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -4880,6 +4880,9 @@ static LLVMValueRef ir_render_asm_gen(CodeGen *g, IrExecutableGen *executable, I type_ref = get_llvm_type(g, wider_type); value_ref = gen_widen_or_shorten(g, false, type, wider_type, value_ref); } + } else if (handle_is_ptr(g, type)) { + ZigType *gen_type = get_pointer_to_type(g, type, true); + type_ref = get_llvm_type(g, gen_type); } param_types[param_index] = type_ref; @@ -9296,7 +9299,6 @@ static void init(CodeGen *g) { char *layout_str = LLVMCopyStringRepOfTargetData(g->target_data_ref); LLVMSetDataLayout(g->module, layout_str); - assert(g->pointer_size_bytes == LLVMPointerSize(g->target_data_ref)); g->is_big_endian = (LLVMByteOrder(g->target_data_ref) == LLVMBigEndian); diff --git a/test/stage1/behavior/asm.zig b/test/stage1/behavior/asm.zig index 170ad3325d8a..ade774910dbd 100644 --- a/test/stage1/behavior/asm.zig +++ b/test/stage1/behavior/asm.zig @@ -87,6 +87,21 @@ test "sized integer/float in asm input" { ); } +test "struct/array/union types as input values" { + asm volatile ("" + : + : [_] "m" (@as([1]u32, undefined)) + ); // fails + asm volatile ("" + : + : [_] "m" (@as(struct { x: u32, y: u8 }, undefined)) + ); // fails + asm volatile ("" + : + : [_] "m" (@as(union { x: u32, y: u8 }, undefined)) + ); // fails +} + extern fn this_is_my_alias() i32; export fn derp() i32 { From 4bf093f1a00e481d923452955ab9c394c30b8694 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Tue, 4 May 2021 18:45:52 +0200 Subject: [PATCH 2/3] compiler-rt: Better selection of __clzsi implementation To be honest all this detection logic is starting to become a real PITA, the ARM32 version can be possibly removed as the generic version optimizes pretty well... 
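For reference, the "optimizes pretty well" claim is easy to see: a generic 32-bit count-leading-zeros is just a binary search over the input word, which LLVM can lower to a handful of instructions. The sketch below is illustrative only; the name and exact body are assumptions, not the verbatim __clzsi2_generic, whose body lies outside this hunk.

    // Sketch of a generic count-leading-zeros (not the verbatim __clzsi2_generic).
    fn clzsi2_sketch(a: i32) callconv(.C) i32 {
        var x = @bitCast(u32, a);
        var n: i32 = 32;
        // Binary search: if the upper half of the current window is non-zero,
        // the leading one bit lives there, so drop `shift` from the count and
        // keep searching in that half.
        var shift: u5 = 16;
        while (shift != 0) : (shift >>= 1) {
            const y = x >> shift;
            if (y != 0) {
                n -= shift;
                x = y;
            }
        }
        // Here x is 0 or 1; subtracting it accounts for the final bit.
        return n - @intCast(i32, x);
    }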
--- lib/std/special/compiler_rt/clzsi2.zig | 27 +++++++++++++++------ lib/std/special/compiler_rt/clzsi2_test.zig | 2 ++ 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/lib/std/special/compiler_rt/clzsi2.zig b/lib/std/special/compiler_rt/clzsi2.zig index c10786b462c8..d7464d5ea997 100644 --- a/lib/std/special/compiler_rt/clzsi2.zig +++ b/lib/std/special/compiler_rt/clzsi2.zig @@ -26,6 +26,8 @@ fn __clzsi2_generic(a: i32) callconv(.C) i32 { } fn __clzsi2_thumb1() callconv(.Naked) void { + @setRuntimeSafety(false); + // Similar to the generic version with the last two rounds replaced by a LUT asm volatile ( \\ movs r1, #32 @@ -58,6 +60,8 @@ fn __clzsi2_thumb1() callconv(.Naked) void { } fn __clzsi2_arm32() callconv(.Naked) void { + @setRuntimeSafety(false); + asm volatile ( \\ // Assumption: n != 0 \\ // r0: n @@ -104,13 +108,22 @@ fn __clzsi2_arm32() callconv(.Naked) void { unreachable; } -pub const __clzsi2 = switch (std.Target.current.cpu.arch) { - .arm, .armeb => if (std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm)) - __clzsi2_thumb1 - else - __clzsi2_arm32, - .thumb, .thumbeb => __clzsi2_thumb1, - else => __clzsi2_generic, +pub const __clzsi2 = impl: { + switch (std.Target.current.cpu.arch) { + .arm, .armeb, .thumb, .thumbeb => { + const use_thumb1 = + (std.Target.current.cpu.arch.isThumb() or + std.Target.arm.featureSetHas(std.Target.current.cpu.features, .noarm)) and + !std.Target.arm.featureSetHas(std.Target.current.cpu.features, .thumb2); + + if (use_thumb1) break :impl __clzsi2_thumb1 + // From here on we're either targeting Thumb2 or ARM. + else if (!std.Target.current.cpu.arch.isThumb()) break :impl __clzsi2_arm32 + // Use the generic implementation otherwise. + else break :impl __clzsi2_generic; + }, + else => break :impl __clzsi2_generic, + } }; test "test clzsi2" { diff --git a/lib/std/special/compiler_rt/clzsi2_test.zig b/lib/std/special/compiler_rt/clzsi2_test.zig index 2b860afd2279..c74a1c3ec258 100644 --- a/lib/std/special/compiler_rt/clzsi2_test.zig +++ b/lib/std/special/compiler_rt/clzsi2_test.zig @@ -7,6 +7,8 @@ const clzsi2 = @import("clzsi2.zig"); const testing = @import("std").testing; fn test__clzsi2(a: u32, expected: i32) void { + // XXX At high optimization levels this test may be horribly miscompiled if + // one of the naked implementations is selected. var nakedClzsi2 = clzsi2.__clzsi2; var actualClzsi2 = @ptrCast(fn (a: i32) callconv(.C) i32, nakedClzsi2); var x = @bitCast(i32, a); From afbcb6209dbe6812679324aab564884085b8cf44 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Tue, 4 May 2021 18:52:53 +0200 Subject: [PATCH 3/3] std: Initial bringup for Linux on Thumb2 There are some small problems here and there, mostly due to the pointers having the lsb set and disrupting the fn alignment tests and the `@FrameSize` implementation. 
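To make the "lsb set" issue concrete, here is a small sketch (the function and test names are made up for illustration and are not part of this change): on Thumb targets the address of a function is reported with bit 0 set to mark it as Thumb code for interworking, so a check that treats @ptrToInt of a function as a plain aligned address needs to clear that bit first.

    const std = @import("std");
    const expect = std.testing.expect;

    fn overaligned() align(0x1000) i32 {
        return 0x42;
    }

    test "mask the Thumb interworking bit before checking alignment" {
        const raw = @ptrToInt(overaligned);
        // On .thumb/.thumbeb bit 0 encodes the instruction set, so the raw
        // value is odd even though the code itself is 0x1000-aligned.
        const addr = if (std.Target.current.cpu.arch.isThumb())
            raw & ~@as(usize, 1)
        else
            raw;
        expect((addr & (0x1000 - 1)) == 0);
    }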
--- lib/std/os/bits/linux.zig | 2 +- lib/std/os/linux.zig | 1 + lib/std/os/linux/thumb.zig | 168 ++++++++++++++++++++++++++++++ lib/std/os/linux/tls.zig | 6 +- lib/std/special/c.zig | 2 +- lib/std/start.zig | 2 +- lib/std/zig/system.zig | 9 ++ test/stage1/behavior/align.zig | 3 + test/stage1/behavior/async_fn.zig | 3 + test/stage1/behavior/atomics.zig | 5 +- 10 files changed, 193 insertions(+), 8 deletions(-) create mode 100644 lib/std/os/linux/thumb.zig diff --git a/lib/std/os/bits/linux.zig b/lib/std/os/bits/linux.zig index 94da5cc99a61..97cdbef78249 100644 --- a/lib/std/os/bits/linux.zig +++ b/lib/std/os/bits/linux.zig @@ -18,7 +18,7 @@ pub usingnamespace switch (builtin.arch) { .i386 => @import("linux/i386.zig"), .x86_64 => @import("linux/x86_64.zig"), .aarch64 => @import("linux/arm64.zig"), - .arm => @import("linux/arm-eabi.zig"), + .arm, .thumb => @import("linux/arm-eabi.zig"), .riscv64 => @import("linux/riscv64.zig"), .sparcv9 => @import("linux/sparc64.zig"), .mips, .mipsel => @import("linux/mips.zig"), diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index 4a67ca768583..6c88d9eae1d8 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -23,6 +23,7 @@ pub usingnamespace switch (builtin.arch) { .x86_64 => @import("linux/x86_64.zig"), .aarch64 => @import("linux/arm64.zig"), .arm => @import("linux/arm-eabi.zig"), + .thumb => @import("linux/thumb.zig"), .riscv64 => @import("linux/riscv64.zig"), .sparcv9 => @import("linux/sparc64.zig"), .mips, .mipsel => @import("linux/mips.zig"), diff --git a/lib/std/os/linux/thumb.zig b/lib/std/os/linux/thumb.zig new file mode 100644 index 000000000000..5db9d2cbf4c0 --- /dev/null +++ b/lib/std/os/linux/thumb.zig @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2021 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +usingnamespace @import("../bits.zig"); + +// The syscall interface is identical to the ARM one but we're facing an extra +// challenge: r7, the register where the syscall number is stored, may be +// reserved for the frame pointer. +// Save and restore r7 around the syscall without touching the stack pointer not +// to break the frame chain. 
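+//
+// Concretely, each wrapper below builds a two-word scratch array: buf[0]
+// holds the syscall number and buf[1] is the spill slot for r7. Being an
+// aggregate, buf reaches the asm block as a pointer, placed in the lowest
+// register left free by the r0 result and the syscall arguments (r1 for
+// syscall0 and syscall1, up to r6 for syscall6). The sequence is then:
+// spill r7 to buf[1], load the number from buf[0], issue svc #0, and
+// reload r7 from buf[1].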
+ +pub fn syscall0(number: SYS) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r1}" (buf) + : "memory" + ); +} + +pub fn syscall1(number: SYS, arg1: usize) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r1}" (buf), + [arg1] "{r0}" (arg1) + : "memory" + ); +} + +pub fn syscall2(number: SYS, arg1: usize, arg2: usize) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r2}" (buf), + [arg1] "{r0}" (arg1), + [arg2] "{r1}" (arg2) + : "memory" + ); +} + +pub fn syscall3(number: SYS, arg1: usize, arg2: usize, arg3: usize) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r3}" (buf), + [arg1] "{r0}" (arg1), + [arg2] "{r1}" (arg2), + [arg3] "{r2}" (arg3) + : "memory" + ); +} + +pub fn syscall4(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r4}" (buf), + [arg1] "{r0}" (arg1), + [arg2] "{r1}" (arg2), + [arg3] "{r2}" (arg3), + [arg4] "{r3}" (arg4) + : "memory" + ); +} + +pub fn syscall5(number: SYS, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r5}" (buf), + [arg1] "{r0}" (arg1), + [arg2] "{r1}" (arg2), + [arg3] "{r2}" (arg3), + [arg4] "{r3}" (arg4), + [arg5] "{r4}" (arg5) + : "memory" + ); +} + +pub fn syscall6( + number: SYS, + arg1: usize, + arg2: usize, + arg3: usize, + arg4: usize, + arg5: usize, + arg6: usize, +) usize { + @setRuntimeSafety(false); + + var buf: [2]usize = .{ @enumToInt(number), undefined }; + return asm volatile ( + \\ str r7, [%[tmp], #4] + \\ ldr r7, [%[tmp]] + \\ svc #0 + \\ ldr r7, [%[tmp], #4] + : [ret] "={r0}" (-> usize) + : [tmp] "{r6}" (buf), + [arg1] "{r0}" (arg1), + [arg2] "{r1}" (arg2), + [arg3] "{r2}" (arg3), + [arg4] "{r3}" (arg4), + [arg5] "{r4}" (arg5), + [arg6] "{r5}" (arg6) + : "memory" + ); +} + +/// This matches the libc clone function. 
+pub extern fn clone(func: fn (arg: usize) callconv(.C) u8, stack: usize, flags: u32, arg: usize, ptid: *i32, tls: usize, ctid: *i32) usize; + +pub fn restore() callconv(.Naked) void { + return asm volatile ( + \\ mov r7, %[number] + \\ svc #0 + : + : [number] "I" (@enumToInt(SYS.sigreturn)) + ); +} + +pub fn restore_rt() callconv(.Naked) void { + return asm volatile ( + \\ mov r7, %[number] + \\ svc #0 + : + : [number] "I" (@enumToInt(SYS.rt_sigreturn)) + : "memory" + ); +} diff --git a/lib/std/os/linux/tls.zig b/lib/std/os/linux/tls.zig index 4a36b0d48540..0830dcbfdaf7 100644 --- a/lib/std/os/linux/tls.zig +++ b/lib/std/os/linux/tls.zig @@ -53,7 +53,7 @@ const TLSVariant = enum { }; const tls_variant = switch (builtin.arch) { - .arm, .armeb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => TLSVariant.VariantI, + .arm, .armeb, .thumb, .aarch64, .aarch64_be, .riscv32, .riscv64, .mips, .mipsel, .powerpc, .powerpc64, .powerpc64le => TLSVariant.VariantI, .x86_64, .i386, .sparcv9 => TLSVariant.VariantII, else => @compileError("undefined tls_variant for this architecture"), }; @@ -62,7 +62,7 @@ const tls_variant = switch (builtin.arch) { const tls_tcb_size = switch (builtin.arch) { // ARM EABI mandates enough space for two pointers: the first one points to // the DTV while the second one is unspecified but reserved - .arm, .armeb, .aarch64, .aarch64_be => 2 * @sizeOf(usize), + .arm, .armeb, .thumb, .aarch64, .aarch64_be => 2 * @sizeOf(usize), // One pointer-sized word that points either to the DTV or the TCB itself else => @sizeOf(usize), }; @@ -150,7 +150,7 @@ pub fn setThreadPointer(addr: usize) void { : [addr] "r" (addr) ); }, - .arm => { + .arm, .thumb => { const rc = std.os.linux.syscall1(.set_tls, addr); assert(rc == 0); }, diff --git a/lib/std/special/c.zig b/lib/std/special/c.zig index c7084f3a11b7..29feae830f9e 100644 --- a/lib/std/special/c.zig +++ b/lib/std/special/c.zig @@ -385,7 +385,7 @@ fn clone() callconv(.Naked) void { \\ svc #0 ); }, - .arm => { + .arm, .thumb => { // __clone(func, stack, flags, arg, ptid, tls, ctid) // r0, r1, r2, r3, +0, +4, +8 diff --git a/lib/std/start.zig b/lib/std/start.zig index 89f5eb0b1f74..e1e331a68257 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -176,7 +176,7 @@ fn _start() callconv(.Naked) noreturn { : [argc] "={esp}" (-> [*]usize) ); }, - .aarch64, .aarch64_be, .arm, .armeb => { + .aarch64, .aarch64_be, .arm, .armeb, .thumb => { argc_argv_ptr = asm volatile ( \\ mov fp, #0 \\ mov lr, #0 diff --git a/lib/std/zig/system.zig b/lib/std/zig/system.zig index 42099c6efe79..d9657d9db445 100644 --- a/lib/std/zig/system.zig +++ b/lib/std/zig/system.zig @@ -349,6 +349,15 @@ pub const NativeTargetInfo = struct { } } }, + .arm, .armeb => { + // XXX What do we do if the target has the noarm feature? + // What do we do if the user specifies +thumb_mode? 
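+                // (A sketch of one possible answer, not implemented here: a
+                // core carrying `noarm` cannot execute ARM code at all, so
+                // forcing `thumb_mode` the same way the .thumb branch below
+                // does would keep codegen consistent; an explicit +thumb_mode
+                // from the user is applied later by cross_target.updateCpuFeatures.)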
+ }, + .thumb, .thumbeb => { + result.target.cpu.features.addFeature( + @enumToInt(std.Target.arm.Feature.thumb_mode), + ); + }, else => {}, } cross_target.updateCpuFeatures(&result.target.cpu.features); diff --git a/test/stage1/behavior/align.zig b/test/stage1/behavior/align.zig index 0a0cc3bcc043..38f5df017650 100644 --- a/test/stage1/behavior/align.zig +++ b/test/stage1/behavior/align.zig @@ -141,6 +141,7 @@ fn alignedBig() align(16) i32 { test "@alignCast functions" { // function alignment is a compile error on wasm32/wasm64 if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest; + if (builtin.arch == .thumb) return error.SkipZigTest; expect(fnExpectsOnly1(simple4) == 0x19); } @@ -157,6 +158,7 @@ fn simple4() align(4) i32 { test "generic function with align param" { // function alignment is a compile error on wasm32/wasm64 if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest; + if (builtin.arch == .thumb) return error.SkipZigTest; expect(whyWouldYouEverDoThis(1) == 0x1); expect(whyWouldYouEverDoThis(4) == 0x1); @@ -338,6 +340,7 @@ test "align(@alignOf(T)) T does not force resolution of T" { test "align(N) on functions" { // function alignment is a compile error on wasm32/wasm64 if (builtin.arch == .wasm32 or builtin.arch == .wasm64) return error.SkipZigTest; + if (builtin.arch == .thumb) return error.SkipZigTest; expect((@ptrToInt(overaligned_fn) & (0x1000 - 1)) == 0); } diff --git a/test/stage1/behavior/async_fn.zig b/test/stage1/behavior/async_fn.zig index 0765eac7e848..09db0eeb29f7 100644 --- a/test/stage1/behavior/async_fn.zig +++ b/test/stage1/behavior/async_fn.zig @@ -110,6 +110,9 @@ test "calling an inferred async function" { } test "@frameSize" { + if (builtin.arch == .thumb or builtin.arch == .thumbeb) + return error.SkipZigTest; + const S = struct { fn doTheTest() void { { diff --git a/test/stage1/behavior/atomics.zig b/test/stage1/behavior/atomics.zig index f9703e730830..d49ca730e630 100644 --- a/test/stage1/behavior/atomics.zig +++ b/test/stage1/behavior/atomics.zig @@ -149,9 +149,10 @@ fn testAtomicStore() void { } test "atomicrmw with floats" { - if (builtin.arch == .aarch64 or builtin.arch == .arm or builtin.arch == .riscv64) { + switch (builtin.arch) { // https://github.com/ziglang/zig/issues/4457 - return error.SkipZigTest; + .aarch64, .arm, .thumb, .riscv64 => return error.SkipZigTest, + else => {}, } testAtomicRmwFloat(); comptime testAtomicRmwFloat();