From 72c77d5814933b9cc992537dc4111d252aab2773 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Sun, 9 Apr 2023 15:45:26 +0100
Subject: [PATCH 01/18] std.math.big.int: Implement depositBits and extractBits

This change implements depositBits and extractBits (equivalents of PDEP
and PEXT) for Zig's big ints. This change lays the groundwork for
implementation of `@depositBits` and `@extractBits`.

Tests have been added to check the behaviour of these two functions.

The functions currently don't handle negative values (though negative
values may be converted to twos complement externally), and
aren't optimal in either memory or performance.
---
 lib/std/math/big/int.zig      | 74 +++++++++++++++++++++++++++++++++++
 lib/std/math/big/int_test.zig | 54 +++++++++++++++++++++++++
 2 files changed, 128 insertions(+)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 846a809e0565..950f152233e7 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1732,6 +1732,80 @@ pub const Mutable = struct {
         y.shiftRight(y.toConst(), norm_shift);
     }
 
+    // TODO this function is quite inefficient and could be optimised
+    /// r = @depositBits(source, mask)
+    ///
+    /// Asserts that `source` and `mask` are positive. `r` must have at least `mask.limbs.len` limbs.
+    /// Bits of `source` beyond its stored limbs are read as zero.
+    /// `limbs_buffer` is used as a working area. It must have length of at least `mask.limbs.len`.
+    pub fn depositBits(r: *Mutable, source: Const, mask: Const, limbs_buffer: []Limb) void {
+        assert(source.positive);
+        assert(mask.positive);
+
+        r.positive = true;
+        std.mem.set(Limb, r.limbs, 0);
+
+        var mut_mask = Mutable{ .limbs = limbs_buffer[0..mask.limbs.len], .positive = undefined, .len = undefined };
+        mut_mask.copy(mask);
+
+        var mask_bit_index = mut_mask.toConst().ctz();
+        var i: usize = 0;
+        while (!mut_mask.eqZero()) : ({
+            mask_bit_index = mut_mask.toConst().ctz();
+            i += 1;
+        }) {
+            const mask_limb_index = mask_bit_index / limb_bits;
+            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
+
+            const i_limb_index = i / limb_bits;
+            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
+
+            mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+            // The mask may have more set bits than `source` has stored bits; the missing high bits are zero.
+            const source_bit_set = i_limb_index < source.limbs.len and source.limbs[i_limb_index] & (@as(Limb, 1) << i_limb_bit) != 0;
+            r.limbs[mask_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
+        }
+
+        r.normalize(r.limbs.len);
+    }
+
+    // TODO this function is quite inefficient and could be optimised
+    /// r = @extractBits(source, mask)
+    ///
+    /// Asserts that `source` and `mask` are positive. `r` needs one bit of storage per set bit of
+    /// `mask` (`mask.limbs.len` limbs always suffice). Bits of `source` beyond its stored limbs are read as zero.
+    /// `limbs_buffer` is used as a working area. It must have length of at least `mask.limbs.len`.
+    pub fn extractBits(r: *Mutable, source: Const, mask: Const, limbs_buffer: []Limb) void {
+        assert(source.positive);
+        assert(mask.positive);
+
+        r.positive = true;
+        std.mem.set(Limb, r.limbs, 0);
+
+        var mut_mask = Mutable{ .limbs = limbs_buffer[0..mask.limbs.len], .positive = undefined, .len = undefined };
+        mut_mask.copy(mask);
+
+        var mask_bit_index = mut_mask.toConst().ctz();
+        var i: usize = 0;
+        while (!mut_mask.eqZero()) : ({
+            mask_bit_index = mut_mask.toConst().ctz();
+            i += 1;
+        }) {
+            const mask_limb_index = mask_bit_index / limb_bits;
+            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
+
+            const i_limb_index = i / limb_bits;
+            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
+
+            mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+            // The mask may select positions above the highest stored limb of `source`; those bits are zero.
+            const source_bit_set = mask_limb_index < source.limbs.len and source.limbs[mask_limb_index] & (@as(Limb, 1) << mask_limb_bit) != 0;
+            r.limbs[i_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;
+        }
+
+        r.normalize(r.limbs.len);
+    }
+
     /// If a is positive, this passes through to truncate.
     /// If a is negative, then r is set to positive with the bit pattern ~(a - 1).
     /// r may alias a.
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 9c3c1b68815f..2d441461097f 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -2762,6 +2762,60 @@ fn popCountTest(val: *const Managed, bit_count: usize, expected: usize) !void {
     try testing.expectEqual(expected, val.toConst().popCount(bit_count));
 }
 
+test "big int extractBits" {
+    try extractBitsTest(0x12345678, 0x0, 0x0);
+    try extractBitsTest(0x12345678, 0xf0f0f0f0, 0x1357);
+    try extractBitsTest(0x12345678, 0xff00ff00, 0x1256);
+    try extractBitsTest(0x12345678, 0xffff, 0x5678);
+
+    try extractBitsTest(0x12345678_90123456_78901234_56789012, 0xff << 64, 0x56);
+    try extractBitsTest(0x12345678_90123456_78901234_56789012, (0xff << 64) | 0xff00f, 0x56892);
+}
+
+fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void {
+    var source_big = try Managed.initSet(testing.allocator, source);
+    defer source_big.deinit();
+    var mask_big = try Managed.initSet(testing.allocator, mask);
+    defer mask_big.deinit();
+
+    // Result storage and the scratch area are both sized like the mask's limb storage.
+    const result_limbs = try testing.allocator.alloc(Limb, mask_big.limbs.len);
+    defer testing.allocator.free(result_limbs);
+    const scratch_limbs = try testing.allocator.alloc(Limb, mask_big.limbs.len);
+    defer testing.allocator.free(scratch_limbs);
+
+    var result = Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
+    result.extractBits(source_big.toConst(), mask_big.toConst(), scratch_limbs);
+    try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
+}
+
+test "big int depositBits" {
+    try depositBitsTest(0x12345678, 0x0, 0x0);
+    try depositBitsTest(0x12345678, 0xf0f0f0f0, 0x50607080);
+    try depositBitsTest(0x12345678, 0xff00ff00, 0x56007800);
+    try depositBitsTest(0x12345678, 0xffff, 0x5678);
+
+    try depositBitsTest(0x1234, 0xff << 64, 0x34_00000000_00000000);
+    try depositBitsTest(0x12345678, (0xff << 64) | 0xff00f, 0x45_00000000_00067008);
+}
+
+fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, comptime expected: comptime_int) !void {
+    var source_big = try Managed.initSet(testing.allocator, source);
+    defer source_big.deinit();
+    var mask_big = try Managed.initSet(testing.allocator, mask);
+    defer mask_big.deinit();
+
+    // Result storage and the scratch area are both sized like the mask's limb storage.
+    const result_limbs = try testing.allocator.alloc(Limb, mask_big.limbs.len);
+    defer testing.allocator.free(result_limbs);
+    const scratch_limbs = try testing.allocator.alloc(Limb, mask_big.limbs.len);
+    defer testing.allocator.free(scratch_limbs);
+
+    var result = Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
+    result.depositBits(source_big.toConst(), mask_big.toConst(), scratch_limbs);
+    try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
+}
+
 test "big int conversion read/write twos complement" {
     var a = try Managed.initSet(testing.allocator, (1 << 493) - 1);
     defer a.deinit();

From 9862b07564b7276f13244700ab08c9d0a3e4188a Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Tue, 11 Apr 2023 12:25:30 +0100
Subject: [PATCH 02/18] std.math.big.int: Conversion from 2's complement

Implements std.math.big.int.Mutable.convertFromTwosComplement, to match
convertToTwosComplement.
---
 lib/std/math/big/int.zig      | 34 ++++++++++++++++++++++++++++++++++
 lib/std/math/big/int_test.zig | 27 +++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 950f152233e7..7ea40878de21 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1841,6 +1841,40 @@ pub const Mutable = struct {
         r.normalize(r.len);
     }
 
+    /// Converts a twos-complement value to a magnitude, and sets the sign of `r` to match.
+    /// `a.positive` is ignored: the limbs of `a` are always read as a raw bit pattern.
+    /// r may alias a
+    ///
+    /// Asserts `r` has enough storage to store the result.
+    /// The upper bound is `calcTwosCompLimbCount(bit_count)`
+    pub fn convertFromTwosComplement(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
+        const req_limbs = calcTwosCompLimbCount(bit_count);
+        if (req_limbs == 0 or a.eqZero()) {
+            r.set(0);
+            return;
+        }
+
+        const bit = @truncate(Log2Limb, bit_count - 1);
+        const signmask = @as(Limb, 1) << bit;
+        const mask = (signmask << 1) -% 1;
+
+        if (signedness == .unsigned or req_limbs > a.limbs.len or a.limbs[req_limbs - 1] & signmask == 0) {
+            r.truncate(.{ .limbs = a.limbs, .positive = true }, signedness, bit_count);
+            return;
+        }
+
+        r.copy(.{ .limbs = a.limbs, .positive = true }); // the bit pattern is read as positive
+        assert(r.limbs.len >= req_limbs);
+        r.len = req_limbs;
+        r.addScalar(r.toConst(), -1);
+        // The subtraction may leave `r` normalized to fewer limbs; restore the width so the top limbs are inverted too.
+        r.len = req_limbs;
+        llnot(r.limbs[0..r.len]);
+        r.limbs[r.len - 1] &= mask;
+        r.positive = false;
+        r.normalize(r.len);
+    }
+
     /// Truncate an integer to a number of bits, following 2s-complement semantics.
     /// r may alias a.
     ///
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 2d441461097f..69ef31d484ab 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -2816,6 +2816,33 @@ fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, c
     try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
 }
 
+test "big int conversion to/from twos complement" {
+    var a = try Managed.initSet(testing.allocator, maxInt(u64));
+    defer a.deinit();
+    var b = try Managed.initSet(testing.allocator, maxInt(u32));
+    defer b.deinit();
+    var c = try Managed.initSet(testing.allocator, maxInt(u493));
+    defer c.deinit();
+    // An all-ones N-bit pattern is maxInt(uN) as unsigned and -1 once reinterpreted as signed.
+    var m_a = a.toMutable();
+    m_a.convertToTwosComplement(m_a.toConst(), .unsigned, 64);
+    try testing.expectEqual(std.math.Order.eq, m_a.toConst().orderAgainstScalar(maxInt(u64)));
+    m_a.convertFromTwosComplement(m_a.toConst(), .signed, 64);
+    try testing.expectEqual(std.math.Order.eq, m_a.toConst().orderAgainstScalar(-1));
+
+    var m_b = b.toMutable();
+    m_b.convertToTwosComplement(m_b.toConst(), .unsigned, 32);
+    try testing.expectEqual(std.math.Order.eq, m_b.toConst().orderAgainstScalar(maxInt(u32)));
+    m_b.convertFromTwosComplement(m_b.toConst(), .signed, 32);
+    try testing.expectEqual(std.math.Order.eq, m_b.toConst().orderAgainstScalar(-1));
+
+    var m_c = c.toMutable();
+    m_c.convertToTwosComplement(m_c.toConst(), .unsigned, 493);
+    try testing.expectEqual(std.math.Order.eq, m_c.toConst().orderAgainstScalar(maxInt(u493)));
+    m_c.convertFromTwosComplement(m_c.toConst(), .signed, 493);
+    try testing.expectEqual(std.math.Order.eq, m_c.toConst().orderAgainstScalar(-1));
+}
+
 test "big int conversion read/write twos complement" {
     var a = try Managed.initSet(testing.allocator, (1 << 493) - 1);
     defer a.deinit();

From ad8bff82b780e9888db3d0d8f78e4a81a18e16f2 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 12 Apr 2023 21:00:14 +0100
Subject: [PATCH 03/18] Write docs for `@depositBits` and `@extractBits`

---
 doc/langref.html.in | 56 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index 7c184c8f3688..db049649afd8 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -8292,6 +8292,34 @@ test "main" {
       {#see_also|@cVaArg|@cVaCopy|@cVaEnd#}
       {#header_close#}
 
+      {#header_open|@depositBits#}
+      <pre>{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}</pre>
+      <p>
+      {#syntax#}T{#endsyntax#} must be an integer type.
+      </p>
+      <p>
+      Uses a mask to transfer the contiguous lower bits of the {#syntax#}source{#endsyntax#} operand to the destination, placing them, in order, at the bit positions that are set in the mask. All other bits in the destination are zeroed.
+      </p>
+      <p>
+      Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PDEP) in microcode. It may be faster to use an alternative method in both of these cases.
+      </p>
+      <p>
+      Example:
+      </p>
+
+      <!-- TODO make this a test when implemented-->
+      {#syntax_block|zig|@depositBits test#}
+const std = @import("std");
+
+test "deposit bits" {
+    comptime {
+        try std.testing.expectEqual(@as(u32, 0x10203040), @depositBits(@as(u32, 0x00001234), @as(u32, 0xf0f0f0f0)));
+    }
+}
+      {#end_syntax_block#}
+      {#see_also|@extractBits#}
+      {#header_close#}
+
       {#header_open|@divExact#}
       <pre>{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}</pre>
       <p>
@@ -8462,6 +8490,34 @@ export fn @"A function name that is a complete sentence."() void {}
       {#see_also|@export#}
       {#header_close#}
 
+      {#header_open|@extractBits#}
+      <pre>{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}</pre>
+      <p>
+      {#syntax#}T{#endsyntax#} must be an integer type.
+      </p>
+      <p>
+      Uses a mask to select the bits of the {#syntax#}source{#endsyntax#} operand at the positions that are set in the mask, writing them, in order, as contiguous lower bits of the destination. The upper bits of the destination are zeroed.
+      </p>
+      <p>
+      Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PEXT) in microcode. It may be faster to use an alternative method in both of these cases.
+      </p>
+      <p>
+      Example:
+      </p>
+
+      <!-- TODO Make this a test when implemented -->
+      {#syntax_block|zig|@extractBits test#}
+const std = @import("std");
+
+test "extract bits" {
+    comptime {
+        try std.testing.expectEqual(@as(u32, 0x00001357), @extractBits(@as(u32, 0x12345678), @as(u32, 0xf0f0f0f0)));
+    }
+}
+      {#end_syntax_block#}
+      {#see_also|@depositBits#}
+      {#header_close#}
+
       {#header_open|@fence#}
       <pre>{#syntax#}@fence(order: AtomicOrder) void{#endsyntax#}</pre>
       <p>

From befa47ff647f8dea48ad2350a60d7afd98031838 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 12 Apr 2023 21:00:40 +0100
Subject: [PATCH 04/18] Implement `@depositBits` and `@extractBits`

Incomplete: currently only implemented for 64-bit-or-smaller integers
for x86(-64) in the LLVM backend.
---
 src/Air.zig                  |   9 ++
 src/AstGen.zig               |  21 ++++
 src/BuiltinFn.zig            |  15 +++
 src/Liveness.zig             |   4 +
 src/Sema.zig                 | 186 +++++++++++++++++++++++++++++++++++
 src/Zir.zig                  |   6 ++
 src/arch/aarch64/CodeGen.zig |   3 +
 src/arch/arm/CodeGen.zig     |   3 +
 src/arch/riscv64/CodeGen.zig |   3 +
 src/arch/sparc64/CodeGen.zig |   3 +
 src/arch/wasm/CodeGen.zig    |   4 +
 src/arch/x86_64/CodeGen.zig  |   3 +
 src/codegen/c.zig            |   3 +
 src/codegen/llvm.zig         | 159 ++++++++++++++++++++++++++++++
 src/print_air.zig            |   2 +
 src/print_zir.zig            |   2 +
 16 files changed, 426 insertions(+)

diff --git a/src/Air.zig b/src/Air.zig
index d4d4de07f28c..64e6d918ea7e 100644
--- a/src/Air.zig
+++ b/src/Air.zig
@@ -822,6 +822,13 @@ pub const Inst = struct {
         /// Operand is unused and set to Ref.none
         work_group_id,
 
+        /// Implements @depositBits builtin.
+        /// Uses the `bin_op` field.
+        deposit_bits,
+        /// Implements @extractBits builtin.
+        /// Uses the `bin_op` field.
+        extract_bits,
+
         pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag {
             switch (op) {
                 .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt,
@@ -1232,6 +1239,8 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index, ip: *const InternPool) Type {
         .div_exact_optimized,
         .rem_optimized,
         .mod_optimized,
+        .deposit_bits,
+        .extract_bits,
         => return air.typeOf(datas[inst].bin_op.lhs, ip),
 
         .sqrt,
diff --git a/src/AstGen.zig b/src/AstGen.zig
index f1acd7e3e3c2..3c81526dac8b 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -8699,6 +8699,9 @@ fn builtinCall(
             });
             return rvalue(gz, ri, result, node);
         },
+
+        .deposit_bits => return depositExtractBits(gz, scope, ri, node, params, .deposit_bits),
+        .extract_bits => return depositExtractBits(gz, scope, ri, node, params, .extract_bits),
     }
 }
 
@@ -8966,6 +8969,24 @@ fn overflowArithmetic(
     return rvalue(gz, ri, result, node);
 }
 
+fn depositExtractBits(
+    gz: *GenZir,
+    scope: *Scope,
+    ri: ResultInfo,
+    node: Ast.Node.Index,
+    params: []const Ast.Node.Index,
+    tag: Zir.Inst.Extended,
+) InnerError!Zir.Inst.Ref {
+    // Shared lowering for `@depositBits` and `@extractBits`; `tag` selects the extended opcode.
+    const source_ref = try expr(gz, scope, .{ .rl = .none }, params[0]);
+    const mask_ref = try expr(gz, scope, .{ .rl = .none }, params[1]);
+    return rvalue(gz, ri, try gz.addExtendedPayload(tag, Zir.Inst.BinNode{
+        .node = gz.nodeIndexToRelative(node),
+        .lhs = source_ref,
+        .rhs = mask_ref,
+    }), node);
+}
+
 fn callExpr(
     gz: *GenZir,
     scope: *Scope,
diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig
index 27b963f52871..769f191c78c7 100644
--- a/src/BuiltinFn.zig
+++ b/src/BuiltinFn.zig
@@ -35,6 +35,7 @@ pub const Tag = enum {
     c_va_copy,
     c_va_end,
     c_va_start,
+    deposit_bits,
     div_exact,
     div_floor,
     div_trunc,
@@ -46,6 +47,7 @@ pub const Tag = enum {
     err_set_cast,
     @"export",
     @"extern",
+    extract_bits,
     fence,
     field,
     field_parent_ptr,
@@ -396,6 +398,12 @@ pub const list = list: {
                 .param_count = 0,
             },
         },
+        .{
+            "@depositBits", .{
+                .tag = .deposit_bits,
+                .param_count = 2,
+            },
+        },
         .{
             "@divExact",
             .{
@@ -474,6 +482,13 @@ pub const list = list: {
                 .param_count = 2,
             },
         },
+        .{
+            "@extractBits",
+            .{
+                .tag = .extract_bits,
+                .param_count = 2,
+            },
+        },
         .{
             "@fence",
             .{
diff --git a/src/Liveness.zig b/src/Liveness.zig
index 2ba029136406..5ae53c575bdb 100644
--- a/src/Liveness.zig
+++ b/src/Liveness.zig
@@ -286,6 +286,8 @@ pub fn categorizeOperand(
         .cmp_gte_optimized,
         .cmp_gt_optimized,
         .cmp_neq_optimized,
+        .deposit_bits,
+        .extract_bits,
         => {
             const o = air_datas[inst].bin_op;
             if (o.lhs == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none);
@@ -942,6 +944,8 @@ fn analyzeInst(
         .memset,
         .memset_safe,
         .memcpy,
+        .deposit_bits,
+        .extract_bits,
         => {
             const o = inst_datas[inst].bin_op;
             return analyzeOperands(a, pass, data, inst, .{ o.lhs, o.rhs, .none });
diff --git a/src/Sema.zig b/src/Sema.zig
index bb2ef22ca560..2f0514fc40f1 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -1183,6 +1183,8 @@ fn analyzeBodyInner(
                     .work_group_size       => try sema.zirWorkItem(          block, extended, extended.opcode),
                     .work_group_id         => try sema.zirWorkItem(          block, extended, extended.opcode),
                     .in_comptime           => try sema.zirInComptime(        block),
+                    .deposit_bits          => try sema.zirDepositExtractBits(block, extended, .deposit_bits),
+                    .extract_bits          => try sema.zirDepositExtractBits(block, extended, .extract_bits),
                     // zig fmt: on
 
                     .fence => {
@@ -24145,6 +24147,84 @@ fn zirInComptime(
     }
 }
 
+fn zirDepositExtractBits(
+    sema: *Sema,
+    block: *Block,
+    extended: Zir.Inst.Extended.InstData,
+    air_tag: Air.Inst.Tag,
+) CompileError!Air.Inst.Ref {
+    const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data;
+    const src = LazySrcLoc.nodeOffset(extra.node);
+
+    const lhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node };
+    const rhs_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node };
+
+    const uncasted_lhs = try sema.resolveInst(extra.lhs);
+    const uncasted_rhs = try sema.resolveInst(extra.rhs);
+
+    const lhs_ty = sema.typeOf(uncasted_lhs);
+    const rhs_ty = sema.typeOf(uncasted_rhs);
+    // Each operand must have type tag `.Int`; any other operand type is a compile error.
+    if (lhs_ty.zigTypeTag() != .Int) {
+        return sema.fail(block, lhs_src, "expected integer type, found '{}'", .{lhs_ty.fmt(sema.mod)});
+    }
+
+    if (rhs_ty.zigTypeTag() != .Int) {
+        return sema.fail(block, rhs_src, "expected integer type, found '{}'", .{rhs_ty.fmt(sema.mod)});
+    }
+    // Resolve the peer type of the two operands and coerce both to it.
+    const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs };
+    const dest_ty = try sema.resolvePeerTypes(block, src, instructions, .{
+        .override = &[_]?LazySrcLoc{ lhs_src, rhs_src },
+    });
+
+    assert(dest_ty.zigTypeTag() == .Int);
+
+    const lhs = try sema.coerce(block, dest_ty, uncasted_lhs, lhs_src);
+    const rhs = try sema.coerce(block, dest_ty, uncasted_rhs, rhs_src);
+
+    const maybe_lhs_val = try sema.resolveMaybeUndefVal(lhs);
+    const maybe_rhs_val = try sema.resolveMaybeUndefVal(rhs);
+
+    // Comptime short-circuits, checked for `lhs` first and then for `rhs`:
+    // an undefined operand yields undefined, a zero operand yields zero.
+    if (maybe_lhs_val) |lhs_val| {
+        if (lhs_val.isUndef()) return sema.addConstUndef(dest_ty);
+        if (try lhs_val.compareAllWithZeroAdvanced(.eq, sema)) {
+            return sema.addConstant(dest_ty, Value.zero);
+        }
+    }
+    if (maybe_rhs_val) |rhs_val| {
+        if (rhs_val.isUndef()) return sema.addConstUndef(dest_ty);
+        if (try rhs_val.compareAllWithZeroAdvanced(.eq, sema)) {
+            return sema.addConstant(dest_ty, Value.zero);
+        }
+    }
+    // Both operands are comptime-known: fold the builtin into a constant.
+    if (maybe_lhs_val) |lhs_val| {
+        if (maybe_rhs_val) |rhs_val| {
+            const dest_val = switch (air_tag) {
+                .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty),
+                .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty),
+                else => unreachable,
+            };
+
+            return sema.addConstant(dest_ty, dest_val);
+        }
+    }
+    // Otherwise at least one operand is only known at runtime: emit the AIR instruction.
+    const runtime_src = if (maybe_lhs_val == null) lhs_src else rhs_src;
+    try sema.requireRuntimeBlock(block, src, runtime_src);
+
+    return block.addInst(.{
+        .tag = air_tag,
+        .data = .{ .bin_op = .{
+            .lhs = lhs,
+            .rhs = rhs,
+        } },
+    });
+}
+
 fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void {
     if (block.is_comptime) {
         const msg = msg: {
@@ -36247,6 +36327,112 @@ fn intAddWithOverflowScalar(
     };
 }
 
+/// Evaluates `@depositBits(lhs, rhs)` at compile time; both operands have type `ty`.
+///
+/// The operands are converted to `ty`-wide two's-complement bit patterns, deposited with
+/// `std.math.big.int.Mutable.depositBits`, and the result is converted back to a signed
+/// magnitude. All allocations are made from `sema.arena`.
+fn intDepositBits(
+    sema: *Sema,
+    lhs: Value,
+    rhs: Value,
+    ty: Type,
+) !Value {
+    // TODO is this a performance issue? maybe we should try the operation without
+    // resorting to BigInt first. For non-bigints, @depositBits could be used?
+    const target = sema.mod.getTarget();
+    const arena = sema.arena;
+    const info = ty.intInfo(target);
+
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space, target);
+    const rhs_bigint = rhs.toBigInt(&rhs_space, target);
+
+    // Every working integer below holds an `info.bits`-wide bit pattern.
+    const limb_count = std.math.big.int.calcTwosCompLimbCount(info.bits);
+
+    const result_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+
+    const source_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(source_limbs);
+
+    const mask_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(mask_limbs);
+
+    // The scratch buffer must cover the *converted* mask, which can span more limbs than
+    // `rhs_bigint` when `rhs` is negative, so it is sized by the type width as well.
+    const limbs_buffer = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(limbs_buffer);
+
+    var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
+    var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
+    var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
+
+    // `depositBits` asserts positive operands, so operate on the two's-complement patterns.
+    source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
+    mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
+
+    result.depositBits(source.toConst(), mask.toConst(), limbs_buffer);
+
+    // Reinterpret the deposited pattern according to the signedness of `ty`.
+    result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
+    return Value.fromBigInt(arena, result.toConst());
+}
+
+/// Evaluates `@extractBits(lhs, rhs)` at compile time; both operands have type `ty`.
+///
+/// The operands are converted to `ty`-wide two's-complement bit patterns, extracted with
+/// `std.math.big.int.Mutable.extractBits`, and the result is converted back to a signed
+/// magnitude. All allocations are made from `sema.arena`.
+fn intExtractBits(
+    sema: *Sema,
+    lhs: Value,
+    rhs: Value,
+    ty: Type,
+) !Value {
+    // TODO is this a performance issue? maybe we should try the operation without
+    // resorting to BigInt first. For non-bigints, @extractBits could be used?
+    const target = sema.mod.getTarget();
+    const arena = sema.arena;
+    const info = ty.intInfo(target);
+
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space, target);
+    const rhs_bigint = rhs.toBigInt(&rhs_space, target);
+
+    // Every working integer below holds an `info.bits`-wide bit pattern.
+    const limb_count = std.math.big.int.calcTwosCompLimbCount(info.bits);
+
+    const result_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+
+    const source_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(source_limbs);
+
+    const mask_limbs = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(mask_limbs);
+
+    // The scratch buffer must cover the *converted* mask, which can span more limbs than
+    // `rhs_bigint` when `rhs` is negative, so it is sized by the type width as well.
+    const limbs_buffer = try arena.alloc(std.math.big.Limb, limb_count);
+    defer arena.free(limbs_buffer);
+
+    var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
+    var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
+    var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
+
+    // `extractBits` asserts positive operands, so operate on the two's-complement patterns.
+    source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
+    mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
+
+    result.extractBits(source.toConst(), mask.toConst(), limbs_buffer);
+
+    // Reinterpret the extracted pattern according to the signedness of `ty`.
+    result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
+    return Value.fromBigInt(arena, result.toConst());
+}
+
 /// Asserts the values are comparable. Both operands have type `ty`.
 /// For vectors, returns true if the comparison is true for ALL elements.
 ///
diff --git a/src/Zir.zig b/src/Zir.zig
index 4a0fdde24f95..7b6284d26412 100644
--- a/src/Zir.zig
+++ b/src/Zir.zig
@@ -1985,6 +1985,12 @@ pub const Inst = struct {
         /// with a specific value. For instance, this is used for the capture of an `errdefer`.
         /// This should never appear in a body.
         value_placeholder,
+        /// Implements the `@depositBits` builtin.
+        /// `operand` is payload index to `BinNode`.
+        deposit_bits,
+        /// Implements the `@extractBits` builtin.
+        /// `operand` is payload index to `BinNode`.
+        extract_bits,
 
         pub const InstData = struct {
             opcode: Extended,
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index 5080a0451a7c..076ce8764b89 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -903,6 +903,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .work_item_id => unreachable,
             .work_group_size => unreachable,
             .work_group_id => unreachable,
+
+            .deposit_bits => return self.fail("TODO implement deposit_bits", .{}),
+            .extract_bits => return self.fail("TODO implement extract_bits", .{}),
             // zig fmt: on
         }
 
diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig
index 7ece4ba2e3a3..bcc651e99481 100644
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -887,6 +887,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .work_item_id => unreachable,
             .work_group_size => unreachable,
             .work_group_id => unreachable,
+
+            .deposit_bits => return self.fail("TODO implement deposit_bits", .{}),
+            .extract_bits => return self.fail("TODO implement extract_bits", .{}),
             // zig fmt: on
         }
 
diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index cba1de92c1ed..65b1cfe6b0f4 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -717,6 +717,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .work_item_id => unreachable,
             .work_group_size => unreachable,
             .work_group_id => unreachable,
+
+            .deposit_bits => return self.fail("TODO implement deposit_bits", .{}),
+            .extract_bits => return self.fail("TODO implement extract_bits", .{}),
             // zig fmt: on
         }
         if (std.debug.runtime_safety) {
diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig
index f210f8e14461..4aa5e87311dd 100644
--- a/src/arch/sparc64/CodeGen.zig
+++ b/src/arch/sparc64/CodeGen.zig
@@ -735,6 +735,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .work_item_id => unreachable,
             .work_group_size => unreachable,
             .work_group_id => unreachable,
+
+            .deposit_bits => @panic("TODO implement deposit_bits"),
+            .extract_bits => @panic("TODO implement extract_bits"),
             // zig fmt: on
         }
 
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index efd5ea6642f9..d9be132d4fa4 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -2068,6 +2068,10 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void {
         .work_group_size,
         .work_group_id,
         => unreachable,
+
+        .deposit_bits,
+        .extract_bits,
+        => |tag| return func.fail("TODO implement {s}", .{@tagName(tag)}),
     };
 }
 
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index b4ef42b953c5..fabfc1877666 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1985,6 +1985,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .work_item_id => unreachable,
             .work_group_size => unreachable,
             .work_group_id => unreachable,
+
+            .deposit_bits => return self.fail("TODO implement deposit_bits", .{}),
+            .extract_bits => return self.fail("TODO implement extract_bits", .{}),
             // zig fmt: on
         }
 
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index 498eca4ce28b..a5c824669ed9 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -3084,6 +3084,9 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
             .work_group_size,
             .work_group_id,
             => unreachable,
+
+            .deposit_bits => return f.fail("TODO: C backend: implement deposit_bits", .{}),
+            .extract_bits => return f.fail("TODO: C backend: implement extract_bits", .{}),
             // zig fmt: on
         };
         if (result_value == .new_local) {
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 1f390edc0210..bff258d77156 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -4550,6 +4550,9 @@ pub const FuncGen = struct {
                 .work_item_id => try self.airWorkItemId(inst),
                 .work_group_size => try self.airWorkGroupSize(inst),
                 .work_group_id => try self.airWorkGroupId(inst),
+
+                .deposit_bits => try self.airDepositBits(inst),
+                .extract_bits => try self.airExtractBits(inst),
                 // zig fmt: on
             };
             if (opt_value) |val| {
@@ -9447,6 +9450,162 @@ pub const FuncGen = struct {
         return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workgroup.id");
     }
 
+    fn airDepositBits(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+        const lhs = try self.resolveInst(bin_op.lhs);
+        const rhs = try self.resolveInst(bin_op.rhs);
+        const inst_ty = self.air.typeOfIndex(inst);
+
+        const target = self.dg.module.getTarget();
+        const params = [2]*llvm.Value{ lhs, rhs };
+        switch (target.cpu.arch) {
+            .x86, .x86_64 => |tag| blk: {
+                // Without BMI2 there is no pdep intrinsic to call; fall through to the emulated lowering
+                if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk;
+
+                const bits = inst_ty.intInfo(target).bits;
+                const supports_64 = tag == .x86_64;
+                // Native path covers up to 32 bits (64 on x86_64 only); wider integers are emulated
+                if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk;
+
+                return self.buildDepositBitsNative(inst_ty, params);
+            },
+            else => {},
+        }
+
+        return self.buildDepositBitsEmulated(inst_ty, params);
+    }
+
+    fn buildDepositBitsNative(
+        self: *FuncGen,
+        ty: Type,
+        params: [2]*llvm.Value,
+    ) !*llvm.Value {
+        const target = self.dg.module.getTarget();
+
+        assert(target.cpu.arch.isX86());
+        assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2));
+
+        const bits = ty.intInfo(target).bits;
+        const intrinsic_name = switch (bits) {
+            1...32 => "llvm.x86.bmi.pdep.32",
+            33...64 => "llvm.x86.bmi.pdep.64",
+            else => unreachable, // NOTE(review): a 0-bit integer would land here; presumably never reaches codegen - confirm
+        };
+        const needs_extend = bits != 32 and bits != 64;
+
+        var params_cast = params;
+
+        // Zero-extend both operands to the operand width of the chosen intrinsic (32 or 64 bits)
+        if (needs_extend) {
+            const llvm_extend_ty = self.context.intType(if (bits <= 32) 32 else 64);
+            params_cast = .{
+                self.builder.buildZExt(params[0], llvm_extend_ty, ""),
+                self.builder.buildZExt(params[1], llvm_extend_ty, ""),
+            };
+        }
+
+        const llvm_fn = self.getIntrinsic(intrinsic_name, &.{});
+        const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params_cast, 2, .Fast, .Auto, "");
+
+        // Operands were already exactly 32 or 64 bits wide, so the call result is returned as-is
+        if (!needs_extend) return result;
+
+        // Cast back to the original integer size
+        const llvm_trunc_ty = try self.dg.lowerType(ty);
+        return self.builder.buildTrunc(result, llvm_trunc_ty, "");
+    }
+
+    fn buildDepositBitsEmulated(
+        self: *FuncGen,
+        ty: Type,
+        params: [2]*llvm.Value,
+    ) !*llvm.Value {
+        _ = ty;
+        _ = params;
+        return self.dg.todo("implement deposit_bits emulation", .{});
+    }
+
+    fn airExtractBits(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+        if (self.liveness.isUnused(inst)) return null;
+
+        const bin_op = self.air.instructions.items(.data)[inst].bin_op;
+        const lhs = try self.resolveInst(bin_op.lhs);
+        const rhs = try self.resolveInst(bin_op.rhs);
+        const inst_ty = self.air.typeOfIndex(inst);
+
+        const target = self.dg.module.getTarget();
+        const params = [2]*llvm.Value{ lhs, rhs };
+        switch (target.cpu.arch) {
+            .x86, .x86_64 => |tag| blk: {
+                // Without BMI2 there is no pext intrinsic to call; fall through to the emulated lowering
+                if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk;
+
+                const bits = inst_ty.intInfo(target).bits;
+                const supports_64 = tag == .x86_64;
+                // Native path covers up to 32 bits (64 on x86_64 only); wider integers are emulated
+                if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk;
+
+                return self.buildExtractBitsNative(inst_ty, params);
+            },
+            else => {},
+        }
+
+        return self.buildExtractBitsEmulated(inst_ty, params);
+    }
+
+    fn buildExtractBitsNative(
+        self: *FuncGen,
+        ty: Type,
+        params: [2]*llvm.Value,
+    ) !*llvm.Value {
+        const target = self.dg.module.getTarget();
+
+        assert(target.cpu.arch.isX86());
+        assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2));
+
+        const bits = ty.intInfo(target).bits;
+        const intrinsic_name = switch (bits) {
+            1...32 => "llvm.x86.bmi.pext.32",
+            33...64 => "llvm.x86.bmi.pext.64",
+            else => unreachable, // NOTE(review): a 0-bit integer would land here; presumably never reaches codegen - confirm
+        };
+        const needs_extend = bits != 32 and bits != 64;
+
+        var params_cast = params;
+
+        // Zero-extend both operands to the operand width of the chosen intrinsic (32 or 64 bits)
+        if (needs_extend) {
+            const llvm_extend_ty = self.context.intType(if (bits <= 32) 32 else 64);
+            params_cast = .{
+                self.builder.buildZExt(params[0], llvm_extend_ty, ""),
+                self.builder.buildZExt(params[1], llvm_extend_ty, ""),
+            };
+        }
+
+        const llvm_fn = self.getIntrinsic(intrinsic_name, &.{});
+        const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params_cast, 2, .Fast, .Auto, "");
+
+        // Operands were already exactly 32 or 64 bits wide, so the call result is returned as-is
+        if (!needs_extend) return result;
+
+        // Truncate the widened result back to the original integer type
+        const llvm_trunc_ty = try self.dg.lowerType(ty);
+        return self.builder.buildTrunc(result, llvm_trunc_ty, "");
+    }
+
+    fn buildExtractBitsEmulated(
+        self: *FuncGen,
+        ty: Type,
+        params: [2]*llvm.Value,
+    ) !*llvm.Value {
+        _ = ty;
+        _ = params;
+        return self.dg.todo("implement extract_bits emulation", .{});
+    }
+
     fn getErrorNameTable(self: *FuncGen) !*llvm.Value {
         if (self.dg.object.error_name_table) |table| {
             return table;
diff --git a/src/print_air.zig b/src/print_air.zig
index d73ec308917f..700fdbffadef 100644
--- a/src/print_air.zig
+++ b/src/print_air.zig
@@ -173,6 +173,8 @@ const Writer = struct {
             .memcpy,
             .memset,
             .memset_safe,
+            .deposit_bits,
+            .extract_bits,
             => try w.writeBinOp(s, inst),
 
             .is_null,
diff --git a/src/print_zir.zig b/src/print_zir.zig
index 029157818957..b85cf3e2eff7 100644
--- a/src/print_zir.zig
+++ b/src/print_zir.zig
@@ -527,6 +527,8 @@ const Writer = struct {
             .prefetch,
             .addrspace_cast,
             .c_va_arg,
+            .deposit_bits,
+            .extract_bits,
             => {
                 const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data;
                 const src = LazySrcLoc.nodeOffset(inst_data.node);

From 70d2dc4a3f47d6a079554833fb8603f78f7f5d19 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Thu, 13 Apr 2023 19:33:55 +0100
Subject: [PATCH 05/18] LLVM: Implement emulation for `@depositBits`

---
 src/codegen/llvm.zig | 75 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 3 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index bff258d77156..3fe036ceb93f 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -9518,14 +9518,83 @@ pub const FuncGen = struct {
         return self.builder.buildTrunc(result, llvm_trunc_ty, "");
     }
 
+    // TODO Should this belong in compiler-rt?
+    //
+    // Implements @depositBits(source, mask) in software
+    // (i.e. without platform-specific instructions)
+    //
+    // var bb = 1; // selects the next source bit to deposit, starting at bit 0
+    // var result = 0;
+    // do {
+    //     const bit = mask & -mask; // isolate the lowest set bit of mask...
+    //     mask &= ~bit; // ...and clear it
+    //     const source_bit = source & bb;
+    //     if (source_bit != 0) result |= bit;
+    //     bb += bb; // move on to the next source bit
+    // } while (mask != 0);
+    // // (a zero mask still runs one iteration, but then bit == 0 and nothing is ORed in)
+    // return result;
     fn buildDepositBitsEmulated(
         self: *FuncGen,
         ty: Type,
         params: [2]*llvm.Value,
     ) !*llvm.Value {
-        _ = ty;
-        _ = params;
-        return self.dg.todo("implement deposit_bits emulation", .{});
+        const llvm_ty = try self.dg.lowerType(ty);
+
+        const source = params[0];
+        const mask_start = params[1];
+        const zero = llvm_ty.constNull();
+        const one = llvm_ty.constInt(1, .False);
+        const minus_one = llvm_ty.constInt(@bitCast(c_ulonglong, @as(c_longlong, -1)), .True);
+
+        const prev_block = self.builder.getInsertBlock();
+        const loop_block = self.context.appendBasicBlock(self.llvm_func, "Loop");
+        const after_block = self.context.appendBasicBlock(self.llvm_func, "After");
+
+        _ = self.builder.buildBr(loop_block);
+        self.builder.positionBuilderAtEnd(loop_block);
+        const mask_phi = self.builder.buildPhi(llvm_ty, "");
+        const result_phi = self.builder.buildPhi(llvm_ty, "");
+        const bb_phi = self.builder.buildPhi(llvm_ty, "");
+        const minus_mask = self.builder.buildSub(zero, mask_phi, "");
+        const bit = self.builder.buildAnd(mask_phi, minus_mask, "");
+        const not_bit = self.builder.buildXor(bit, minus_one, "");
+        const new_mask = self.builder.buildAnd(mask_phi, not_bit, "");
+        const source_bit = self.builder.buildAnd(source, bb_phi, "");
+        const source_bit_set = self.builder.buildICmp(.NE, source_bit, zero, "");
+        const bit_or_zero = self.builder.buildSelect(source_bit_set, bit, zero, ""); // select rather than branch, so the loop stays one basic block
+        const new_result = self.builder.buildOr(result_phi, bit_or_zero, "");
+        const new_bb = self.builder.buildAdd(bb_phi, bb_phi, "");
+        const while_cond = self.builder.buildICmp(.NE, new_mask, zero, "");
+        _ = self.builder.buildCondBr(while_cond, loop_block, after_block);
+
+        mask_phi.addIncoming(
+            &[2]*llvm.Value{ mask_start, new_mask },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        result_phi.addIncoming(
+            &[2]*llvm.Value{ zero, new_result },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        bb_phi.addIncoming(
+            &[2]*llvm.Value{ one, new_bb },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        self.builder.positionBuilderAtEnd(after_block);
+        const final_result = self.builder.buildPhi(llvm_ty, "");
+        final_result.addIncoming(
+            &[1]*llvm.Value{ new_result },
+            &[1]*llvm.BasicBlock{ loop_block },
+            1,
+        );
+
+        return final_result;
     }
 
     fn airExtractBits(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {

From 98f70c024d217dfb5c01d741fdbac235ebcee858 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Thu, 13 Apr 2023 21:53:31 +0100
Subject: [PATCH 06/18] LLVM: Implement emulation for `@extractBits`

---
 src/codegen/llvm.zig | 77 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 74 insertions(+), 3 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 3fe036ceb93f..e2714b6dd4e0 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -9665,14 +9665,85 @@ pub const FuncGen = struct {
         return self.builder.buildTrunc(result, llvm_trunc_ty, "");
     }
 
+    // TODO Should this belong in compiler-rt?
+    //
+    // Implements @extractBits(source, mask) in software
+    // (i.e. without platform-specific instructions)
+    //
+    // var bb = 1; // the next result bit to fill, starting at bit 0
+    // var result = 0;
+    // do {
+    //     const bit = mask & -mask; // isolate the lowest set bit of mask...
+    //     mask &= ~bit; // ...and clear it
+    //     const source_bit = source & bit;
+    //     if (source_bit != 0) result |= bb;
+    //     bb += bb; // move on to the next result bit
+    // } while (mask != 0);
+    // // (a zero mask still runs one iteration, but then bit == 0 and nothing is ORed in)
+    // return result;
     fn buildExtractBitsEmulated(
         self: *FuncGen,
         ty: Type,
         params: [2]*llvm.Value,
     ) !*llvm.Value {
-        _ = ty;
-        _ = params;
-        return self.dg.todo("implement extract_bits emulation", .{});
+        const llvm_ty = try self.dg.lowerType(ty);
+
+        const zero = llvm_ty.constNull();
+        const one = llvm_ty.constInt(1, .False);
+        const minus_one = llvm_ty.constInt(@bitCast(c_ulonglong, @as(c_longlong, -1)), .True);
+        const source = params[0];
+        const start_mask = params[1];
+        const start_result = zero;
+        const start_bb = one;
+
+        const prev_block = self.builder.getInsertBlock();
+        const loop_block = self.context.appendBasicBlock(self.llvm_func, "Loop");
+        const after_block = self.context.appendBasicBlock(self.llvm_func, "After");
+
+        _ = self.builder.buildBr(loop_block);
+        self.builder.positionBuilderAtEnd(loop_block);
+        const mask_phi = self.builder.buildPhi(llvm_ty, "");
+        const result_phi = self.builder.buildPhi(llvm_ty, "");
+        const bb_phi = self.builder.buildPhi(llvm_ty, "");
+        const minus_mask = self.builder.buildSub(zero, mask_phi, "");
+        const bit = self.builder.buildAnd(mask_phi, minus_mask, "");
+        const not_bit = self.builder.buildXor(bit, minus_one, "");
+        const new_mask = self.builder.buildAnd(mask_phi, not_bit, "");
+        const source_bit = self.builder.buildAnd(source, bit, "");
+        const source_bit_set = self.builder.buildICmp(.NE, source_bit, zero, "");
+        const bb_or_zero = self.builder.buildSelect(source_bit_set, bb_phi, zero, ""); // select rather than branch, so the loop stays one basic block
+        const new_result = self.builder.buildOr(result_phi, bb_or_zero, "");
+        const new_bb = self.builder.buildAdd(bb_phi, bb_phi, "");
+        const while_cond = self.builder.buildICmp(.NE, new_mask, zero, "");
+        _ = self.builder.buildCondBr(while_cond, loop_block, after_block);
+
+        mask_phi.addIncoming(
+            &[2]*llvm.Value{ start_mask, new_mask },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        result_phi.addIncoming(
+            &[2]*llvm.Value{ start_result, new_result },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        bb_phi.addIncoming(
+            &[2]*llvm.Value{ start_bb, new_bb },
+            &[2]*llvm.BasicBlock{ prev_block, loop_block },
+            2,
+        );
+
+        self.builder.positionBuilderAtEnd(after_block);
+        const final_result = self.builder.buildPhi(llvm_ty, "");
+        final_result.addIncoming(
+            &[1]*llvm.Value{ new_result },
+            &[1]*llvm.BasicBlock{ loop_block },
+            1,
+        );
+
+        return final_result;
     }
 
     fn getErrorNameTable(self: *FuncGen) !*llvm.Value {

From 472734e43a133641c56f4f09ea046a347539f99c Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Fri, 14 Apr 2023 12:45:18 +0100
Subject: [PATCH 07/18] std.math.big.int: Fix index out-of-bounds

---
 lib/std/math/big/int.zig | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 7ea40878de21..143f971c6d57 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1761,6 +1761,9 @@ pub const Mutable = struct {
             const i_limb_bit = @intCast(u6, i % limb_bits);
 
             mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+
+            if (i_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)
+
             const source_bit_set = source.limbs[i_limb_index] & (@as(Limb, 1) << i_limb_bit) != 0;
 
             r.limbs[mask_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
@@ -1798,6 +1801,9 @@ pub const Mutable = struct {
             const i_limb_bit = @intCast(u6, i % limb_bits);
 
             mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+
+            if (mask_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)
+
             const source_bit_set = source.limbs[mask_limb_index] & (@as(Limb, 1) << mask_limb_bit) != 0;
 
             r.limbs[i_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;

From a4ae063ffe3285b6c46c681143b08d69d3a4e85a Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Fri, 14 Apr 2023 12:45:37 +0100
Subject: [PATCH 08/18] Add behaviour tests for `@depositBits` and
 `@extractBits`

---
 test/behavior/deposit_extract_bits.zig | 58 ++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 test/behavior/deposit_extract_bits.zig

diff --git a/test/behavior/deposit_extract_bits.zig b/test/behavior/deposit_extract_bits.zig
new file mode 100644
index 000000000000..9f2bafe22560
--- /dev/null
+++ b/test/behavior/deposit_extract_bits.zig
@@ -0,0 +1,58 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const mem = std.mem;
+const expect = std.testing.expect;
+const expectEqual = std.testing.expectEqual;
+
+test "@depositBits" {
+    if (builtin.zig_backend != .stage2_llvm) return error.SkipZigTest; // TODO: the other backends do not implement deposit_bits yet
+
+    const S = struct {
+        pub fn doTheTest() !void {
+            var a: u64 = 0;
+            var b: u64 = 0xFFFF_FFFF_FFFF_FFFF;
+            var c: u64 = 0x1234_5678_9012_3456;
+            var d: u64 = 0x00F0_FF00_F00F_00FF;
+            var e: u128 = @as(u128, d) << 64;
+
+            try expect(@depositBits(b, a) == 0); // empty mask: nothing is deposited
+            try expect(@depositBits(a, b) == 0); // zero source: only zero bits are deposited
+
+            try expect(@depositBits(b, c) == c); // all-ones source reproduces the mask
+            try expect(@depositBits(b, d) == d);
+
+            try expect(@depositBits(c, d) == 0x0000_1200_3004_0056); // low bits of c scattered into the set bits of d
+            try expect(@depositBits(c, e) == 0x0000_1200_3004_0056 << 64); // same pattern with a u128 mask shifted up by 64
+        }
+    };
+
+    try S.doTheTest(); // runtime evaluation
+    try comptime S.doTheTest(); // compile-time evaluation
+}
+
+test "@extractBits" {
+    if (builtin.zig_backend != .stage2_llvm) return error.SkipZigTest; // TODO: the other backends do not implement extract_bits yet
+
+    const S = struct {
+        pub fn doTheTest() !void {
+            var a: u64 = 0;
+            var b: u64 = 0xFFFF_FFFF_FFFF_FFFF;
+            var c: u64 = 0x1234_5678_9012_3456;
+            var d: u64 = 0x00F0_FF00_F00F_00FF;
+            var e: u128 = @as(u128, c) << 64;
+            var f: u128 = @as(u128, d) << 64;
+
+            try expect(@extractBits(b, a) == 0); // empty mask: nothing is extracted
+            try expect(@extractBits(a, b) == 0); // zero source: only zero bits are extracted
+
+            try expect(@extractBits(c, b) == c); // all-ones mask returns the source unchanged
+            try expect(@extractBits(d, b) == d);
+
+            try expect(@extractBits(c, d) == 0x0356_9256); // bits of c under d gathered into the low bits
+            try expect(@extractBits(e, f) == 0x0356_9256); // same source and mask shifted up by 64 in a u128
+        }
+    };
+
+    try S.doTheTest(); // runtime evaluation
+    try comptime S.doTheTest(); // compile-time evaluation
+}

From fb1cb9f2e8a4a59cc7fa32b220f18a9d63b1fa23 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Sat, 15 Apr 2023 12:00:06 +0100
Subject: [PATCH 09/18] zig fmt

---
 src/codegen/llvm.zig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index e2714b6dd4e0..74838e3845b6 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -9589,8 +9589,8 @@ pub const FuncGen = struct {
         self.builder.positionBuilderAtEnd(after_block);
         const final_result = self.builder.buildPhi(llvm_ty, "");
         final_result.addIncoming(
-            &[1]*llvm.Value{ new_result },
-            &[1]*llvm.BasicBlock{ loop_block },
+            &[1]*llvm.Value{new_result},
+            &[1]*llvm.BasicBlock{loop_block},
             1,
         );
 
@@ -9738,8 +9738,8 @@ pub const FuncGen = struct {
         self.builder.positionBuilderAtEnd(after_block);
         const final_result = self.builder.buildPhi(llvm_ty, "");
         final_result.addIncoming(
-            &[1]*llvm.Value{ new_result },
-            &[1]*llvm.BasicBlock{ loop_block },
+            &[1]*llvm.Value{new_result},
+            &[1]*llvm.BasicBlock{loop_block},
             1,
         );
 

From d4da312d85513af70f557994029acb534182186c Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Sat, 15 Apr 2023 12:02:06 +0100
Subject: [PATCH 10/18] Replace `u6` with `Log2Limb`

---
 lib/std/math/big/int.zig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 143f971c6d57..8f20d944ea6a 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1755,10 +1755,10 @@ pub const Mutable = struct {
             i += 1;
         }) {
             const mask_limb_index = mask_bit_index / limb_bits;
-            const mask_limb_bit = @intCast(u6, mask_bit_index % limb_bits);
+            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
 
             const i_limb_index = i / limb_bits;
-            const i_limb_bit = @intCast(u6, i % limb_bits);
+            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
 
             mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
 
@@ -1795,10 +1795,10 @@ pub const Mutable = struct {
             i += 1;
         }) {
             const mask_limb_index = mask_bit_index / limb_bits;
-            const mask_limb_bit = @intCast(u6, mask_bit_index % limb_bits);
+            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
 
             const i_limb_index = i / limb_bits;
-            const i_limb_bit = @intCast(u6, i % limb_bits);
+            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
 
             mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
 

From 3015a6a2c9aff3dd7b2a97d9e0397c698fed9943 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Mon, 17 Apr 2023 16:14:56 +0100
Subject: [PATCH 11/18] big.int.depositBits/extractBits: Remove limbs_buffer

Removes the requirement to copy and modify `mask`, removing the need to
clone `mask` into a `Mutable` bigint.
---
 lib/std/math/big/int.zig      | 76 ++++++++++++++++++++---------------
 lib/std/math/big/int_test.zig | 10 +----
 src/Sema.zig                  | 16 +-------
 3 files changed, 48 insertions(+), 54 deletions(-)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 8f20d944ea6a..2862410fd371 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1736,31 +1736,37 @@ pub const Mutable = struct {
     /// r = @depositBits(source, mask)
     ///
     /// Asserts that `source` and `mask` are positive
-    ///
-    /// `limbs_buffer` is used as a working area. It must have length of at least `mask.limbs.len`.
-    pub fn depositBits(r: *Mutable, source: Const, mask: Const, limbs_buffer: []Limb) void {
+    pub fn depositBits(r: *Mutable, source: Const, mask: Const) void {
         assert(source.positive);
         assert(mask.positive);
 
         r.positive = true;
         std.mem.set(Limb, r.limbs, 0);
 
-        var mut_mask = Mutable{ .limbs = limbs_buffer[0..mask.limbs.len], .positive = undefined, .len = undefined };
-        mut_mask.copy(mask);
-
-        var mask_bit_index = mut_mask.toConst().ctz();
+        var mask_limb: Limb = mask.limbs[0];
+        var mask_limb_index: Limb = 0;
         var i: usize = 0;
-        while (!mut_mask.eqZero()) : ({
-            mask_bit_index = mut_mask.toConst().ctz();
-            i += 1;
-        }) {
-            const mask_limb_index = mask_bit_index / limb_bits;
-            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
+        outer: while (true) : (i += 1) {
+            // Find next bit in mask
+            const mask_limb_bit: Log2Limb = limb_bit: while (true) {
+                const mask_limb_tz = @ctz(mask_limb);
+                if (mask_limb_tz != @sizeOf(Limb) * 8) {
+                    const cast_limb_bit = @intCast(Log2Limb, mask_limb_tz);
+                    mask_limb ^= @as(Limb, 1) << cast_limb_bit;
+                    break :limb_bit cast_limb_bit;
+                }
 
-            const i_limb_index = i / limb_bits;
-            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
+                mask_limb_index += 1;
+                // No more limbs, we've finished iterating the mask
+                if (mask_limb_index >= mask.limbs.len) {
+                    break :outer;
+                }
+
+                mask_limb = mask.limbs[mask_limb_index];
+            };
 
-            mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+            const i_limb_index = i / limb_bits;
+            const i_limb_bit = @truncate(Log2Limb, i);
 
             if (i_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)
 
@@ -1776,31 +1782,37 @@ pub const Mutable = struct {
     /// r = @extractBits(source, mask)
     ///
     /// Asserts that `source` and `mask` are positive
-    ///
-    /// `limbs_buffer` is used as a working area. It must have length of at least `mask.limbs.len`.
-    pub fn extractBits(r: *Mutable, source: Const, mask: Const, limbs_buffer: []Limb) void {
+    pub fn extractBits(r: *Mutable, source: Const, mask: Const) void {
         assert(source.positive);
         assert(mask.positive);
 
         r.positive = true;
         std.mem.set(Limb, r.limbs, 0);
 
-        var mut_mask = Mutable{ .limbs = limbs_buffer[0..mask.limbs.len], .positive = undefined, .len = undefined };
-        mut_mask.copy(mask);
-
-        var mask_bit_index = mut_mask.toConst().ctz();
+        var mask_limb: Limb = mask.limbs[0];
+        var mask_limb_index: Limb = 0;
         var i: usize = 0;
-        while (!mut_mask.eqZero()) : ({
-            mask_bit_index = mut_mask.toConst().ctz();
-            i += 1;
-        }) {
-            const mask_limb_index = mask_bit_index / limb_bits;
-            const mask_limb_bit = @intCast(Log2Limb, mask_bit_index % limb_bits);
+        outer: while (true) : (i += 1) {
+            // Find next bit in mask
+            const mask_limb_bit: Log2Limb = limb_bit: while (true) {
+                const mask_limb_tz = @ctz(mask_limb);
+                if (mask_limb_tz != @sizeOf(Limb) * 8) {
+                    const cast_limb_bit = @intCast(Log2Limb, mask_limb_tz);
+                    mask_limb ^= @as(Limb, 1) << cast_limb_bit;
+                    break :limb_bit cast_limb_bit;
+                }
 
-            const i_limb_index = i / limb_bits;
-            const i_limb_bit = @intCast(Log2Limb, i % limb_bits);
+                mask_limb_index += 1;
+                // No more limbs, we've finished iterating the mask
+                if (mask_limb_index >= mask.limbs.len) {
+                    break :outer;
+                }
+
+                mask_limb = mask.limbs[mask_limb_index];
+            };
 
-            mut_mask.limbs[mask_limb_index] &= ~(@as(Limb, 1) << mask_limb_bit); // Unset the mask bit
+            const i_limb_index = i / limb_bits;
+            const i_limb_bit = @truncate(Log2Limb, i);
 
             if (mask_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)
 
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 69ef31d484ab..63091a8e6a3d 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -2781,10 +2781,7 @@ fn extractBitsTest(comptime source: comptime_int, comptime mask: comptime_int, c
     defer testing.allocator.free(limbs);
     var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined };
 
-    const limbs_buffer = try testing.allocator.alloc(Limb, mask_bigint.limbs.len);
-    defer testing.allocator.free(limbs_buffer);
-
-    result.extractBits(source_bigint.toConst(), mask_bigint.toConst(), limbs_buffer);
+    result.extractBits(source_bigint.toConst(), mask_bigint.toConst());
 
     try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
 }
@@ -2808,10 +2805,7 @@ fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, c
     defer testing.allocator.free(limbs);
     var result = Mutable{ .limbs = limbs, .positive = undefined, .len = undefined };
 
-    const limbs_buffer = try testing.allocator.alloc(Limb, mask_bigint.limbs.len);
-    defer testing.allocator.free(limbs_buffer);
-
-    result.depositBits(source_bigint.toConst(), mask_bigint.toConst(), limbs_buffer);
+    result.depositBits(source_bigint.toConst(), mask_bigint.toConst());
 
     try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
 }
diff --git a/src/Sema.zig b/src/Sema.zig
index 2f0514fc40f1..4be441dd415c 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -36361,12 +36361,6 @@ fn intDepositBits(
     );
     defer arena.free(mask_limbs);
 
-    const limbs_buffer = try arena.alloc(
-        std.math.big.Limb,
-        rhs_bigint.limbs.len,
-    );
-    defer arena.free(limbs_buffer);
-
     var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
     var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
@@ -36374,7 +36368,7 @@ fn intDepositBits(
     source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
     mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
 
-    result.depositBits(source.toConst(), mask.toConst(), limbs_buffer);
+    result.depositBits(source.toConst(), mask.toConst());
 
     result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
     return Value.fromBigInt(arena, result.toConst());
@@ -36414,12 +36408,6 @@ fn intExtractBits(
     );
     defer arena.free(mask_limbs);
 
-    const limbs_buffer = try arena.alloc(
-        std.math.big.Limb,
-        rhs_bigint.limbs.len,
-    );
-    defer arena.free(limbs_buffer);
-
     var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
     var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
@@ -36427,7 +36415,7 @@ fn intExtractBits(
     source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
     mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
 
-    result.extractBits(source.toConst(), mask.toConst(), limbs_buffer);
+    result.extractBits(source.toConst(), mask.toConst());
 
     result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
     return Value.fromBigInt(arena, result.toConst());

From c13a54f97d528db970ece8f713dbc9ce0a5a057e Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Tue, 18 Apr 2023 14:52:46 +0100
Subject: [PATCH 12/18] Disallow signed integer types for deposit/extract

---
 lib/std/math/big/int.zig      | 34 -----------------
 lib/std/math/big/int_test.zig | 27 --------------
 src/Sema.zig                  | 70 ++++++++---------------------------
 3 files changed, 16 insertions(+), 115 deletions(-)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 2862410fd371..93edb9255388 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1859,40 +1859,6 @@ pub const Mutable = struct {
         r.normalize(r.len);
     }
 
-    /// Converts a twos-complement value to a magnitude, and sets the sign of `r` to match.
-    /// `a.positive` is ignored
-    /// r may alias a
-    ///
-    /// Asserts `r` has enough storage to store the result.
-    /// The upper bound is `calcTwosCompLimbCount(bit_count)`
-    pub fn convertFromTwosComplement(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
-        const req_limbs = calcTwosCompLimbCount(bit_count);
-        if (req_limbs == 0 or a.eqZero()) {
-            r.set(0);
-            return;
-        }
-
-        const bit = @truncate(Log2Limb, bit_count - 1);
-        const signmask = @as(Limb, 1) << bit;
-        const mask = (signmask << 1) -% 1;
-
-        if (signedness == .unsigned or req_limbs > a.limbs.len or a.limbs[req_limbs - 1] & signmask == 0) {
-            r.truncate(a, signedness, bit_count);
-            return;
-        }
-
-        r.copy(a);
-        assert(r.limbs.len >= req_limbs);
-        r.len = req_limbs;
-
-        r.addScalar(r.toConst(), -1);
-        llnot(r.limbs[0..r.len]);
-        r.limbs[r.len - 1] &= mask;
-
-        r.positive = false;
-        r.normalize(r.len);
-    }
-
     /// Truncate an integer to a number of bits, following 2s-complement semantics.
     /// r may alias a.
     ///
diff --git a/lib/std/math/big/int_test.zig b/lib/std/math/big/int_test.zig
index 63091a8e6a3d..25705bdcf985 100644
--- a/lib/std/math/big/int_test.zig
+++ b/lib/std/math/big/int_test.zig
@@ -2810,33 +2810,6 @@ fn depositBitsTest(comptime source: comptime_int, comptime mask: comptime_int, c
     try testing.expectEqual(std.math.Order.eq, result.toConst().orderAgainstScalar(expected));
 }
 
-test "big int conversion to/from twos complement" {
-    var a = try Managed.initSet(testing.allocator, maxInt(u64));
-    defer a.deinit();
-    var b = try Managed.initSet(testing.allocator, maxInt(u32));
-    defer b.deinit();
-    var c = try Managed.initSet(testing.allocator, maxInt(u493));
-    defer c.deinit();
-
-    var m_a = a.toMutable();
-    m_a.convertToTwosComplement(m_a.toConst(), .unsigned, 64);
-    try testing.expectEqual(m_a.toConst().orderAgainstScalar(maxInt(u64)), .eq);
-    m_a.convertFromTwosComplement(m_a.toConst(), .signed, 64);
-    try testing.expectEqual(m_a.toConst().orderAgainstScalar(-1), .eq);
-
-    var m_b = b.toMutable();
-    m_b.convertToTwosComplement(m_b.toConst(), .unsigned, 32);
-    try testing.expectEqual(m_b.toConst().orderAgainstScalar(maxInt(u32)), .eq);
-    m_b.convertFromTwosComplement(m_b.toConst(), .signed, 32);
-    try testing.expectEqual(m_b.toConst().orderAgainstScalar(-1), .eq);
-
-    var m_c = c.toMutable();
-    m_c.convertToTwosComplement(m_c.toConst(), .unsigned, 493);
-    try testing.expectEqual(m_c.toConst().orderAgainstScalar(maxInt(u493)), .eq);
-    m_c.convertFromTwosComplement(m_c.toConst(), .signed, 493);
-    try testing.expectEqual(m_c.toConst().orderAgainstScalar(-1), .eq);
-}
-
 test "big int conversion read/write twos complement" {
     var a = try Managed.initSet(testing.allocator, (1 << 493) - 1);
     defer a.deinit();
diff --git a/src/Sema.zig b/src/Sema.zig
index 4be441dd415c..cdead89ef033 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -24165,12 +24165,12 @@ fn zirDepositExtractBits(
     const lhs_ty = sema.typeOf(uncasted_lhs);
     const rhs_ty = sema.typeOf(uncasted_rhs);
 
-    if (lhs_ty.zigTypeTag() != .Int) {
-        return sema.fail(block, lhs_src, "expected integer type, found '{}'", .{lhs_ty.fmt(sema.mod)});
+    if (!lhs_ty.isUnsignedInt()) {
+        return sema.fail(block, lhs_src, "expected unsigned integer type, found '{}'", .{lhs_ty.fmt(sema.mod)});
     }
 
-    if (rhs_ty.zigTypeTag() != .Int) {
-        return sema.fail(block, rhs_src, "expected integer type, found '{}'", .{rhs_ty.fmt(sema.mod)});
+    if (!rhs_ty.isUnsignedInt()) {
+        return sema.fail(block, rhs_src, "expected unsigned integer type, found '{}'", .{rhs_ty.fmt(sema.mod)});
     }
 
     const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs };
@@ -24189,16 +24189,12 @@ fn zirDepositExtractBits(
     // If either of the operands are zero, the result is zero
     // If either of the operands are undefined, the result is undefined
     if (maybe_lhs_val) |lhs_val| {
+        if (try lhs_val.compareAllWithZeroAdvanced(.eq, sema)) return sema.addConstant(dest_ty, Value.zero);
         if (lhs_val.isUndef()) return sema.addConstUndef(dest_ty);
-        if (try lhs_val.compareAllWithZeroAdvanced(.eq, sema)) {
-            return sema.addConstant(dest_ty, Value.zero);
-        }
     }
     if (maybe_rhs_val) |rhs_val| {
+        if (try rhs_val.compareAllWithZeroAdvanced(.eq, sema)) return sema.addConstant(dest_ty, Value.zero);
         if (rhs_val.isUndef()) return sema.addConstUndef(dest_ty);
-        if (try rhs_val.compareAllWithZeroAdvanced(.eq, sema)) {
-            return sema.addConstant(dest_ty, Value.zero);
-        }
     }
 
     if (maybe_lhs_val) |lhs_val| {
@@ -36339,38 +36335,21 @@ fn intDepositBits(
     const arena = sema.arena;
     const info = ty.intInfo(target);
 
+    assert(ty.intInfo(target).signedness == .unsigned);
+
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
-    const lhs_bigint = lhs.toBigInt(&lhs_space, target);
-    const rhs_bigint = rhs.toBigInt(&rhs_space, target);
+    const source = lhs.toBigInt(&lhs_space, target);
+    const mask = rhs.toBigInt(&rhs_space, target);
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
         std.math.big.int.calcTwosCompLimbCount(info.bits),
     );
 
-    const source_limbs = try arena.alloc(
-        std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
-    );
-    defer arena.free(source_limbs);
-
-    const mask_limbs = try arena.alloc(
-        std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
-    );
-    defer arena.free(mask_limbs);
-
-    var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
-    var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
 
-    source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
-    mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
-
-    result.depositBits(source.toConst(), mask.toConst());
-
-    result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
+    result.depositBits(source, mask);
     return Value.fromBigInt(arena, result.toConst());
 }
 
@@ -36386,38 +36365,21 @@ fn intExtractBits(
     const arena = sema.arena;
     const info = ty.intInfo(target);
 
+    assert(ty.intInfo(target).signedness == .unsigned);
+
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
-    const lhs_bigint = lhs.toBigInt(&lhs_space, target);
-    const rhs_bigint = rhs.toBigInt(&rhs_space, target);
+    const source = lhs.toBigInt(&lhs_space, target);
+    const mask = rhs.toBigInt(&rhs_space, target);
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
         std.math.big.int.calcTwosCompLimbCount(info.bits),
     );
 
-    const source_limbs = try arena.alloc(
-        std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
-    );
-    defer arena.free(source_limbs);
-
-    const mask_limbs = try arena.alloc(
-        std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
-    );
-    defer arena.free(mask_limbs);
-
-    var source = std.math.big.int.Mutable{ .limbs = source_limbs, .positive = undefined, .len = undefined };
-    var mask = std.math.big.int.Mutable{ .limbs = mask_limbs, .positive = undefined, .len = undefined };
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
 
-    source.convertToTwosComplement(lhs_bigint, info.signedness, info.bits);
-    mask.convertToTwosComplement(rhs_bigint, info.signedness, info.bits);
-
-    result.extractBits(source.toConst(), mask.toConst());
-
-    result.convertFromTwosComplement(result.toConst(), info.signedness, info.bits);
+    result.extractBits(source, mask);
     return Value.fromBigInt(arena, result.toConst());
 }
 

From b2bba7af83735cbaac67cc03bcdf7149a8003808 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 19 Apr 2023 11:18:19 +0100
Subject: [PATCH 13/18] Actually use deposit/extract behaviour test

---
 test/behavior.zig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/behavior.zig b/test/behavior.zig
index 6e9435c49ef4..3006a7602a3c 100644
--- a/test/behavior.zig
+++ b/test/behavior.zig
@@ -152,6 +152,7 @@ test {
     _ = @import("behavior/const_slice_child.zig");
     _ = @import("behavior/decltest.zig");
     _ = @import("behavior/duplicated_test_names.zig");
+    _ = @import("behavior/deposit_extract_bits.zig");
     _ = @import("behavior/defer.zig");
     _ = @import("behavior/empty_tuple_fields.zig");
     _ = @import("behavior/empty_union.zig");

From 3792b6e6f024cbe44ce8bebc95af057e682cade1 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 19 Apr 2023 11:40:21 +0100
Subject: [PATCH 14/18] Enable langref tests for deposit and extract

---
 doc/langref.html.in | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/doc/langref.html.in b/doc/langref.html.in
index db049649afd8..e1c1aa268c43 100644
--- a/doc/langref.html.in
+++ b/doc/langref.html.in
@@ -8295,7 +8295,7 @@ test "main" {
       {#header_open|@depositBits#}
       <pre>{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}</pre>
       <p>
-      {#syntax#}@TypeOf(source){#endsyntax#} must be an integer type.
+      {#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
       </p>
       <p>
       Uses a mask to transfer contiguous lower bits in the {#syntax#}source{#endsyntax#} operand to the destination, transferring them to the corresponding bits in the destination that are set in the mask. All other bits in the destination are zeroed.
@@ -8307,8 +8307,7 @@ test "main" {
       Example:
       </p>
 
-      <!-- TODO make this a test when implemented-->
-      {#syntax_block|zig|@depositBits test#}
+      {#code_begin|test|test_depositbits_builtin#}
 const std = @import("std");
 
 test "deposit bits" {
@@ -8316,7 +8315,7 @@ test "deposit bits" {
         try std.testing.expectEqual(@depositBits(0x00001234, 0xf0f0f0f0), 0x10203040);
     }
 }
-      {#end_syntax_block#}
+      {#code_end#}
       {#see_also|@extractBits#}
       {#header_close#}
 
@@ -8493,7 +8492,7 @@ export fn @"A function name that is a complete sentence."() void {}
       {#header_open|@extractBits#}
       <pre>{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}</pre>
       <p>
-      {#syntax#}T{#endsyntax#} must be an integer type.
+      {#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (for which both parameters must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
       </p>
       <p>
       Uses a mask to transfer bits in the {#syntax#}source{#endsyntax#} operand to the destination, writing them as contiguous lower bits in the destination. The upper bits of the destination are zeroed.
@@ -8505,8 +8504,7 @@ export fn @"A function name that is a complete sentence."() void {}
       Example:
       </p>
 
-      <!-- TODO Make this a test when implemented -->
-      {#syntax_block|zig|@extractBits test#}
+      {#code_begin|test|test_extractbits_builtin#}
 const std = @import("std");
 
 test "extract bits" {
@@ -8514,7 +8512,7 @@ test "extract bits" {
         try std.testing.expectEqual(@extractBits(0x12345678, 0xf0f0f0f0), 0x00001357);
     }
 }
-      {#end_syntax_block#}
+      {#code_end#}
       {#see_also|@depositBits#}
       {#header_close#}
 

From 1843795f6575522647430bba12be56141a99d7d7 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 19 Apr 2023 19:16:56 +0100
Subject: [PATCH 15/18] Allow use of `comptime_int` with deposit/extract

---
 src/Sema.zig | 52 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/Sema.zig b/src/Sema.zig
index cdead89ef033..8e070ecea6c9 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -24153,6 +24153,7 @@ fn zirDepositExtractBits(
     extended: Zir.Inst.Extended.InstData,
     air_tag: Air.Inst.Tag,
 ) CompileError!Air.Inst.Ref {
+    const target = sema.mod.getTarget();
     const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data;
     const src = LazySrcLoc.nodeOffset(extra.node);
 
@@ -24165,12 +24166,12 @@ fn zirDepositExtractBits(
     const lhs_ty = sema.typeOf(uncasted_lhs);
     const rhs_ty = sema.typeOf(uncasted_rhs);
 
-    if (!lhs_ty.isUnsignedInt()) {
-        return sema.fail(block, lhs_src, "expected unsigned integer type, found '{}'", .{lhs_ty.fmt(sema.mod)});
+    if (!lhs_ty.isUnsignedInt() and lhs_ty.zigTypeTag() != .ComptimeInt) {
+        return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(sema.mod)});
     }
 
-    if (!rhs_ty.isUnsignedInt()) {
-        return sema.fail(block, rhs_src, "expected unsigned integer type, found '{}'", .{rhs_ty.fmt(sema.mod)});
+    if (!rhs_ty.isUnsignedInt() and rhs_ty.zigTypeTag() != .ComptimeInt) {
+        return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(sema.mod)});
     }
 
     const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs };
@@ -24178,7 +24179,30 @@ fn zirDepositExtractBits(
         .override = &[_]?LazySrcLoc{ lhs_src, rhs_src },
     });
 
-    assert(dest_ty.zigTypeTag() == .Int);
+    // This branch is only true if *both* parameters are comptime_ints.
+    if (dest_ty.zigTypeTag() == .ComptimeInt) {
+        const builtin_name = switch (air_tag) {
+            .deposit_bits => "@depositBits",
+            .extract_bits => "@extractBits",
+            else => unreachable,
+        };
+
+        const lhs_val = (try sema.resolveMaybeUndefVal(uncasted_lhs)).?;
+        if (lhs_val.compareHetero(.lt, Value.zero, target)) {
+            const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, sema.mod)});
+            try sema.errNote(block, src, err, "{s} requires parameters of type 'comptime_int' be positive", .{builtin_name});
+            return sema.failWithOwnedErrorMsg(err);
+        }
+
+        const rhs_val = (try sema.resolveMaybeUndefVal(uncasted_rhs)).?;
+        if (rhs_val.compareHetero(.lt, Value.zero, target)) {
+            const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, sema.mod)});
+            try sema.errNote(block, src, err, "{s} requires parameters of type 'comptime_int' be positive", .{builtin_name});
+            return sema.failWithOwnedErrorMsg(err);
+        }
+    }
+
+    assert(dest_ty.isUnsignedInt() or dest_ty.zigTypeTag() == .ComptimeInt);
 
     const lhs = try sema.coerce(block, dest_ty, uncasted_lhs, lhs_src);
     const rhs = try sema.coerce(block, dest_ty, uncasted_rhs, rhs_src);
@@ -24200,8 +24224,8 @@ fn zirDepositExtractBits(
     if (maybe_lhs_val) |lhs_val| {
         if (maybe_rhs_val) |rhs_val| {
             const dest_val = switch (air_tag) {
-                .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty),
-                .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty),
+                .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val),
+                .extract_bits => try sema.intExtractBits(lhs_val, rhs_val),
                 else => unreachable,
             };
 
@@ -36323,19 +36347,16 @@ fn intAddWithOverflowScalar(
     };
 }
 
+/// Asserts that the values are positive
 fn intDepositBits(
     sema: *Sema,
     lhs: Value,
     rhs: Value,
-    ty: Type,
 ) !Value {
     // TODO is this a performance issue? maybe we should try the operation without
     // resorting to BigInt first. For non-bigints, @intDeposit could be used?
     const target = sema.mod.getTarget();
     const arena = sema.arena;
-    const info = ty.intInfo(target);
-
-    assert(ty.intInfo(target).signedness == .unsigned);
 
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
@@ -36344,7 +36365,7 @@ fn intDepositBits(
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
+        mask.limbs.len,
     );
 
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
@@ -36353,19 +36374,16 @@ fn intDepositBits(
     return Value.fromBigInt(arena, result.toConst());
 }
 
+/// Asserts that the values are positive
 fn intExtractBits(
     sema: *Sema,
     lhs: Value,
     rhs: Value,
-    ty: Type,
 ) !Value {
     // TODO is this a performance issue? maybe we should try the operation without
     // resorting to BigInt first. For non-bigints, @intExtract could be used?
     const target = sema.mod.getTarget();
     const arena = sema.arena;
-    const info = ty.intInfo(target);
-
-    assert(ty.intInfo(target).signedness == .unsigned);
 
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
@@ -36374,7 +36392,7 @@ fn intExtractBits(
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
-        std.math.big.int.calcTwosCompLimbCount(info.bits),
+        mask.limbs.len,
     );
 
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };

From af42a76a456584c2661cf1e45f372faa3b54f816 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Wed, 19 Apr 2023 22:11:58 +0100
Subject: [PATCH 16/18] Improve compile errors for negative values

---
 src/Sema.zig | 73 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 25 deletions(-)

diff --git a/src/Sema.zig b/src/Sema.zig
index 8e070ecea6c9..f2df1ae88e27 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -24179,37 +24179,60 @@ fn zirDepositExtractBits(
         .override = &[_]?LazySrcLoc{ lhs_src, rhs_src },
     });
 
-    // This branch is only true if *both* parameters are comptime_ints.
-    if (dest_ty.zigTypeTag() == .ComptimeInt) {
-        const builtin_name = switch (air_tag) {
-            .deposit_bits => "@depositBits",
-            .extract_bits => "@extractBits",
-            else => unreachable,
-        };
-
-        const lhs_val = (try sema.resolveMaybeUndefVal(uncasted_lhs)).?;
-        if (lhs_val.compareHetero(.lt, Value.zero, target)) {
-            const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, sema.mod)});
-            try sema.errNote(block, src, err, "{s} requires parameters of type 'comptime_int' be positive", .{builtin_name});
-            return sema.failWithOwnedErrorMsg(err);
-        }
-
-        const rhs_val = (try sema.resolveMaybeUndefVal(uncasted_rhs)).?;
-        if (rhs_val.compareHetero(.lt, Value.zero, target)) {
-            const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, sema.mod)});
-            try sema.errNote(block, src, err, "{s} requires parameters of type 'comptime_int' be positive", .{builtin_name});
-            return sema.failWithOwnedErrorMsg(err);
-        }
-    }
+    const builtin_name = switch (air_tag) {
+        .deposit_bits => "@depositBits",
+        .extract_bits => "@extractBits",
+        else => unreachable,
+    };
 
-    assert(dest_ty.isUnsignedInt() or dest_ty.zigTypeTag() == .ComptimeInt);
+    // Coercion errors are intercepted to add a note if the caller is attempting to pass a negative comptime_int
+    const lhs = sema.coerce(block, dest_ty, uncasted_lhs, lhs_src) catch |err| switch (err) {
+        error.AnalysisFail => {
+            const msg = sema.err orelse return err;
+            const val = (try sema.resolveMaybeUndefVal(uncasted_lhs)).?;
+            if (val.compareHetero(.lt, Value.zero, target)) {
+                try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name});
+            }
+            return err;
+        },
+        else => return err,
+    };
 
-    const lhs = try sema.coerce(block, dest_ty, uncasted_lhs, lhs_src);
-    const rhs = try sema.coerce(block, dest_ty, uncasted_rhs, rhs_src);
+    const rhs = sema.coerce(block, dest_ty, uncasted_rhs, rhs_src) catch |err| switch (err) {
+        error.AnalysisFail => {
+            const msg = sema.err orelse return err;
+            const val = (try sema.resolveMaybeUndefVal(uncasted_rhs)).?;
+            if (val.compareHetero(.lt, Value.zero, target)) {
+                try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name});
+            }
+            return err;
+        },
+        else => return err,
+    };
 
     const maybe_lhs_val = try sema.resolveMaybeUndefVal(lhs);
     const maybe_rhs_val = try sema.resolveMaybeUndefVal(rhs);
 
+    // We check for negative values here only if the type is a comptime_int, as negative values
+    // would have otherwise been filtered out by coercion and the unsigned type restriction
+    if (dest_ty.zigTypeTag() == .ComptimeInt) {
+        if (maybe_lhs_val) |lhs_val| {
+            if (!lhs_val.isUndef() and lhs_val.compareHetero(.lt, Value.zero, target)) {
+                const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, sema.mod)});
+                try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name});
+                return sema.failWithOwnedErrorMsg(err);
+            }
+        }
+
+        if (maybe_rhs_val) |rhs_val| {
+            if (!rhs_val.isUndef() and rhs_val.compareHetero(.lt, Value.zero, target)) {
+                const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, sema.mod)});
+                try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name});
+                return sema.failWithOwnedErrorMsg(err);
+            }
+        }
+    }
+
     // If either of the operands are zero, the result is zero
     // If either of the operands are undefined, the result is undefined
     if (maybe_lhs_val) |lhs_val| {

From 1e8c707cf2984fac0398a96624f103b6cb9aa550 Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Fri, 21 Apr 2023 09:53:30 +0100
Subject: [PATCH 17/18] update comments

---
 src/codegen/llvm.zig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 74838e3845b6..6d8fca448a0d 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -9518,7 +9518,7 @@ pub const FuncGen = struct {
         return self.builder.buildTrunc(result, llvm_trunc_ty, "");
     }
 
-    // TODO Should this belong in compiler-rt?
+    // TODO Move this to compiler-rt (see #14609)
     //
     // Implements @depositBits(source, mask) in software
     // (i.e. without platform-specific instructions)
@@ -9665,7 +9665,7 @@ pub const FuncGen = struct {
         return self.builder.buildTrunc(result, llvm_trunc_ty, "");
     }
 
-    // TODO Should this belong in compiler-rt?
+    // TODO Move this to compiler-rt (see #14609)
     //
     // Implements @extractBits(source, mask) in software
     // (i.e. without platform-specific instructions)

From 3d6e308cbfcec99b9cf59f2206f5898c92acb3bc Mon Sep 17 00:00:00 2001
From: ominitay <37453713+ominitay@users.noreply.github.com>
Date: Sun, 18 Jun 2023 13:43:40 +0100
Subject: [PATCH 18/18] Bring changes up-to-date with master

---
 lib/std/math/big/int.zig |  4 +--
 src/Air.zig              |  2 ++
 src/Liveness/Verify.zig  |  2 ++
 src/Sema.zig             | 54 +++++++++++++++++++++-------------------
 src/codegen/llvm.zig     | 12 ++++-----
 5 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig
index 93edb9255388..2e973f42be4e 100644
--- a/lib/std/math/big/int.zig
+++ b/lib/std/math/big/int.zig
@@ -1741,7 +1741,7 @@ pub const Mutable = struct {
         assert(mask.positive);
 
         r.positive = true;
-        std.mem.set(Limb, r.limbs, 0);
+        @memset(r.limbs, 0);
 
         var mask_limb: Limb = mask.limbs[0];
         var mask_limb_index: Limb = 0;
@@ -1787,7 +1787,7 @@ pub const Mutable = struct {
         assert(mask.positive);
 
         r.positive = true;
-        std.mem.set(Limb, r.limbs, 0);
+        @memset(r.limbs, 0);
 
         var mask_limb: Limb = mask.limbs[0];
         var mask_limb_index: Limb = 0;
diff --git a/src/Air.zig b/src/Air.zig
index 64e6d918ea7e..8a080efb3576 100644
--- a/src/Air.zig
+++ b/src/Air.zig
@@ -1751,6 +1751,8 @@ pub fn mustLower(air: Air, inst: Air.Inst.Index, ip: *const InternPool) bool {
         .work_item_id,
         .work_group_size,
         .work_group_id,
+        .deposit_bits,
+        .extract_bits,
         => false,
 
         .assembly => @truncate(u1, air.extraData(Air.Asm, data.ty_pl.payload).data.flags >> 31) != 0,
diff --git a/src/Liveness/Verify.zig b/src/Liveness/Verify.zig
index 904e38007353..f81f62ad0a3d 100644
--- a/src/Liveness/Verify.zig
+++ b/src/Liveness/Verify.zig
@@ -261,6 +261,8 @@ fn verifyBody(self: *Verify, body: []const Air.Inst.Index) Error!void {
             .memset,
             .memset_safe,
             .memcpy,
+            .deposit_bits,
+            .extract_bits,
             => {
                 const bin_op = data[inst].bin_op;
                 try self.verifyInstOperands(inst, .{ bin_op.lhs, bin_op.rhs, .none });
diff --git a/src/Sema.zig b/src/Sema.zig
index f2df1ae88e27..a7baab078f63 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -24153,7 +24153,7 @@ fn zirDepositExtractBits(
     extended: Zir.Inst.Extended.InstData,
     air_tag: Air.Inst.Tag,
 ) CompileError!Air.Inst.Ref {
-    const target = sema.mod.getTarget();
+    const mod = sema.mod;
     const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data;
     const src = LazySrcLoc.nodeOffset(extra.node);
 
@@ -24166,12 +24166,12 @@ fn zirDepositExtractBits(
     const lhs_ty = sema.typeOf(uncasted_lhs);
     const rhs_ty = sema.typeOf(uncasted_rhs);
 
-    if (!lhs_ty.isUnsignedInt() and lhs_ty.zigTypeTag() != .ComptimeInt) {
-        return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(sema.mod)});
+    if (!lhs_ty.isUnsignedInt(mod) and lhs_ty.zigTypeTag(mod) != .ComptimeInt) {
+        return sema.fail(block, lhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{lhs_ty.fmt(mod)});
     }
 
-    if (!rhs_ty.isUnsignedInt() and rhs_ty.zigTypeTag() != .ComptimeInt) {
-        return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(sema.mod)});
+    if (!rhs_ty.isUnsignedInt(mod) and rhs_ty.zigTypeTag(mod) != .ComptimeInt) {
+        return sema.fail(block, rhs_src, "expected unsigned integer or 'comptime_int', found '{}'", .{rhs_ty.fmt(mod)});
     }
 
     const instructions = &[_]Air.Inst.Ref{ uncasted_lhs, uncasted_rhs };
@@ -24190,7 +24190,7 @@ fn zirDepositExtractBits(
         error.AnalysisFail => {
             const msg = sema.err orelse return err;
             const val = (try sema.resolveMaybeUndefVal(uncasted_lhs)).?;
-            if (val.compareHetero(.lt, Value.zero, target)) {
+            if (val.orderAgainstZero(mod) == .lt) {
                 try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name});
             }
             return err;
@@ -24202,7 +24202,7 @@ fn zirDepositExtractBits(
         error.AnalysisFail => {
             const msg = sema.err orelse return err;
             const val = (try sema.resolveMaybeUndefVal(uncasted_rhs)).?;
-            if (val.compareHetero(.lt, Value.zero, target)) {
+            if (val.orderAgainstZero(mod) == .lt) {
                 try sema.errNote(block, src, msg, "parameters to {s} must be positive", .{builtin_name});
             }
             return err;
@@ -24215,18 +24215,18 @@ fn zirDepositExtractBits(
 
     // We check for negative values here only if the type is a comptime_int, as negative values
     // would have otherwise been filtered out by coercion and the unsigned type restriction
-    if (dest_ty.zigTypeTag() == .ComptimeInt) {
+    if (dest_ty.zigTypeTag(mod) == .ComptimeInt) {
         if (maybe_lhs_val) |lhs_val| {
-            if (!lhs_val.isUndef() and lhs_val.compareHetero(.lt, Value.zero, target)) {
-                const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, sema.mod)});
+            if (!lhs_val.isUndef(mod) and lhs_val.orderAgainstZero(mod) == .lt) {
+                const err = try sema.errMsg(block, lhs_src, "use of negative value '{}'", .{lhs_val.fmtValue(lhs_ty, mod)});
                 try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name});
                 return sema.failWithOwnedErrorMsg(err);
             }
         }
 
         if (maybe_rhs_val) |rhs_val| {
-            if (!rhs_val.isUndef() and rhs_val.compareHetero(.lt, Value.zero, target)) {
-                const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, sema.mod)});
+            if (!rhs_val.isUndef(mod) and rhs_val.orderAgainstZero(mod) == .lt) {
+                const err = try sema.errMsg(block, rhs_src, "use of negative value '{}'", .{rhs_val.fmtValue(rhs_ty, mod)});
                 try sema.errNote(block, src, err, "parameters to {s} must be positive", .{builtin_name});
                 return sema.failWithOwnedErrorMsg(err);
             }
@@ -24236,19 +24236,19 @@ fn zirDepositExtractBits(
     // If either of the operands are zero, the result is zero
     // If either of the operands are undefined, the result is undefined
     if (maybe_lhs_val) |lhs_val| {
-        if (try lhs_val.compareAllWithZeroAdvanced(.eq, sema)) return sema.addConstant(dest_ty, Value.zero);
-        if (lhs_val.isUndef()) return sema.addConstUndef(dest_ty);
+        if (lhs_val.orderAgainstZero(mod) == .eq) return sema.addConstant(dest_ty, try mod.intValue(dest_ty, 0));
+        if (lhs_val.isUndef(mod)) return sema.addConstUndef(dest_ty);
     }
     if (maybe_rhs_val) |rhs_val| {
-        if (try rhs_val.compareAllWithZeroAdvanced(.eq, sema)) return sema.addConstant(dest_ty, Value.zero);
-        if (rhs_val.isUndef()) return sema.addConstUndef(dest_ty);
+        if (rhs_val.orderAgainstZero(mod) == .eq) return sema.addConstant(dest_ty, try mod.intValue(dest_ty, 0));
+        if (rhs_val.isUndef(mod)) return sema.addConstUndef(dest_ty);
     }
 
     if (maybe_lhs_val) |lhs_val| {
         if (maybe_rhs_val) |rhs_val| {
             const dest_val = switch (air_tag) {
-                .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val),
-                .extract_bits => try sema.intExtractBits(lhs_val, rhs_val),
+                .deposit_bits => try sema.intDepositBits(lhs_val, rhs_val, dest_ty),
+                .extract_bits => try sema.intExtractBits(lhs_val, rhs_val, dest_ty),
                 else => unreachable,
             };
 
@@ -36375,16 +36375,17 @@ fn intDepositBits(
     sema: *Sema,
     lhs: Value,
     rhs: Value,
+    ty: Type,
 ) !Value {
     // TODO is this a performance issue? maybe we should try the operation without
     // resorting to BigInt first. For non-bigints, @intDeposit could be used?
-    const target = sema.mod.getTarget();
+    const mod = sema.mod;
     const arena = sema.arena;
 
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
-    const source = lhs.toBigInt(&lhs_space, target);
-    const mask = rhs.toBigInt(&rhs_space, target);
+    const source = lhs.toBigInt(&lhs_space, mod);
+    const mask = rhs.toBigInt(&rhs_space, mod);
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
@@ -36394,7 +36395,7 @@ fn intDepositBits(
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
 
     result.depositBits(source, mask);
-    return Value.fromBigInt(arena, result.toConst());
+    return mod.intValue_big(ty, result.toConst());
 }
 
 /// Asserts that the values are positive
@@ -36402,16 +36403,17 @@ fn intExtractBits(
     sema: *Sema,
     lhs: Value,
     rhs: Value,
+    ty: Type,
 ) !Value {
     // TODO is this a performance issue? maybe we should try the operation without
     // resorting to BigInt first. For non-bigints, @intExtract could be used?
-    const target = sema.mod.getTarget();
+    const mod = sema.mod;
     const arena = sema.arena;
 
     var lhs_space: Value.BigIntSpace = undefined;
     var rhs_space: Value.BigIntSpace = undefined;
-    const source = lhs.toBigInt(&lhs_space, target);
-    const mask = rhs.toBigInt(&rhs_space, target);
+    const source = lhs.toBigInt(&lhs_space, mod);
+    const mask = rhs.toBigInt(&rhs_space, mod);
 
     const result_limbs = try arena.alloc(
         std.math.big.Limb,
@@ -36421,7 +36423,7 @@ fn intExtractBits(
     var result = std.math.big.int.Mutable{ .limbs = result_limbs, .positive = undefined, .len = undefined };
 
     result.extractBits(source, mask);
-    return Value.fromBigInt(arena, result.toConst());
+    return mod.intValue_big(ty, result.toConst());
 }
 
 /// Asserts the values are comparable. Both operands have type `ty`.
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 6d8fca448a0d..cdd5277570e9 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -9456,7 +9456,7 @@ pub const FuncGen = struct {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
         const rhs = try self.resolveInst(bin_op.rhs);
-        const inst_ty = self.air.typeOfIndex(inst);
+        const inst_ty = self.typeOfIndex(inst);
 
         const target = self.dg.module.getTarget();
         const params = [2]*llvm.Value{ lhs, rhs };
@@ -9465,7 +9465,7 @@ pub const FuncGen = struct {
                 // Doesn't have pdep
                 if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk;
 
-                const bits = inst_ty.intInfo(target).bits;
+                const bits = inst_ty.intInfo(self.dg.module).bits;
                 const supports_64 = tag == .x86_64;
                 // Integer size doesn't match the available instruction(s)
                 if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk;
@@ -9488,7 +9488,7 @@ pub const FuncGen = struct {
         assert(target.cpu.arch.isX86());
         assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2));
 
-        const bits = ty.intInfo(target).bits;
+        const bits = ty.intInfo(self.dg.module).bits;
         const intrinsic_name = switch (bits) {
             1...32 => "llvm.x86.bmi.pdep.32",
             33...64 => "llvm.x86.bmi.pdep.64",
@@ -9603,7 +9603,7 @@ pub const FuncGen = struct {
         const bin_op = self.air.instructions.items(.data)[inst].bin_op;
         const lhs = try self.resolveInst(bin_op.lhs);
         const rhs = try self.resolveInst(bin_op.rhs);
-        const inst_ty = self.air.typeOfIndex(inst);
+        const inst_ty = self.typeOfIndex(inst);
 
         const target = self.dg.module.getTarget();
         const params = [2]*llvm.Value{ lhs, rhs };
@@ -9612,7 +9612,7 @@ pub const FuncGen = struct {
                 // Doesn't have pext
                 if (!std.Target.x86.featureSetHas(target.cpu.features, .bmi2)) break :blk;
 
-                const bits = inst_ty.intInfo(target).bits;
+                const bits = inst_ty.intInfo(self.dg.module).bits;
                 const supports_64 = tag == .x86_64;
                 // Integer size doesn't match the available instruction(s)
                 if (!(bits <= 32 or (bits <= 64 and supports_64))) break :blk;
@@ -9635,7 +9635,7 @@ pub const FuncGen = struct {
         assert(target.cpu.arch.isX86());
         assert(std.Target.x86.featureSetHas(target.cpu.features, .bmi2));
 
-        const bits = ty.intInfo(target).bits;
+        const bits = ty.intInfo(self.dg.module).bits;
         const intrinsic_name = switch (bits) {
             1...32 => "llvm.x86.bmi.pext.32",
             33...64 => "llvm.x86.bmi.pext.64",