Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement @depositBits and @extractBits #18680

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
34ffb08
std.math.big.int: Implement depositBits and extractBits
ominitay Apr 9, 2023
32ff101
std.math.big.int: Conversion from 2's complement
ominitay Apr 11, 2023
a33d4f6
Write docs for `@depositBits` and `@extractBits`
ominitay Apr 12, 2023
9bd3bf7
Implement `@depositBits` and `@extractBits`
ominitay Apr 12, 2023
2de5fcc
LLVM: Implement emulation for `@depositBits`
ominitay Apr 13, 2023
a2850aa
LLVM: Implement emulation for `@extractBits`
ominitay Apr 13, 2023
9760841
std.math.big.int: Fix index out-of-bounds
ominitay Apr 14, 2023
566a888
Add behaviour tests for `@depositBits` and `@extractBits`
ominitay Apr 14, 2023
db280ce
zig fmt
ominitay Apr 15, 2023
9020b2f
Replace `u6` with `Log2Limb`
ominitay Apr 15, 2023
eecdf99
big.int.depositBits/extractBits: Remove limbs_buffer
ominitay Apr 17, 2023
13d4205
Disallow signed integer types for deposit/extract
ominitay Apr 18, 2023
313d258
Actually use deposit/extract behaviour test
ominitay Apr 19, 2023
5a42ecb
Enable langref tests for deposit and extract
ominitay Apr 19, 2023
fc8eadb
Allow use of `comptime_int` with deposit/extract
ominitay Apr 19, 2023
9c14b26
Improve compile errors for negative values
ominitay Apr 19, 2023
4eff831
update comments
ominitay Apr 21, 2023
69e893d
Bring branch up-to-date
ominitay Nov 9, 2023
71f8db4
x86: Implement `@depositBits` and `@extractBits`
ominitay Nov 12, 2023
5f66df1
update deposit/extract to master
ominitay Jan 25, 2024
e0b4630
zig fmt
ominitay Jan 25, 2024
4bcaab9
Don't compile tests for deposit/extract when unsupported
ominitay Jan 25, 2024
432e1cb
Bring branch up-to-date with llvm backend changes
ominitay Mar 15, 2024
725019e
Emulate pdep and pext in compiler-rt
ominitay Mar 26, 2024
e1915f9
Include depositBits/extractBits emulation in x86 backend
ominitay Mar 27, 2024
e80a4b2
Update behaviour tests for deposit/extractBits
ominitay Mar 28, 2024
b87e549
Bring fork up-to-date with master
ominitay Mar 29, 2024
726b436
Skip failing behaviour tests
ominitay Apr 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions doc/langref.html.in
Original file line number Diff line number Diff line change
Expand Up @@ -8558,6 +8558,33 @@ test "main" {
{#see_also|@cVaArg|@cVaCopy|@cVaEnd#}
{#header_close#}

{#header_open|@depositBits#}
<pre>{#syntax#}@depositBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (in which case both operands must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers the contiguous low-order bits of the {#syntax#}source{#endsyntax#} operand to the destination, placing them, in order, at the bit positions that are set in {#syntax#}mask{#endsyntax#}. All other bits in the destination are zeroed.
</p>
<p>
Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PDEP) in microcode. It may be faster to use an alternative method in both of these cases.
</p>
<p>
Example:
</p>

{#code_begin|test|test_depositbits_builtin#}
const std = @import("std");

// Deposits the four low nibbles of the source into the high nibble of each byte.
test "deposit bits" {
    comptime {
        // `expectEqual` takes the expected value first, then the value under test.
        try std.testing.expectEqual(0x10203040, @depositBits(0x00001234, 0xf0f0f0f0));
    }
}
{#code_end#}
{#see_also|@extractBits#}
{#header_close#}

{#header_open|@divExact#}
<pre>{#syntax#}@divExact(numerator: T, denominator: T) T{#endsyntax#}</pre>
<p>
Expand Down Expand Up @@ -8726,6 +8753,33 @@ export fn @"A function name that is a complete sentence."() void {}
{#see_also|@export#}
{#header_close#}

{#header_open|@extractBits#}
<pre>{#syntax#}@extractBits(source: T, mask: T) T{#endsyntax#}</pre>
<p>
{#syntax#}T{#endsyntax#} must be an unsigned integer type, or a {#syntax#}comptime_int{#endsyntax#} (in which case both operands must be non-negative). {#syntax#}T{#endsyntax#} is determined by peer-type resolution.
</p>
<p>
Transfers the bits of the {#syntax#}source{#endsyntax#} operand that are selected by the set bits of {#syntax#}mask{#endsyntax#} to the destination, writing them, in order, as contiguous low-order bits. The remaining upper bits of the destination are zeroed.
</p>
<p>
Currently, only x86 processors with BMI2 enabled support this in hardware. On processors without support for the instruction, it will be emulated. AMD processors before Zen 3 implement the corresponding instruction (PEXT) in microcode. It may be faster to use an alternative method in both of these cases.
</p>
<p>
Example:
</p>

{#code_begin|test|test_extractbits_builtin#}
const std = @import("std");

// Gathers the high nibble of each source byte into the low 16 bits of the result.
test "extract bits" {
    comptime {
        // `expectEqual` takes the expected value first, then the value under test.
        try std.testing.expectEqual(0x00001357, @extractBits(0x12345678, 0xf0f0f0f0));
    }
}
{#code_end#}
{#see_also|@depositBits#}
{#header_close#}

{#header_open|@fence#}
<pre>{#syntax#}@fence(order: AtomicOrder) void{#endsyntax#}</pre>
<p>
Expand Down
1 change: 1 addition & 0 deletions lib/compiler_rt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ comptime {
_ = @import("compiler_rt/popcount.zig");
_ = @import("compiler_rt/bswap.zig");
_ = @import("compiler_rt/cmp.zig");
_ = @import("compiler_rt/pdeppext.zig");

_ = @import("compiler_rt/shift.zig");
_ = @import("compiler_rt/negXi2.zig");
Expand Down
177 changes: 177 additions & 0 deletions lib/compiler_rt/pdeppext.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
const std = @import("std");
const builtin = @import("builtin");
const common = @import("common.zig");

const Limb = u32;
const Log2Limb = u5;

// Exports the software pdep/pext routines defined in this file under their own names,
// using the linkage and visibility taken from `common.zig`.
comptime {
// Bit-deposit (pdep): arbitrary-width limb-array form, then fixed-width forms.
@export(__pdep_bigint, .{ .name = "__pdep_bigint", .linkage = common.linkage, .visibility = common.visibility });
@export(__pdep_u32, .{ .name = "__pdep_u32", .linkage = common.linkage, .visibility = common.visibility });
@export(__pdep_u64, .{ .name = "__pdep_u64", .linkage = common.linkage, .visibility = common.visibility });
@export(__pdep_u128, .{ .name = "__pdep_u128", .linkage = common.linkage, .visibility = common.visibility });

// Bit-extract (pext): same set of widths.
@export(__pext_bigint, .{ .name = "__pext_bigint", .linkage = common.linkage, .visibility = common.visibility });
@export(__pext_u32, .{ .name = "__pext_u32", .linkage = common.linkage, .visibility = common.visibility });
@export(__pext_u64, .{ .name = "__pext_u64", .linkage = common.linkage, .visibility = common.visibility });
@export(__pext_u128, .{ .name = "__pext_u128", .linkage = common.linkage, .visibility = common.visibility });
}

const endian = builtin.cpu.arch.endian();

/// Reads limb `i` of `x` in an endian-independent order: on little-endian targets
/// this is `x[i]`, otherwise the index is mirrored so that `i == 0` reads the last element.
inline fn limb(x: []const Limb, i: usize) Limb {
    const index = switch (endian) {
        .little => i,
        else => x.len - 1 - i,
    };
    return x[index];
}

/// Returns a pointer to limb `i` of `x` in an endian-independent order: `&x[i]` on
/// little-endian targets, otherwise a pointer to the mirrored element `x[x.len - 1 - i]`.
inline fn limb_ptr(x: []Limb, i: usize) *Limb {
    const index = switch (endian) {
        .little => i,
        else => x.len - 1 - i,
    };
    return &x[index];
}

/// Stores `v` into limb `i` of `x` in an endian-independent order: `x[i]` on
/// little-endian targets, otherwise the mirrored element `x[x.len - 1 - i]`.
inline fn limb_set(x: []Limb, i: usize, v: Limb) void {
    const index = switch (endian) {
        .little => i,
        else => x.len - 1 - i,
    };
    x[index] = v;
}

// Code for bigint pdep and pext largely taken from std.math.big.int.depositBits and extractBits

/// Bit-deposit over limb slices: the low-order bits of `source` are written, in order,
/// to the positions of the set bits of `mask`; every other bit of `result` is zero.
///
/// `result` is cleared before `source` and `mask` are read, so it must not alias them.
/// Limbs are addressed through `limb`/`limb_ptr`, which hide the target's limb order.
inline fn pdep_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    @memset(result, 0);

    // A zero-width operand has no limbs at all; reading `mask[0]` would be out of bounds.
    if (mask.len == 0) return;

    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // `i` is the index of the next source bit to deposit.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest set bit in mask, clearing it so it is consumed only once.
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            // Current mask limb is exhausted; move on, or finish if it was the last one.
            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        const i_limb_index = i / @bitSizeOf(Limb);
        const i_limb_bit: Log2Limb = @truncate(i);

        // Past the end of `source`: the remaining bits to deposit are all zero.
        if (i_limb_index >= source.len) break;

        const source_bit_set = limb(source, i_limb_index) & (@as(Limb, 1) << i_limb_bit) != 0;

        limb_ptr(result, mask_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
    }
}

/// C-ABI entry point for bit-deposit on integers of `bits` bits.
///
/// `r`, `s` and `m` (result, source, mask) are each treated as `ceil(bits / 32)` limbs.
pub fn __pdep_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.C) void {
    // The divisor is a non-zero constant, so the error path of `divCeil` is never taken.
    const limb_count = std.math.divCeil(usize, bits, 32) catch unreachable;

    pdep_bigint(r[0..limb_count], s[0..limb_count], m[0..limb_count]);
}

/// Bit-extract over limb slices: the bits of `source` at the positions of the set bits
/// of `mask` are packed, in order, into the low-order bits of `result`; the remaining
/// bits of `result` are zero.
///
/// `result` is cleared before `source` and `mask` are read, so it must not alias them.
/// Limbs are addressed through `limb`/`limb_ptr`, which hide the target's limb order.
inline fn pext_bigint(result: []Limb, source: []const Limb, mask: []const Limb) void {
    @memset(result, 0);

    // A zero-width operand has no limbs at all; reading `mask[0]` would be out of bounds.
    if (mask.len == 0) return;

    var mask_limb: Limb = limb(mask, 0);
    var mask_limb_index: usize = 0;
    // `i` is the index of the next result bit to write.
    var i: usize = 0;

    outer: while (true) : (i += 1) {
        // Find the lowest set bit in mask, clearing it so it is consumed only once.
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != @bitSizeOf(Limb)) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            // Current mask limb is exhausted; move on, or finish if it was the last one.
            mask_limb_index += 1;
            if (mask_limb_index >= mask.len) break :outer;

            mask_limb = limb(mask, mask_limb_index);
        };

        const i_limb_index = i / @bitSizeOf(Limb);
        const i_limb_bit: Log2Limb = @truncate(i);

        if (i_limb_index >= source.len) break;

        const source_bit_set = limb(source, mask_limb_index) & (@as(Limb, 1) << mask_limb_bit) != 0;

        limb_ptr(result, i_limb_index).* |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;
    }
}

/// C-ABI entry point for bit-extract on integers of `bits` bits.
///
/// `r`, `s` and `m` (result, source, mask) are each treated as `ceil(bits / 32)` limbs.
pub fn __pext_bigint(r: [*]Limb, s: [*]const Limb, m: [*]const Limb, bits: usize) callconv(.C) void {
    // The divisor is a non-zero constant, so the error path of `divCeil` is never taken.
    const limb_count = std.math.divCeil(usize, bits, 32) catch unreachable;

    pext_bigint(r[0..limb_count], s[0..limb_count], m[0..limb_count]);
}

/// Software bit-deposit for the unsigned integer type `T`.
///
/// Walks the set bits of `mask_` from least to most significant; the k-th set bit of the
/// mask receives bit k of `source`. All bits of the result outside the mask are zero.
inline fn pdep_uX(comptime T: type, source: T, mask_: T) T {
    // Single-bit selector for the next low-order bit of `source`.
    var bb: T = 1;
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit still left in the mask, then remove it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bb;
        if (source_bit != 0) result |= bit;
        // Wrapping add: once the selector has reached the top bit (e.g. an all-ones mask),
        // doubling it overflows `T`. The wrapped value is never read because the mask is
        // then empty, but a plain `+=` would be illegal behaviour on that iteration.
        bb +%= bb;
    }

    return result;
}

/// 32-bit bit-deposit with the C calling convention.
pub fn __pdep_u32(source: u32, mask: u32) callconv(.C) u32 {
    return pdep_uX(u32, source, mask);
}

/// 64-bit bit-deposit with the C calling convention.
pub fn __pdep_u64(source: u64, mask: u64) callconv(.C) u64 {
    return pdep_uX(u64, source, mask);
}

/// 128-bit bit-deposit with the C calling convention.
pub fn __pdep_u128(source: u128, mask: u128) callconv(.C) u128 {
    return pdep_uX(u128, source, mask);
}

/// Software bit-extract for the unsigned integer type `T`.
///
/// Walks the set bits of `mask_` from least to most significant; the bit of `source` at
/// the k-th set mask position becomes bit k of the result. Higher result bits are zero.
inline fn pext_uX(comptime T: type, source: T, mask_: T) T {
    // Single-bit selector for the next low-order bit of the result.
    var bb: T = 1;
    var result: T = 0;
    var mask = mask_;

    while (mask != 0) {
        // Isolate the lowest set bit still left in the mask, then remove it.
        const bit = mask & ~(mask - 1);
        mask &= ~bit;
        const source_bit = source & bit;
        if (source_bit != 0) result |= bb;
        // Wrapping add: once the selector has reached the top bit (e.g. an all-ones mask),
        // doubling it overflows `T`. The wrapped value is never read because the mask is
        // then empty, but a plain `+=` would be illegal behaviour on that iteration.
        bb +%= bb;
    }

    return result;
}

/// 32-bit bit-extract with the C calling convention.
pub fn __pext_u32(source: u32, mask: u32) callconv(.C) u32 {
    return pext_uX(u32, source, mask);
}

/// 64-bit bit-extract with the C calling convention.
pub fn __pext_u64(source: u64, mask: u64) callconv(.C) u64 {
    return pext_uX(u64, source, mask);
}

/// 128-bit bit-extract with the C calling convention.
pub fn __pext_u128(source: u128, mask: u128) callconv(.C) u128 {
    return pext_uX(u128, source, mask);
}
92 changes: 92 additions & 0 deletions lib/std/math/big/int.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1735,6 +1735,98 @@ pub const Mutable = struct {
y.shiftRight(y.toConst(), norm_shift);
}

// TODO this function is quite inefficient and could be optimised
/// r = @depositBits(source, mask)
///
/// Writes the low-order bits of `source`, in order, to the positions of the set bits of
/// `mask`; every other bit of `r` is zero.
///
/// Asserts that `source` and `mask` are positive.
/// `r.limbs` is zeroed before the operands are read, so `r` must not alias `source` or `mask`.
/// `r.limbs` is indexed by mask limb position, so it should be at least as long as `mask.limbs`.
pub fn depositBits(r: *Mutable, source: Const, mask: Const) void {
    assert(source.positive);
    assert(mask.positive);

    r.positive = true;
    @memset(r.limbs, 0);

    var mask_limb: Limb = mask.limbs[0];
    // Index into `mask.limbs` (and `r.limbs`); this is a position, not a limb value.
    var mask_limb_index: usize = 0;
    // `i` is the index of the next source bit to deposit.
    var i: usize = 0;
    outer: while (true) : (i += 1) {
        // Find next bit in mask, clearing it so it is consumed only once
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != limb_bits) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            mask_limb_index += 1;
            // No more limbs, we've finished iterating the mask
            if (mask_limb_index >= mask.limbs.len) {
                break :outer;
            }

            mask_limb = mask.limbs[mask_limb_index];
        };

        const i_limb_index = i / limb_bits;
        const i_limb_bit: Log2Limb = @truncate(i);

        if (i_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)

        const source_bit_set = source.limbs[i_limb_index] & (@as(Limb, 1) << i_limb_bit) != 0;

        r.limbs[mask_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << mask_limb_bit;
    }

    r.normalize(r.limbs.len);
}

// TODO this function is quite inefficient and could be optimised
/// r = @extractBits(source, mask)
///
/// Packs the bits of `source` found at the positions of the set bits of `mask`, in order,
/// into the low-order bits of `r`; the remaining bits of `r` are zero.
///
/// Asserts that `source` and `mask` are positive.
/// `r.limbs` is zeroed before the operands are read, so `r` must not alias `source` or `mask`.
/// `r.limbs` must be long enough to hold one bit per mask bit that is consumed.
pub fn extractBits(r: *Mutable, source: Const, mask: Const) void {
    assert(source.positive);
    assert(mask.positive);

    r.positive = true;
    @memset(r.limbs, 0);

    var mask_limb: Limb = mask.limbs[0];
    // Index into `mask.limbs` (and `source.limbs`); this is a position, not a limb value.
    var mask_limb_index: usize = 0;
    // `i` is the index of the next result bit to write.
    var i: usize = 0;
    outer: while (true) : (i += 1) {
        // Find next bit in mask, clearing it so it is consumed only once
        const mask_limb_bit: Log2Limb = limb_bit: while (true) {
            const mask_limb_tz = @ctz(mask_limb);
            if (mask_limb_tz != limb_bits) {
                const cast_limb_bit: Log2Limb = @intCast(mask_limb_tz);
                mask_limb ^= @as(Limb, 1) << cast_limb_bit;
                break :limb_bit cast_limb_bit;
            }

            mask_limb_index += 1;
            // No more limbs, we've finished iterating the mask
            if (mask_limb_index >= mask.limbs.len) {
                break :outer;
            }

            mask_limb = mask.limbs[mask_limb_index];
        };

        const i_limb_index = i / limb_bits;
        const i_limb_bit: Log2Limb = @truncate(i);

        if (mask_limb_index >= source.limbs.len) break; // Stop when we reach the end of `source` (we can treat the rest as zeroes)

        const source_bit_set = source.limbs[mask_limb_index] & (@as(Limb, 1) << mask_limb_bit) != 0;

        r.limbs[i_limb_index] |= @as(Limb, @intFromBool(source_bit_set)) << i_limb_bit;
    }

    r.normalize(r.limbs.len);
}

/// If a is positive, this passes through to truncate.
/// If a is negative, then r is set to positive with the bit pattern ~(a - 1).
/// r may alias a.
Expand Down
Loading
Loading