-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Use Span to drop byte[1] allocations #15680
Conversation
@dotnet-bot test Windows_NT x64 Checked corefx_baseline |
@@ -30,18 +30,12 @@ internal PinnedBufferMemoryStream(byte[] array) | |||
Debug.Assert(array != null, "Array can't be null"); | |||
|
|||
int len = array.Length; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: Move this line right in front of the Initialize
call?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved
src/mscorlib/src/System/IO/Stream.cs
Outdated
byte[] oneByteArray = new byte[1]; | ||
oneByteArray[0] = value; | ||
Write(oneByteArray, 0, 1); | ||
Span<byte> bytes = stackalloc byte[1]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you even need to stackalloc here?
public virtual unsafe WriteByte(byte value)
{
Write(new Span<byte>(&value, 1));
}
it's unsafe though....
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't think it would make much difference with the improvements to small fixed stackalloc #14623; but its outrageously better
ReadByte with
Span<byte> bytes = stackalloc byte[1];
Produces
; Lcl frame size = 112
G_M10894_IG01:
push rbp
push rdi
push rsi
sub rsp, 112
lea rbp, [rsp+20H]
mov rsi, rcx
lea rdi, [rbp+10H]
mov ecx, 14
xor rax, rax
rep stosd
mov rcx, rsi
mov qword ptr [rbp+48H], rsp
mov rax, qword ptr [(reloc)]
mov qword ptr [rbp+08H], rax
G_M10894_IG02:
lea rdx, bword ptr [rbp+28H]
G_M10894_IG03:
xorpd xmm0, xmm0
movdqu qword ptr [rdx], xmm0
G_M10894_IG04:
add rsp, 32
push 0
push 0
sub rsp, 32
lea rdx, [rsp+20H]
mov qword ptr [rbp+48H], rsp
xor rax, rax
mov qword ptr [rbp+20H], rax
mov bword ptr [rbp+20H], rdx
mov rdx, bword ptr [rbp+20H]
lea rax, bword ptr [rbp+28H]
mov bword ptr [rax], rdx
mov dword ptr [rbp+30H], 1
G_M10894_IG05:
movdqu xmm0, qword ptr [rbp+28H]
movdqu qword ptr [rbp+38H], xmm0
G_M10894_IG06:
movdqu xmm0, qword ptr [rbp+38H]
movdqu qword ptr [rbp+10H], xmm0
G_M10894_IG07:
lea rdx, bword ptr [rbp+10H]
mov rax, qword ptr [rcx]
mov rax, qword ptr [rax+96]
call qword ptr [rax+24]Stream:Read(struct):int:this
test eax, eax
jne SHORT G_M10894_IG10
mov eax, -1
lea rcx, [(reloc)]
mov rcx, qword ptr [rcx]
cmp qword ptr [rbp+08H], rcx
je SHORT G_M10894_IG08
call CORINFO_HELP_FAIL_FAST
G_M10894_IG08:
nop
G_M10894_IG09:
lea rsp, [rbp+50H]
pop rsi
pop rdi
pop rbp
ret
G_M10894_IG10:
cmp dword ptr [rbp+40H], 0
jbe SHORT G_M10894_IG13
mov rax, bword ptr [rbp+38H]
movzx rax, byte ptr [rax]
lea rcx, [(reloc)]
mov rcx, qword ptr [rcx]
cmp qword ptr [rbp+08H], rcx
je SHORT G_M10894_IG11
call CORINFO_HELP_FAIL_FAST
G_M10894_IG11:
nop
G_M10894_IG12:
lea rsp, [rbp+50H]
pop rsi
pop rdi
pop rbp
ret
G_M10894_IG13:
call CORINFO_HELP_RNGCHKFAIL
int3
; Total bytes of code 229, prolog size 46 for method Stream:ReadByte():int:this
Changing the one line to these two lines
byte b = 0;
Span<byte> bytes = new Span<byte>(ref b, 1);
Produces
; Lcl frame size = 64
G_M10891_IG01:
push rsi
sub rsp, 64
xor rax, rax
mov qword ptr [rsp+28H], rax
G_M10891_IG02:
xor edx, edx
mov dword ptr [rsp+3CH], edx
lea rdx, bword ptr [rsp+3CH]
mov rsi, rdx
mov rdx, rsi
mov eax, 1
mov rsi, rdx
lea rdx, bword ptr [rsp+28H]
mov bword ptr [rdx], rsi
mov dword ptr [rdx+8], eax
lea rdx, bword ptr [rsp+28H]
mov rax, qword ptr [rcx]
mov rax, qword ptr [rax+96]
call qword ptr [rax+24]Stream:Read(struct):int:this
test eax, eax
jne SHORT G_M10891_IG04
mov eax, -1
G_M10891_IG03:
add rsp, 64
pop rsi
ret
G_M10891_IG04:
movzx rax, byte ptr [rsi]
G_M10891_IG05:
add rsp, 64
pop rsi
ret
; Total bytes of code 87, prolog size 12 for method Stream:ReadByte():int:this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, that does look strange; I would not expect them to be all that different.
I'll take a look at what is going on under the hood.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like csc is storing the length in a local and so blocks the jit's ability to convert the stackalloc as it doesn't see that it is a fixed size.
IL_0000: ldc.i4.1
IL_0001: stloc.1
IL_0002: ldloc.1
IL_0003: conv.u
IL_0004: localloc
The localloc transformation happens early before we've set up any kind of dataflow capabilities in the jit. So despite this looking like something that should trivially fall out we don't have the mechansims to catch this today.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I hack the jit to forcibly enable the fixed stackalloc transformation then Unsafe.As
fails to inline in a manner similar to #11211 -- here the argument is the address of a local and so arg fetching bypasses the fix from #11218.
With the unconverted stackalloc Unsafe.As
inlines ok.
@@ -46,7 +46,8 @@ unsafe static internal byte ConvertToNative(char managedChar, bool fBestFit, boo | |||
|
|||
static internal char ConvertToManaged(byte nativeChar) | |||
{ | |||
byte[] bytes = new byte[1] { nativeChar }; | |||
Span<byte> bytes = stackalloc byte[1]; | |||
bytes[0] = nativeChar; | |||
string str = Encoding.Default.GetString(bytes); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Same question here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is in corelib. Presumably it can just use the Span(ref T, int) ctor. Or it could use DangerousCreate that just calls it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was trying to figure out why DangerousCreate wasn't showing up in intellisense:
[EditorBrowsable(EditorBrowsableState.Never)]
public static Span<T> DangerousCreate(object obj, ref T objectData, int length);
This totally trolled me
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@KrzysztofCwalina, @ahsonkhan, why is this marked as EditorBrowsableState.Never? We seem to be too quick to do that. It's already named Dangerous
... why do we need to hide it? Even if we wanted to hide it from the IntelliSense list, marking it as Never means that once you type out "DangerousCreate" and the open paren, you still don't get IntelliSense help with the argument list, making it difficult to use even after you've discovered it.
src/mscorlib/src/System/IO/Stream.cs
Outdated
byte[] oneByteArray = new byte[1]; | ||
int r = Read(oneByteArray, 0, 1); | ||
Span<byte> bytes = stackalloc byte[1]; | ||
int r = Read(bytes); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure how useful this is. If someone is overriding the new Read(Span) method, then hopefully they're also overriding ReadByte (and more efficiently). If they're not overriding Read(Span), then the base Read (Span) will fall back to using ArrayPool and a copy, and without measuring it's not clear whether that's actually better than a one-byte allocation. That's why I didn't change these when adding the span methods.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are right, looking at the asm its pretty terrible. It would be better to rent from the array pool directly rather than stackalloc->span->arraypool. However the granularity is way too high; so it would be better if the caller rented from the array pool and used array overload...
Will revert the Stream change as its dubious
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Reverted the Stream ReadByte/WriteByte
|
||
_array = array; | ||
_pinningHandle = GCHandle.Alloc(array, GCHandleType.Pinned); | ||
// Now the byte[] is pinned for the lifetime of this instance. | ||
// But I also need to get a pointer to that block of memory... | ||
fixed (byte* ptr = &_array[0]) | ||
fixed (byte* ptr = &MemoryMarshal.GetReference((Span<byte>)array)) | ||
Initialize(ptr, len, len, FileAccess.Read); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is span needed here? fixed works fine with zero-length arrays).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pinning zero length array gives back null
. Initialize
method does not accept null.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see. Though that could be addressed as well just by passing ptr != null ? ptr : (byte*)1
instead of ptr
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, there are several ways to address this. Another option in this case is to fix or delete the null check in the Initialize
method.
(byte*)1
is a very unusual pointer value. If I saw it somewhere, I would be looking for memory corruption. It would be nice for the unmanaged pointers to either be null or point to valid memory.- The code size varies for the different options. I suspect that
ptr != null ? ptr : (byte*)1
pattern is larger overall native code compared toMemoryMarshal.GetReference
- we should measure.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As now (MemoryMarshal.GetReference
) its
call GCHandle:InternalAlloc(ref,int):long
or rax, 1
mov qword ptr [rsi+96], rax
lea r8, bword ptr [rdi+16]
mov r9d, dword ptr [rdi+8]
mov bword ptr [rsp+28H], r8
mov rdx, r8
mov dword ptr [rsp+20H], 1
movsxd r8, ebx
movsxd r9, ebx
mov rcx, rsi
call UnmanagedMemoryStream:Initialize(long,long,long,int):this
Changing to
fixed (byte* ptr = array)
Initialize(ptr != null ? ptr : (byte*)1, len, len, FileAccess.Read);
It becomes
call GCHandle:InternalAlloc(ref,int):long
or rax, 1
mov qword ptr [rsi+96], rax
mov gword ptr [rsp+28H], rdi
mov r8, gword ptr [rsp+28H]
cmp dword ptr [r8+8], 0
jne SHORT G_M3256_IG04
G_M3256_IG03:
xor r8, r8
jmp SHORT G_M3256_IG05
G_M3256_IG04:
mov r8, gword ptr [rsp+28H]
cmp dword ptr [r8+8], 0
jbe SHORT G_M3256_IG09
mov r8, gword ptr [rsp+28H]
add r8, 16
G_M3256_IG05:
test r8, r8
jne SHORT G_M3256_IG06
mov edx, 1
jmp SHORT G_M3256_IG07
G_M3256_IG06:
mov rdx, r8
G_M3256_IG07:
mov dword ptr [rsp+20H], 1
movsxd r9, ebx
mov r8, r9
mov rcx, rsi
call UnmanagedMemoryStream:Initialize(long,long,long,int):this
Which seems more involved?
src/mscorlib/src/System/IO/Stream.cs
Outdated
// subclass that maintains an internal buffer. Then, it can help perf | ||
// significantly for people who are reading one byte at a time. | ||
public virtual int ReadByte() | ||
{ | ||
byte[] oneByteArray = new byte[1]; | ||
int r = Read(oneByteArray, 0, 1); | ||
Span<byte> bytes = stackalloc byte[1]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is, intentionally, a single byte Span but is named as bytes
. oneByteSpan
instead?
@@ -46,7 +46,8 @@ unsafe static internal byte ConvertToNative(char managedChar, bool fBestFit, boo | |||
|
|||
static internal char ConvertToManaged(byte nativeChar) | |||
{ | |||
byte[] bytes = new byte[1] { nativeChar }; | |||
Span<byte> bytes = new Span<byte>(ref nativeChar, 1); | |||
bytes[0] = nativeChar; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bytes[0] = nativeChar;
is redundant
Signed-off-by: dotnet-bot <[email protected]>
Signed-off-by: dotnet-bot <[email protected]>
Signed-off-by: dotnet-bot-corefx-mirror <[email protected]>
Signed-off-by: dotnet-bot-corefx-mirror <[email protected]>
Signed-off-by: dotnet-bot-corefx-mirror <[email protected]>
Signed-off-by: dotnet-bot-corefx-mirror <[email protected]>
Span<byte>