diff --git a/cli/bench/encode_into.js b/cli/bench/encode_into.js new file mode 100644 index 00000000000000..aaee234348dea5 --- /dev/null +++ b/cli/bench/encode_into.js @@ -0,0 +1,23 @@ +// Copyright 2018-2022 the Deno authors. All rights reserved. MIT license. +const queueMicrotask = globalThis.queueMicrotask || process.nextTick; +let [total, count] = typeof Deno !== "undefined" + ? Deno.args + : [process.argv[2], process.argv[3]]; + +total = total ? parseInt(total, 0) : 50; +count = count ? parseInt(count, 10) : 1000000; + +function bench(fun) { + const start = Date.now(); + for (let i = 0; i < count; i++) fun(); + const elapsed = Date.now() - start; + const rate = Math.floor(count / (elapsed / 1000)); + console.log(`time ${elapsed} ms rate ${rate}`); + if (--total) queueMicrotask(() => bench(fun)); +} + +const encoder = new TextEncoder(); +const data = "hello world"; +const out = new Uint8Array(100); + +bench(() => encoder.encodeInto(data, out)); diff --git a/ext/web/08_text_encoding.js b/ext/web/08_text_encoding.js index 9e0c1f311c4b5d..db724033f09641 100644 --- a/ext/web/08_text_encoding.js +++ b/ext/web/08_text_encoding.js @@ -25,6 +25,7 @@ TypedArrayPrototypeSubarray, TypedArrayPrototypeSlice, Uint8Array, + Uint32Array, } = window.__bootstrap.primordials; class TextDecoder { @@ -199,10 +200,16 @@ context: "Argument 2", allowShared: true, }); - return ops.op_encoding_encode_into(source, destination); + ops.op_encoding_encode_into(source, destination, encodeIntoBuf); + return { + read: encodeIntoBuf[0], + written: encodeIntoBuf[1], + }; } } + const encodeIntoBuf = new Uint32Array(2); + webidl.configurePrototype(TextEncoder); const TextEncoderPrototype = TextEncoder.prototype; diff --git a/ext/web/lib.rs b/ext/web/lib.rs index 9c1e85952a9748..0eec63ab9f0dee 100644 --- a/ext/web/lib.rs +++ b/ext/web/lib.rs @@ -10,7 +10,9 @@ use deno_core::error::type_error; use deno_core::error::AnyError; use deno_core::include_js_files; use deno_core::op; +use deno_core::serde_v8; use deno_core::url::Url; +use deno_core::v8; use deno_core::ByteString; use deno_core::CancelHandle; use deno_core::Extension; @@ -19,11 +21,11 @@ use deno_core::Resource; use deno_core::ResourceId; use deno_core::U16String; use deno_core::ZeroCopyBuf; + use encoding_rs::CoderResult; use encoding_rs::Decoder; use encoding_rs::DecoderResult; use encoding_rs::Encoding; -use serde::Serialize; use std::borrow::Cow; use std::cell::RefCell; use std::fmt; @@ -314,46 +316,25 @@ impl Resource for TextDecoderResource { } } -#[derive(Serialize)] -#[serde(rename_all = "camelCase")] -struct EncodeIntoResult { - read: usize, - written: usize, -} - -#[op] +#[op(v8)] fn op_encoding_encode_into( - input: String, + scope: &mut v8::HandleScope, + input: serde_v8::Value, buffer: &mut [u8], -) -> EncodeIntoResult { - // Since `input` is already UTF-8, we can simply find the last UTF-8 code - // point boundary from input that fits in `buffer`, and copy the bytes up to - // that point. - let boundary = if buffer.len() >= input.len() { - input.len() - } else { - let mut boundary = buffer.len(); - - // The maximum length of a UTF-8 code point is 4 bytes. - for _ in 0..4 { - if input.is_char_boundary(boundary) { - break; - } - debug_assert!(boundary > 0); - boundary -= 1; - } - - debug_assert!(input.is_char_boundary(boundary)); - boundary - }; - - buffer[..boundary].copy_from_slice(input[..boundary].as_bytes()); - - EncodeIntoResult { - // The `read` output parameter is measured in UTF-16 code units. - read: input[..boundary].encode_utf16().count(), - written: boundary, - } + out_buf: &mut [u32], +) -> Result<(), AnyError> { + let s = v8::Local::::try_from(input.v8_value)?; + + let mut nchars = 0; + out_buf[1] = s.write_utf8( + scope, + buffer, + Some(&mut nchars), + v8::WriteOptions::NO_NULL_TERMINATION + | v8::WriteOptions::REPLACE_INVALID_UTF8, + ) as u32; + out_buf[0] = nchars as u32; + Ok(()) } /// Creates a [`CancelHandle`] resource that can be used to cancel invocations of certain ops. diff --git a/ops/lib.rs b/ops/lib.rs index 1163d426af88b4..29e3f662e58951 100644 --- a/ops/lib.rs +++ b/ops/lib.rs @@ -721,14 +721,44 @@ fn codegen_arg( }; } // Fast path for &/&mut [u8] and &/&mut [u32] - if let Some(ty) = is_ref_slice(&**ty) { - let (ptr_ty, mutability) = match ty { - SliceType::U8 => (quote!([u8]), quote!(&)), - SliceType::U8Mut => (quote!([u8]), quote!(&mut)), + match is_ref_slice(&**ty) { + None => {} + Some(SliceType::U32Mut) => { + let blck = codegen_u32_mut_slice(core, idx); + return quote! { + let #ident = #blck; + }; + } + Some(_) => { + let blck = codegen_u8_slice(core, idx); + return quote! { + let #ident = #blck; + }; + } + } + // Otherwise deserialize it via serde_v8 + quote! { + let #ident = args.get(#idx as i32); + let #ident = match #core::serde_v8::from_v8(scope, #ident) { + Ok(v) => v, + Err(err) => { + let msg = format!("Error parsing args at position {}: {}", #idx, #core::anyhow::Error::from(err)); + return #core::_ops::throw_type_error(scope, msg); + } }; - return quote! { - let #ident = { - let value = args.get(#idx as i32); + } +} + +fn codegen_u8_slice(core: &TokenStream2, idx: usize) -> TokenStream2 { + quote! {{ + let value = args.get(#idx as i32); + match #core::v8::Local::<#core::v8::ArrayBuffer>::try_from(value) { + Ok(b) => { + let store = b.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store, b.byte_length()) } + }, + Err(_) => { if let Ok(view) = #core::v8::Local::<#core::v8::ArrayBufferView>::try_from(value) { let (offset, len) = (view.byte_offset(), view.byte_length()); let buffer = match view.buffer(scope) { @@ -737,37 +767,33 @@ fn codegen_arg( return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); } }; - let store = buffer.get_backing_store(); - if store.is_shared() { - return #core::_ops::throw_type_error(scope, format!("Expected non-shared ArrayBufferView at position {}", #idx)); - } - unsafe { #mutability *(&store[offset..offset + len] as *const _ as *mut #ptr_ty) } + let store = buffer.data() as *mut u8; + // SAFETY: rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store.add(offset), len) } } else { - let b: #core::v8::Local<#core::v8::ArrayBuffer> = match value.try_into() { - Ok(v) => v, - Err(_) => { - return #core::_ops::throw_type_error(scope, format!("Expected ArrayBuffer at position {}", #idx)); - } - }; - let store = b.get_backing_store(); - if store.is_shared() { - return #core::_ops::throw_type_error(scope, format!("Expected non-shared ArrayBufferView at position {}", #idx)); - } - unsafe { #mutability *(&store[0..b.byte_length()] as *const _ as *mut #ptr_ty) } + return #core::_ops::throw_type_error(scope, format!("Expected ArrayBufferView at position {}", #idx)); } - }; - }; + } + }} } - // Otherwise deserialize it via serde_v8 +} + +fn codegen_u32_mut_slice(core: &TokenStream2, idx: usize) -> TokenStream2 { quote! { - let #ident = args.get(#idx as i32); - let #ident = match #core::serde_v8::from_v8(scope, #ident) { - Ok(v) => v, - Err(err) => { - let msg = format!("Error parsing args at position {}: {}", #idx, #core::anyhow::Error::from(err)); - return #core::_ops::throw_type_error(scope, msg); - } - }; + if let Ok(view) = #core::v8::Local::<#core::v8::Uint32Array>::try_from(args.get(#idx as i32)) { + let (offset, len) = (view.byte_offset(), view.byte_length()); + let buffer = match view.buffer(scope) { + Some(v) => v, + None => { + return #core::_ops::throw_type_error(scope, format!("Expected Uint32Array at position {}", #idx)); + } + }; + let store = buffer.data() as *mut u8; + // SAFETY: buffer from Uint32Array. Rust guarantees that lifetime of slice is no longer than the call. + unsafe { ::std::slice::from_raw_parts_mut(store.add(offset) as *mut u32, len / 4) } + } else { + return #core::_ops::throw_type_error(scope, format!("Expected Uint32Array at position {}", #idx)); + } } } @@ -849,6 +875,7 @@ fn is_option_string(ty: impl ToTokens) -> bool { enum SliceType { U8, U8Mut, + U32Mut, } fn is_ref_slice(ty: impl ToTokens) -> Option { @@ -858,6 +885,9 @@ fn is_ref_slice(ty: impl ToTokens) -> Option { if is_u8_slice_mut(&ty) { return Some(SliceType::U8Mut); } + if is_u32_slice_mut(&ty) { + return Some(SliceType::U32Mut); + } None } @@ -869,6 +899,10 @@ fn is_u8_slice_mut(ty: impl ToTokens) -> bool { tokens(ty) == "& mut [u8]" } +fn is_u32_slice_mut(ty: impl ToTokens) -> bool { + tokens(ty) == "& mut [u32]" +} + fn is_optional_fast_callback_option(ty: impl ToTokens) -> bool { tokens(&ty).contains("Option < & mut FastApiCallbackOptions") } diff --git a/tools/wpt/expectation.json b/tools/wpt/expectation.json index 7da0edb91b6b11..0f31e94f72813f 100644 --- a/tools/wpt/expectation.json +++ b/tools/wpt/expectation.json @@ -991,94 +991,8 @@ "api-replacement-encodings.any.worker.html": true, "api-surrogates-utf8.any.html": true, "api-surrogates-utf8.any.worker.html": true, - "encodeInto.any.html": [ - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler random" - ], - "encodeInto.any.worker.html": [ - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with Hi and destination length 0, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with A and destination length 10, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆 and destination length 4, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with 𝌆A and destination length 3, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with \ud834A\udf06A¥Hi and destination length 10, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with A\udf06 and destination length 4, offset 4, filler random", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler 0", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler 0", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler 128", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler 128", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 0, filler random", - "encodeInto() into SharedArrayBuffer with ¥¥ and destination length 4, offset 4, filler random" - ], + "encodeInto.any.html": true, + "encodeInto.any.worker.html": true, "idlharness.any.html": true, "idlharness.any.worker.html": true, "iso-2022-jp-decoder.any.html": true, @@ -1117,7 +1031,8 @@ "encode-utf8.any.html": true, "encode-utf8.any.worker.html": true, "readable-writable-properties.any.html": true, - "readable-writable-properties.any.worker.html": true + "readable-writable-properties.any.worker.html": true, + "realms.window.html": false }, "textdecoder-arguments.any.html": true, "textdecoder-arguments.any.worker.html": true, @@ -1162,7 +1077,9 @@ "single-byte-decoder.window.html?TextDecoder": true, "textdecoder-eof.any.html": true, "textdecoder-eof.any.worker.html": true, - "idlharness-shadowrealm.window.html": false + "idlharness-shadowrealm.window.html": false, + "single-byte-decoder.window.html?XMLHttpRequest": false, + "single-byte-decoder.window.html?document": false }, "hr-time": { "monotonic-clock.any.html": true,