From a7191744ed831cba2330ef42a4e8c2e2bc2af64a Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Thu, 15 Feb 2024 15:19:35 +0100 Subject: [PATCH] Do not make the RubyString native when passed to a :string argument of a FFI call * See https://github.com/oracle/truffleruby/issues/3293#issuecomment-1944444642 and https://github.com/ffi/ffi/issues/1080 * As that could cause extra conversions to managed later on. --- CHANGELOG.md | 1 + lib/truffle/truffle/ffi_backend/function.rb | 4 +- lib/truffle/truffle/fiddle_backend.rb | 3 +- .../java/org/truffleruby/cext/CExtNodes.java | 59 +++++++++++++------ .../format/convert/StringToPointerNode.java | 2 +- .../core/string/ImmutableRubyString.java | 11 ++-- 6 files changed, 53 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c2a8dedd571..fc0c80127670 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -64,6 +64,7 @@ Performance: * Change the `Hash` representation from traditional buckets to a "compact hash table" for improved locality, performance and memory footprint (#3172, @moste00). * Optimize calls with `ruby2_keywords` forwarding by deciding it per call site instead of per callee thanks to [my fix in CRuby 3.2](https://bugs.ruby-lang.org/issues/18625) (@eregon). * Optimize feature loading when require is called with an absolute path to a .rb file (@rwstauner). +* Avoid extra copies for Strings passed as `:string` arguments to a FFI call and used later for Regexp matching (#3293, @eregon). Changes: diff --git a/lib/truffle/truffle/ffi_backend/function.rb b/lib/truffle/truffle/ffi_backend/function.rb index f0226b1ead3f..14ff27593606 100644 --- a/lib/truffle/truffle/ffi_backend/function.rb +++ b/lib/truffle/truffle/ffi_backend/function.rb @@ -155,7 +155,7 @@ def free get_pointer_value(value) elsif FFI::Type::STRING == type Truffle::Type.check_null_safe(value) unless Primitive.nil?(value) - get_pointer_value(value) + Truffle::CExt.string_to_ffi_pointer_copy(value) elsif Primitive.is_a?(type, FFI::FunctionType) and Primitive.is_a?(value, Proc) callback(value, type) else @@ -198,7 +198,7 @@ def free elsif Primitive.nil?(value) Truffle::FFI::Pointer::NULL elsif Primitive.is_a?(value, String) - Truffle::CExt.string_to_ffi_pointer(value) + Truffle::CExt.string_to_ffi_pointer_inplace(value) elsif value.respond_to?(:to_ptr) Truffle::Type.coerce_to value, Truffle::FFI::Pointer, :to_ptr else diff --git a/lib/truffle/truffle/fiddle_backend.rb b/lib/truffle/truffle/fiddle_backend.rb index 230c4b20fd0c..8da281681da7 100644 --- a/lib/truffle/truffle/fiddle_backend.rb +++ b/lib/truffle/truffle/fiddle_backend.rb @@ -93,7 +93,8 @@ def self.convert_ruby_to_native(type, val) def self.get_pointer_value(val) if Primitive.is_a?(val, String) - Truffle::CExt.string_to_ffi_pointer(val) + # NOTE: Fiddle::TYPE_CONST_STRING wouldn't need inplace, but not defined yet by this file + Truffle::CExt.string_to_ffi_pointer_inplace(val) elsif Primitive.is_a?(val, Fiddle::Pointer) val.to_i elsif val.respond_to?(:to_ptr) diff --git a/src/main/java/org/truffleruby/cext/CExtNodes.java b/src/main/java/org/truffleruby/cext/CExtNodes.java index 54e1bc7d0b71..a386a869cb8b 100644 --- a/src/main/java/org/truffleruby/cext/CExtNodes.java +++ b/src/main/java/org/truffleruby/cext/CExtNodes.java @@ -13,6 +13,7 @@ import java.util.Arrays; import java.util.concurrent.locks.ReentrantLock; +import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.TruffleSafepoint; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.GenerateCached; @@ -714,7 +715,7 @@ public abstract static class RbEncCodeRangeClear extends CoreMethodArrayArgument @Specialization RubyString clearCodeRange(RubyString string, @Cached StringToNativeNode stringToNativeNode) { - stringToNativeNode.executeToNative(this, string); + stringToNativeNode.executeToNative(this, string, true); string.clearCodeRange(); return string; @@ -817,7 +818,7 @@ public abstract static class RbStrCapacityNode extends CoreMethodArrayArgumentsN @Specialization long capacity(Object string, @Cached StringToNativeNode stringToNativeNode) { - return getNativeStringCapacity(stringToNativeNode.executeToNative(this, string)); + return getNativeStringCapacity(stringToNativeNode.executeToNative(this, string, true)); } } @@ -830,7 +831,7 @@ RubyString strSetLen(RubyString string, int newByteLength, @Cached StringToNativeNode stringToNativeNode, @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode, @Cached InlinedConditionProfile minLengthOneProfile) { - var pointer = stringToNativeNode.executeToNative(this, string); + var pointer = stringToNativeNode.executeToNative(this, string, true); var encoding = libString.getEncoding(string); int minLength = encoding.jcoding.minLength(); @@ -860,7 +861,7 @@ RubyString rbStrResize(RubyString string, int newByteLength, @Cached RubyStringLibrary libString, @Cached StringToNativeNode stringToNativeNode, @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { - var pointer = stringToNativeNode.executeToNative(this, string); + var pointer = stringToNativeNode.executeToNative(this, string, true); var tencoding = libString.getTEncoding(string); int byteLength = string.tstring.byteLength(tencoding); @@ -889,7 +890,7 @@ RubyString trStrCapaResize(RubyString string, int newCapacity, @Cached RubyStringLibrary libString, @Cached StringToNativeNode stringToNativeNode, @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { - var pointer = stringToNativeNode.executeToNative(this, string); + var pointer = stringToNativeNode.executeToNative(this, string, true); var tencoding = libString.getTEncoding(string); if (getNativeStringCapacity(pointer) == newCapacity) { @@ -1327,24 +1328,29 @@ int rbHash(Object object, } } + /** If inplace is true, this node mutates the RubyString to use native memory. It should be avoided unless there is + * no other way because e.g. Regexp matching later on that String would then copy to managed byte[] back, and + * copying back-and-forth is quite expensive. OTOH if the String will need to be used as native memory again soon + * after and without needing to go to managed in between then it is valuable to avoid extra copies. */ @GenerateInline @GenerateCached(false) public abstract static class StringToNativeNode extends RubyBaseNode { - public abstract Pointer executeToNative(Node node, Object string); + public abstract Pointer executeToNative(Node node, Object string, boolean inplace); @Specialization - static Pointer toNative(Node node, RubyString string, + static Pointer toNative(Node node, RubyString string, boolean inplace, @Cached RubyStringLibrary libString, @Cached InlinedConditionProfile convertProfile, @Cached(inline = false) TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, @Cached(inline = false) MutableTruffleString.FromNativePointerNode fromNativePointerNode, @Cached(inline = false) TruffleString.GetInternalNativePointerNode getInternalNativePointerNode) { + CompilerAsserts.partialEvaluationConstant(inplace); + var tstring = string.tstring; var tencoding = libString.getTEncoding(string); final Pointer pointer; - if (convertProfile.profile(node, tstring.isNative())) { assert tstring.isMutable(); pointer = (Pointer) getInternalNativePointerNode.execute(tstring, tencoding); @@ -1353,16 +1359,18 @@ static Pointer toNative(Node node, RubyString string, pointer = allocateAndCopyToNative(getLanguage(node), getContext(node), tstring, tencoding, byteLength, copyToNativeMemoryNode); - var nativeTString = fromNativePointerNode.execute(pointer, 0, byteLength, tencoding, false); - string.setTString(nativeTString); + if (inplace) { + var nativeTString = fromNativePointerNode.execute(pointer, 0, byteLength, tencoding, false); + string.setTString(nativeTString); + } } return pointer; } @Specialization - static Pointer toNativeImmutable(Node node, ImmutableRubyString string) { - return string.getNativeString(getLanguage(node)); + static Pointer toNativeImmutable(Node node, ImmutableRubyString string, boolean inplace) { + return string.getNativeString(getLanguage(node), getContext(node)); } public static Pointer allocateAndCopyToNative(RubyLanguage language, RubyContext context, @@ -1381,17 +1389,34 @@ public abstract static class StringPointerToNativeNode extends PrimitiveArrayArg @Specialization long toNative(Object string, @Cached StringToNativeNode stringToNativeNode) { - return stringToNativeNode.executeToNative(this, string).getAddress(); + return stringToNativeNode.executeToNative(this, string, true).getAddress(); + } + } + + @CoreMethod(names = "string_to_ffi_pointer_inplace", onSingleton = true, required = 1) + public abstract static class StringToFFIPointerInplaceNode extends CoreMethodArrayArgumentsNode { + + @Specialization + RubyPointer toFFIPointerInplace(Object string, + @Cached StringToNativeNode stringToNativeNode) { + var pointer = stringToNativeNode.executeToNative(this, string, true); + + final RubyPointer instance = new RubyPointer( + coreLibrary().truffleFFIPointerClass, + getLanguage().truffleFFIPointerShape, + pointer); + AllocationTracing.trace(instance, this); + return instance; } } - @CoreMethod(names = "string_to_ffi_pointer", onSingleton = true, required = 1) - public abstract static class StringToFFIPointerNode extends CoreMethodArrayArgumentsNode { + @CoreMethod(names = "string_to_ffi_pointer_copy", onSingleton = true, required = 1) + public abstract static class StringToFFIPointerCopyNode extends CoreMethodArrayArgumentsNode { @Specialization - RubyPointer toNative(Object string, + RubyPointer toFFIPointerCopy(Object string, @Cached StringToNativeNode stringToNativeNode) { - var pointer = stringToNativeNode.executeToNative(this, string); + var pointer = stringToNativeNode.executeToNative(this, string, false); final RubyPointer instance = new RubyPointer( coreLibrary().truffleFFIPointerClass, diff --git a/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java b/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java index 2e275a52c66f..4783d8ac49a7 100644 --- a/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java +++ b/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java @@ -42,7 +42,7 @@ static long toPointer(VirtualFrame frame, Object string, @Cached RubyStringLibrary strings, @Bind("this") Node node) { - final Pointer pointer = stringToNativeNode.executeToNative(node, string); + final Pointer pointer = stringToNativeNode.executeToNative(node, string, true); List associated = (List) frame.getObject(FormatFrameDescriptor.ASSOCIATED_SLOT); diff --git a/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java b/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java index d59a221cbc7c..886db9558b24 100644 --- a/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java +++ b/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java @@ -80,21 +80,20 @@ public boolean isNative() { } @SuppressFBWarnings("IS2_INCONSISTENT_SYNC") - public Pointer getNativeString(RubyLanguage language) { + public Pointer getNativeString(RubyLanguage language, RubyContext context) { if (nativeString == null) { - return createNativeString(language); + return createNativeString(language, context); } return nativeString; } @TruffleBoundary - private synchronized Pointer createNativeString(RubyLanguage language) { + private synchronized Pointer createNativeString(RubyLanguage language, RubyContext context) { if (nativeString == null) { var tencoding = getEncodingUncached().tencoding; int byteLength = tstring.byteLength(tencoding); - nativeString = CExtNodes.StringToNativeNode.allocateAndCopyToNative(language, - RubyLanguage.getCurrentContext(), tstring, tencoding, byteLength, - TruffleString.CopyToNativeMemoryNode.getUncached()); + nativeString = CExtNodes.StringToNativeNode.allocateAndCopyToNative(language, context, tstring, tencoding, + byteLength, TruffleString.CopyToNativeMemoryNode.getUncached()); } return nativeString; }