From aaa36cc0069043e34b47e89769eb9eba39e5362a Mon Sep 17 00:00:00 2001 From: Claes Redestad Date: Wed, 29 Sep 2021 12:58:14 +0000 Subject: [PATCH] 8274242: Implement fast-path for ASCII-compatible CharsetEncoders on x86 Reviewed-by: naoto, thartmann --- src/hotspot/cpu/aarch64/aarch64.ad | 1 + src/hotspot/cpu/aarch64/matcher_aarch64.hpp | 3 + src/hotspot/cpu/arm/matcher_arm.hpp | 3 + src/hotspot/cpu/ppc/matcher_ppc.hpp | 2 + src/hotspot/cpu/ppc/ppc.ad | 1 + src/hotspot/cpu/s390/matcher_s390.hpp | 3 + src/hotspot/cpu/s390/s390.ad | 1 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 30 +- src/hotspot/cpu/x86/macroAssembler_x86.hpp | 2 +- src/hotspot/cpu/x86/matcher_x86.hpp | 3 + src/hotspot/cpu/x86/x86_32.ad | 21 +- src/hotspot/cpu/x86/x86_64.ad | 22 +- src/hotspot/share/classfile/vmIntrinsics.cpp | 1 + src/hotspot/share/classfile/vmIntrinsics.hpp | 3 + src/hotspot/share/opto/c2compiler.cpp | 3 + src/hotspot/share/opto/intrinsicnode.hpp | 8 +- src/hotspot/share/opto/library_call.cpp | 10 +- src/hotspot/share/opto/library_call.hpp | 2 +- .../share/classes/java/lang/StringCoding.java | 16 +- .../share/classes/java/lang/System.java | 4 + .../jdk/internal/access/JavaLangAccess.java | 9 + .../share/classes/sun/nio/cs/CESU_8.java | 22 +- .../share/classes/sun/nio/cs/SingleByte.java | 12 +- .../share/classes/sun/nio/cs/US_ASCII.java | 8 +- .../share/classes/sun/nio/cs/UTF_8.java | 26 +- .../jtreg/compiler/codegen/Test6896617.java | 346 ------------------ .../string/TestEncodeIntrinsics.java | 249 +++++++++++++ .../bench/java/nio/CharsetEncodeDecode.java | 8 +- 28 files changed, 428 insertions(+), 391 deletions(-) delete mode 100644 test/hotspot/jtreg/compiler/codegen/Test6896617.java create mode 100644 test/hotspot/jtreg/compiler/intrinsics/string/TestEncodeIntrinsics.java diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index 6fcbb24536dbf..e701e0a3a602c 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ 
b/src/hotspot/cpu/aarch64/aarch64.ad @@ -16864,6 +16864,7 @@ instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len, vRegD_V2 Vtmp3, vRegD_V3 Vtmp4, iRegI_R0 result, rFlagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(USE_KILL src, USE_KILL dst, USE_KILL len, KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index e5bee7990a6f5..9252ff127252b 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -163,4 +163,7 @@ return true; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_AARCH64_MATCHER_AARCH64_HPP diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index b7a9a3f50425e..6254f4b33991d 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -155,4 +155,7 @@ return false; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_ARM_MATCHER_ARM_HPP diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index df1672b3048ec..877f0be33c441 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -164,5 +164,7 @@ return VM_Version::has_fcfids(); } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; #endif // CPU_PPC_MATCHER_PPC_HPP diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 9a883c7231082..ddb6e04918ab6 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -12789,6 +12789,7 @@ instruct has_negatives(rarg1RegP ary1, iRegIsrc len, iRegIdst result, iRegLdst t // encode 
char[] to byte[] in ISO_8859_1 instruct encode_iso_array(rarg1RegP src, rarg2RegP dst, iRegIsrc len, iRegIdst result, iRegLdst tmp1, iRegLdst tmp2, iRegLdst tmp3, iRegLdst tmp4, iRegLdst tmp5, regCTR ctr, flagsRegCR0 cr0) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, USE_KILL src, USE_KILL dst, KILL ctr, KILL cr0); diff --git a/src/hotspot/cpu/s390/matcher_s390.hpp b/src/hotspot/cpu/s390/matcher_s390.hpp index 7577a7b266603..09cb819a6414a 100644 --- a/src/hotspot/cpu/s390/matcher_s390.hpp +++ b/src/hotspot/cpu/s390/matcher_s390.hpp @@ -152,4 +152,7 @@ return true; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = false; + #endif // CPU_S390_MATCHER_S390_HPP diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index cd22b795886d7..63004f8e26343 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -10282,6 +10282,7 @@ instruct has_negatives(rarg5RegP ary1, iRegI len, iRegI result, roddRegI oddReg, // encode char[] to byte[] in ISO_8859_1 instruct encode_iso_array(iRegP src, iRegP dst, iRegI result, iRegI len, iRegI tmp, flagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP_DEF result, TEMP tmp, KILL cr); // R0, R1 are killed, too. 
ins_cost(300); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index a13b2b0da30c5..bf56eddbdee03 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -5423,7 +5423,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, BIND(L_exit); } -// encode char[] to byte[] in ISO_8859_1 +// encode char[] to byte[] in ISO_8859_1 or ASCII //@IntrinsicCandidate //private static int implEncodeISOArray(byte[] sa, int sp, //byte[] da, int dp, int len) { @@ -5436,10 +5436,23 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, // } // return i; //} + // + //@IntrinsicCandidate + //private static int implEncodeAsciiArray(char[] sa, int sp, + // byte[] da, int dp, int len) { + // int i = 0; + // for (; i < len; i++) { + // char c = sa[sp++]; + // if (c >= '\u0080') + // break; + // da[dp++] = (byte)c; + // } + // return i; + //} void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, XMMRegister tmp1Reg, XMMRegister tmp2Reg, XMMRegister tmp3Reg, XMMRegister tmp4Reg, - Register tmp5, Register result) { + Register tmp5, Register result, bool ascii) { // rsi: src // rdi: dst @@ -5450,6 +5463,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, assert_different_registers(src, dst, len, tmp5, result); Label L_done, L_copy_1_char, L_copy_1_char_exit; + int mask = ascii ? 0xff80ff80 : 0xff00ff00; + int short_mask = ascii ? 
0xff80 : 0xff00; + // set result xorl(result, result); // check for zero length @@ -5469,7 +5485,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, if (UseAVX >= 2) { Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit; - movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector + movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector movdl(tmp1Reg, tmp5); vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit); jmp(L_chars_32_check); @@ -5478,7 +5494,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, vmovdqu(tmp3Reg, Address(src, len, Address::times_2, -64)); vmovdqu(tmp4Reg, Address(src, len, Address::times_2, -32)); vpor(tmp2Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); - vptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector + vptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector jccb(Assembler::notZero, L_copy_32_chars_exit); vpackuswb(tmp3Reg, tmp3Reg, tmp4Reg, /* vector_len */ 1); vpermq(tmp4Reg, tmp3Reg, 0xD8, /* vector_len */ 1); @@ -5493,7 +5509,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, jccb(Assembler::greater, L_copy_16_chars_exit); } else if (UseSSE42Intrinsics) { - movl(tmp5, 0xff00ff00); // create mask to test for Unicode chars in vector + movl(tmp5, mask); // create mask to test for Unicode or non-ASCII chars in vector movdl(tmp1Reg, tmp5); pshufd(tmp1Reg, tmp1Reg, 0); jmpb(L_chars_16_check); @@ -5517,7 +5533,7 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, movdqu(tmp4Reg, Address(src, len, Address::times_2, -16)); por(tmp2Reg, tmp4Reg); } - ptest(tmp2Reg, tmp1Reg); // check for Unicode chars in vector + ptest(tmp2Reg, tmp1Reg); // check for Unicode or non-ASCII chars in vector jccb(Assembler::notZero, L_copy_16_chars_exit); packuswb(tmp3Reg, tmp4Reg); } @@ -5555,7 +5571,7 @@ void MacroAssembler::encode_iso_array(Register src, 
Register dst, Register len, bind(L_copy_1_char); load_unsigned_short(tmp5, Address(src, len, Address::times_2, 0)); - testl(tmp5, 0xff00); // check if Unicode char + testl(tmp5, short_mask); // check if Unicode or non-ASCII char jccb(Assembler::notZero, L_copy_1_char_exit); movb(Address(dst, len, Address::times_1, 0), tmp5); addptr(len, 1); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 8bbfca6ea18a0..c28f7c43b8b98 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -1725,7 +1725,7 @@ class MacroAssembler: public Assembler { void encode_iso_array(Register src, Register dst, Register len, XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, - XMMRegister tmp4, Register tmp5, Register result); + XMMRegister tmp4, Register tmp5, Register result, bool ascii); #ifdef _LP64 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index 510395f121fa5..2dcd1e6e7a94a 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -195,4 +195,7 @@ return true; } + // Implements a variant of EncodeISOArrayNode that encode ASCII only + static const bool supports_encode_ascii_array = true; + #endif // CPU_X86_MATCHER_X86_HPP diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index 7fb9e4e66a024..18d213cc31a04 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -12199,18 +12199,35 @@ instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI l instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ + predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, 
TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} + format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); %} ins_pipe( pipe_slow ); %} +// encode char[] to byte[] in ASCII +instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, + regD tmp1, regD tmp2, regD tmp3, regD tmp4, + eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ + predicate(((EncodeISOArrayNode*)n)->is_ascii()); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); + + format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); + %} + ins_pipe( pipe_slow ); +%} //----------Control Flow Instructions------------------------------------------ // Signed compare Instructions diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index ad6c36c51d75a..14671c39640c1 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -11770,14 +11770,32 @@ instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_Reg instruct encode_iso_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ + 
predicate(!((EncodeISOArrayNode*)n)->is_ascii()); match(Set result (EncodeISOArray src (Binary dst len))); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - format %{ "Encode array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} + format %{ "Encode iso array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} ins_encode %{ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); + %} + ins_pipe( pipe_slow ); +%} + +// encode char[] to byte[] in ASCII +instruct encode_ascii_array(rsi_RegP src, rdi_RegP dst, rdx_RegI len, + legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4, + rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{ + predicate(((EncodeISOArrayNode*)n)->is_ascii()); + match(Set result (EncodeISOArray src (Binary dst len))); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); + + format %{ "Encode ascii array $src,$dst,$len -> $result // KILL RCX, RDX, $tmp1, $tmp2, $tmp3, $tmp4, RSI, RDI " %} + ins_encode %{ + __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, + $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); %} ins_pipe( pipe_slow ); %} diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp index 205ba8969cb0a..c15e1154b13c0 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.cpp +++ b/src/hotspot/share/classfile/vmIntrinsics.cpp @@ -505,6 +505,7 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) { if (!SpecialArraysEquals) return true; break; case vmIntrinsics::_encodeISOArray: + case 
vmIntrinsics::_encodeAsciiArray: case vmIntrinsics::_encodeByteISOArray: if (!SpecialEncodeISOArray) return true; break; diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp index 2a4f54880c9e7..31f0b1fb13c4d 100644 --- a/src/hotspot/share/classfile/vmIntrinsics.hpp +++ b/src/hotspot/share/classfile/vmIntrinsics.hpp @@ -353,6 +353,9 @@ class methodHandle; \ do_intrinsic(_encodeByteISOArray, java_lang_StringCoding, encodeISOArray_name, indexOfI_signature, F_S) \ \ + do_intrinsic(_encodeAsciiArray, java_lang_StringCoding, encodeAsciiArray_name, encodeISOArray_signature, F_S) \ + do_name( encodeAsciiArray_name, "implEncodeAsciiArray") \ + \ do_class(java_math_BigInteger, "java/math/BigInteger") \ do_intrinsic(_multiplyToLen, java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S) \ do_name( multiplyToLen_name, "implMultiplyToLen") \ diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp index d5a59a413af75..c5949ed57f48d 100644 --- a/src/hotspot/share/opto/c2compiler.cpp +++ b/src/hotspot/share/opto/c2compiler.cpp @@ -216,6 +216,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt case vmIntrinsics::_copyMemory: if (StubRoutines::unsafe_arraycopy() == NULL) return false; break; + case vmIntrinsics::_encodeAsciiArray: + if (!Matcher::match_rule_supported(Op_EncodeISOArray) || !Matcher::supports_encode_ascii_array) return false; + break; case vmIntrinsics::_encodeISOArray: case vmIntrinsics::_encodeByteISOArray: if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return false; diff --git a/src/hotspot/share/opto/intrinsicnode.hpp b/src/hotspot/share/opto/intrinsicnode.hpp index 3d6e9a38d1225..ab8a834bb28ad 100644 --- a/src/hotspot/share/opto/intrinsicnode.hpp +++ b/src/hotspot/share/opto/intrinsicnode.hpp @@ -168,10 +168,14 @@ class HasNegativesNode: public StrIntrinsicNode { 
//------------------------------EncodeISOArray-------------------------------- -// encode char[] to byte[] in ISO_8859_1 +// encode char[] to byte[] in ISO_8859_1 or ASCII class EncodeISOArrayNode: public Node { + bool ascii; public: - EncodeISOArrayNode(Node* control, Node* arymem, Node* s1, Node* s2, Node* c): Node(control, arymem, s1, s2, c) {}; + EncodeISOArrayNode(Node* control, Node* arymem, Node* s1, Node* s2, Node* c, bool ascii) + : Node(control, arymem, s1, s2, c), ascii(ascii) {} + + bool is_ascii() { return ascii; } virtual int Opcode() const; virtual bool depends_only_on_test() const { return false; } virtual const Type* bottom_type() const { return TypeInt::INT; } diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 0ac5bf6d502a5..139a6c87dd053 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -591,7 +591,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_encodeISOArray: case vmIntrinsics::_encodeByteISOArray: - return inline_encodeISOArray(); + return inline_encodeISOArray(false); + case vmIntrinsics::_encodeAsciiArray: + return inline_encodeISOArray(true); case vmIntrinsics::_updateCRC32: return inline_updateCRC32(); @@ -4882,8 +4884,8 @@ LibraryCallKit::tightly_coupled_allocation(Node* ptr) { } //-------------inline_encodeISOArray----------------------------------- -// encode char[] to byte[] in ISO_8859_1 -bool LibraryCallKit::inline_encodeISOArray() { +// encode char[] to byte[] in ISO_8859_1 or ASCII +bool LibraryCallKit::inline_encodeISOArray(bool ascii) { assert(callee()->signature()->size() == 5, "encodeISOArray has 5 parameters"); // no receiver since it is static method Node *src = argument(0); @@ -4918,7 +4920,7 @@ bool LibraryCallKit::inline_encodeISOArray() { // 'dst_start' points to dst array + scaled offset const TypeAryPtr* mtype = TypeAryPtr::BYTES; - Node* enc = new EncodeISOArrayNode(control(), 
memory(mtype), src_start, dst_start, length); + Node* enc = new EncodeISOArrayNode(control(), memory(mtype), src_start, dst_start, length, ascii); enc = _gvn.transform(enc); Node* res_mem = _gvn.transform(new SCMemProjNode(enc)); set_memory(res_mem, mtype); diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index adb1cb06c957b..b3010f1cb5ba3 100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -285,7 +285,7 @@ class LibraryCallKit : public GraphKit { Node* get_state_from_digest_object(Node *digestBase_object, const char* state_type); Node* get_digest_length_from_digest_object(Node *digestBase_object); Node* inline_digestBase_implCompressMB_predicate(int predicate); - bool inline_encodeISOArray(); + bool inline_encodeISOArray(bool ascii); bool inline_updateCRC32(); bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java index c8d691675430e..ec81c3795799f 100644 --- a/src/java.base/share/classes/java/lang/StringCoding.java +++ b/src/java.base/share/classes/java/lang/StringCoding.java @@ -46,7 +46,7 @@ public static boolean hasNegatives(byte[] ba, int off, int len) { @IntrinsicCandidate public static int implEncodeISOArray(byte[] sa, int sp, - byte[] da, int dp, int len) { + byte[] da, int dp, int len) { int i = 0; for (; i < len; i++) { char c = StringUTF16.getChar(sa, sp++); @@ -57,4 +57,18 @@ public static int implEncodeISOArray(byte[] sa, int sp, return i; } + @IntrinsicCandidate + public static int implEncodeAsciiArray(char[] sa, int sp, + byte[] da, int dp, int len) + { + int i = 0; + for (; i < len; i++) { + char c = sa[sp++]; + if (c >= '\u0080') + break; + da[dp++] = (byte)c; + } + return i; + } + } diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index 
edb636bfe210c..c843858712f7b 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2419,6 +2419,10 @@ public int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len) return String.decodeASCII(src, srcOff, dst, dstOff, len); } + public int encodeASCII(char[] src, int srcOff, byte[] dst, int dstOff, int len) { + return StringCoding.implEncodeAsciiArray(src, srcOff, dst, dstOff, len); + } + public void setCause(Throwable t, Throwable cause) { t.setCause(cause); } diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index f6fe5f6113166..b68490ad7a397 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -356,6 +356,15 @@ public interface JavaLangAccess { */ int decodeASCII(byte[] src, int srcOff, char[] dst, int dstOff, int len); + /** + * Encodes ASCII codepoints as possible from the source array into + * the destination byte array, assuming that the encoding is ASCII + * compatible + * + * @return the number of bytes successfully encoded, or 0 if none + */ + int encodeASCII(char[] src, int srcOff, byte[] dst, int dstOff, int len); + /** * Set the cause of Throwable * @param cause set t's cause to new value diff --git a/src/java.base/share/classes/sun/nio/cs/CESU_8.java b/src/java.base/share/classes/sun/nio/cs/CESU_8.java index b3dcebf53232d..f1fc69703c20c 100644 --- a/src/java.base/share/classes/sun/nio/cs/CESU_8.java +++ b/src/java.base/share/classes/sun/nio/cs/CESU_8.java @@ -76,11 +76,11 @@ private static final void updatePositions(Buffer src, int sp, dst.position(dp - dst.arrayOffset()); } + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + private static class Decoder extends CharsetDecoder implements ArrayDecoder { - private static final 
JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); - private Decoder(Charset cs) { super(cs, 1.0f, 1.0f); } @@ -434,7 +434,6 @@ private static void to3Bytes(ByteBuffer dst, char c) { } private Surrogate.Parser sgp; - private char[] c2; private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { @@ -445,11 +444,12 @@ private CoderResult encodeArrayLoop(CharBuffer src, byte[] da = dst.array(); int dp = dst.arrayOffset() + dst.position(); int dl = dst.arrayOffset() + dst.limit(); - int dlASCII = dp + Math.min(sl - sp, dl - dp); - // ASCII only loop - while (dp < dlASCII && sa[sp] < '\u0080') - da[dp++] = (byte) sa[sp++]; + // Handle ASCII-only prefix + int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp)); + sp += n; + dp += n; + while (sp < sl) { char c = sa[sp]; if (c < 0x80) { @@ -549,11 +549,11 @@ protected final CoderResult encodeLoop(CharBuffer src, public int encode(char[] sa, int sp, int len, byte[] da) { int sl = sp + len; int dp = 0; - int dlASCII = dp + Math.min(len, da.length); - // ASCII only optimized loop - while (dp < dlASCII && sa[sp] < '\u0080') - da[dp++] = (byte) sa[sp++]; + // Handle ASCII-only prefix + int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(len, da.length)); + sp += n; + dp += n; while (sp < sl) { char c = sa[sp++]; diff --git a/src/java.base/share/classes/sun/nio/cs/SingleByte.java b/src/java.base/share/classes/sun/nio/cs/SingleByte.java index 88f8954844424..748659b323f15 100644 --- a/src/java.base/share/classes/sun/nio/cs/SingleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/SingleByte.java @@ -49,11 +49,11 @@ private static final CoderResult withResult(CoderResult cr, return cr; } + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + public static final class Decoder extends CharsetDecoder implements ArrayDecoder { - private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); - private final char[] b2c; private final boolean isASCIICompatible; private 
final boolean isLatin1Decodable; @@ -214,8 +214,14 @@ private CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) { byte[] da = dst.array(); int dp = dst.arrayOffset() + dst.position(); int dl = dst.arrayOffset() + dst.limit(); - int len = Math.min(dl - dp, sl - sp); + int len = Math.min(dl - dp, sl - sp); + if (isASCIICompatible) { + int n = JLA.encodeASCII(sa, sp, da, dp, len); + sp += n; + dp += n; + len -= n; + } while (len-- > 0) { char c = sa[sp]; int b = encode(c); diff --git a/src/java.base/share/classes/sun/nio/cs/US_ASCII.java b/src/java.base/share/classes/sun/nio/cs/US_ASCII.java index 04aeceb43d34a..8ff79d497fbc5 100644 --- a/src/java.base/share/classes/sun/nio/cs/US_ASCII.java +++ b/src/java.base/share/classes/sun/nio/cs/US_ASCII.java @@ -61,9 +61,9 @@ public CharsetEncoder newEncoder() { return new Encoder(this); } - private static class Decoder extends CharsetDecoder { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); - private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + private static class Decoder extends CharsetDecoder { private Decoder(Charset cs) { super(cs, 1.0f, 1.0f); @@ -159,6 +159,10 @@ private CoderResult encodeArrayLoop(CharBuffer src, assert (dp <= dl); dp = (dp <= dl ? 
dp : dl); + int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp)); + sp += n; + dp += n; + try { while (sp < sl) { char c = sa[sp]; diff --git a/src/java.base/share/classes/sun/nio/cs/UTF_8.java b/src/java.base/share/classes/sun/nio/cs/UTF_8.java index 1a7d8c4d1e1a5..a27b4690f59f5 100644 --- a/src/java.base/share/classes/sun/nio/cs/UTF_8.java +++ b/src/java.base/share/classes/sun/nio/cs/UTF_8.java @@ -83,9 +83,9 @@ static final void updatePositions(Buffer src, int sp, dst.position(dp - dst.arrayOffset()); } - private static class Decoder extends CharsetDecoder { + private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); - private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess(); + private static class Decoder extends CharsetDecoder { private Decoder(Charset cs) { super(cs, 1.0f, 1.0f); @@ -443,8 +443,7 @@ private static CoderResult overflow(CharBuffer src, int mark) { private Surrogate.Parser sgp; private CoderResult encodeArrayLoop(CharBuffer src, - ByteBuffer dst) - { + ByteBuffer dst) { char[] sa = src.array(); int sp = src.arrayOffset() + src.position(); int sl = src.arrayOffset() + src.limit(); @@ -452,11 +451,22 @@ private CoderResult encodeArrayLoop(CharBuffer src, byte[] da = dst.array(); int dp = dst.arrayOffset() + dst.position(); int dl = dst.arrayOffset() + dst.limit(); - int dlASCII = dp + Math.min(sl - sp, dl - dp); - // ASCII only loop - while (dp < dlASCII && sa[sp] < '\u0080') - da[dp++] = (byte) sa[sp++]; + // Handle ASCII-only prefix + int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(sl - sp, dl - dp)); + sp += n; + dp += n; + + if (sp < sl) { + return encodeArrayLoopSlow(src, sa, sp, sl, dst, da, dp, dl); + } else { + updatePositions(src, sp, dst, dp); + return CoderResult.UNDERFLOW; + } + } + + private CoderResult encodeArrayLoopSlow(CharBuffer src, char[] sa, int sp, int sl, + ByteBuffer dst, byte[] da, int dp, int dl) { while (sp < sl) { char c = sa[sp]; if (c < 0x80) { diff --git 
a/test/hotspot/jtreg/compiler/codegen/Test6896617.java b/test/hotspot/jtreg/compiler/codegen/Test6896617.java deleted file mode 100644 index eddaa3231ea41..0000000000000 --- a/test/hotspot/jtreg/compiler/codegen/Test6896617.java +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -/* - * @test - * @key randomness - * @bug 6896617 - * @summary Optimize sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() with SSE instructions on x86 - * @library /test/lib - * @modules java.base/jdk.internal.misc - * java.base/sun.nio.cs - * java.management - * - * @ignore 8193479 - * @run main/othervm/timeout=1200 -Xbatch -Xmx256m compiler.codegen.Test6896617 - */ - -package compiler.codegen; - -import jdk.test.lib.Utils; - -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CodingErrorAction; -import java.util.Arrays; -import java.util.Random; - -public class Test6896617 { - final static int SIZE = 256; - - public static void main(String[] args) { - String csn = "ISO-8859-1"; - Charset cs = Charset.forName(csn); - CharsetEncoder enc = cs.newEncoder(); - enc.onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); - CharsetDecoder dec = cs.newDecoder(); - dec.onMalformedInput(CodingErrorAction.REPLACE) - .onUnmappableCharacter(CodingErrorAction.REPLACE); - - byte repl = (byte)'?'; - enc.replaceWith(new byte[] { repl }); - - // Use internal API for tests. 
- sun.nio.cs.ArrayEncoder arrenc = (sun.nio.cs.ArrayEncoder)enc; - sun.nio.cs.ArrayDecoder arrdec = (sun.nio.cs.ArrayDecoder)dec; - - // Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF) - Random rnd = Utils.getRandomInstance(); - int maxchar = 0xFF; - char[] a = new char[SIZE]; - byte[] b = new byte[SIZE]; - char[] at = new char[SIZE]; - byte[] bt = new byte[SIZE]; - for (int i = 0; i < SIZE; i++) { - char c = (char) rnd.nextInt(maxchar); - if (!enc.canEncode(c)) { - System.out.printf("Something wrong: can't encode c=%03x\n", (int)c); - System.exit(97); - } - a[i] = c; - b[i] = (byte)c; - at[i] = (char)-1; - bt[i] = (byte)-1; - } - if (arrenc.encode(a, 0, SIZE, bt) != SIZE || !Arrays.equals(b, bt)) { - System.out.println("Something wrong: ArrayEncoder.encode failed"); - System.exit(97); - } - if (arrdec.decode(b, 0, SIZE, at) != SIZE || !Arrays.equals(a, at)) { - System.out.println("Something wrong: ArrayDecoder.decode failed"); - System.exit(97); - } - for (int i = 0; i < SIZE; i++) { - at[i] = (char)-1; - bt[i] = (byte)-1; - } - - ByteBuffer bb = ByteBuffer.wrap(b); - CharBuffer ba = CharBuffer.wrap(a); - ByteBuffer bbt = ByteBuffer.wrap(bt); - CharBuffer bat = CharBuffer.wrap(at); - if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) { - System.out.println("Something wrong: Encoder.encode failed"); - System.exit(97); - } - if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) { - System.out.println("Something wrong: Decoder.decode failed"); - System.exit(97); - } - for (int i = 0; i < SIZE; i++) { - at[i] = (char)-1; - bt[i] = (byte)-1; - } - - // Warm up - boolean failed = false; - int result = 0; - for (int i = 0; i < 10000; i++) { - result += arrenc.encode(a, 0, SIZE, bt); - result -= arrdec.decode(b, 0, SIZE, at); - } - for (int i = 0; i < 10000; i++) { - result += arrenc.encode(a, 0, SIZE, bt); - result -= arrdec.decode(b, 0, SIZE, at); - } - for (int i = 0; i < 10000; i++) { - result += 
arrenc.encode(a, 0, SIZE, bt); - result -= arrdec.decode(b, 0, SIZE, at); - } - if (result != 0 || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) { - failed = true; - System.out.println("Failed: ArrayEncoder.encode char[" + SIZE + "] and ArrayDecoder.decode byte[" + SIZE + "]"); - } - for (int i = 0; i < SIZE; i++) { - at[i] = (char)-1; - bt[i] = (byte)-1; - } - - boolean is_underflow = true; - for (int i = 0; i < 10000; i++) { - ba.clear(); bb.clear(); bat.clear(); bbt.clear(); - boolean enc_res = enc.encode(ba, bbt, true).isUnderflow(); - boolean dec_res = dec.decode(bb, bat, true).isUnderflow(); - is_underflow = is_underflow && enc_res && dec_res; - } - for (int i = 0; i < SIZE; i++) { - at[i] = (char)-1; - bt[i] = (byte)-1; - } - for (int i = 0; i < 10000; i++) { - ba.clear(); bb.clear(); bat.clear(); bbt.clear(); - boolean enc_res = enc.encode(ba, bbt, true).isUnderflow(); - boolean dec_res = dec.decode(bb, bat, true).isUnderflow(); - is_underflow = is_underflow && enc_res && dec_res; - } - for (int i = 0; i < SIZE; i++) { - at[i] = (char)-1; - bt[i] = (byte)-1; - } - for (int i = 0; i < 10000; i++) { - ba.clear(); bb.clear(); bat.clear(); bbt.clear(); - boolean enc_res = enc.encode(ba, bbt, true).isUnderflow(); - boolean dec_res = dec.decode(bb, bat, true).isUnderflow(); - is_underflow = is_underflow && enc_res && dec_res; - } - if (!is_underflow || !Arrays.equals(b, bt) || !Arrays.equals(a, at)) { - failed = true; - System.out.println("Failed: Encoder.encode char[" + SIZE + "] and Decoder.decode byte[" + SIZE + "]"); - } - - // Test encoder with different source and destination sizes - System.out.println("Testing different source and destination sizes"); - for (int i = 1; i <= SIZE; i++) { - for (int j = 1; j <= SIZE; j++) { - bt = new byte[j]; - // very source's SIZE - result = arrenc.encode(a, 0, i, bt); - int l = Math.min(i, j); - if (result != l) { - failed = true; - System.out.println("Failed: encode char[" + i + "] to byte[" + j + "]: result = " + 
result + ", expected " + l); - } - for (int k = 0; k < l; k++) { - if (bt[k] != b[k]) { - failed = true; - System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]); - } - } - // very source's offset - int sz = SIZE - i + 1; - result = arrenc.encode(a, i-1, sz, bt); - l = Math.min(sz, j); - if (result != l) { - failed = true; - System.out.println("Failed: encode char[" + sz + "] to byte[" + j + "]: result = " + result + ", expected " + l); - } - for (int k = 0; k < l; k++) { - if (bt[k] != b[i+k-1]) { - failed = true; - System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[i+k-1]); - } - } - } - } - - // Test encoder with char > 0xFF - System.out.println("Testing big char"); - - byte orig = (byte)'A'; - bt = new byte[SIZE]; - for (int i = 1; i <= SIZE; i++) { - for (int j = 0; j < i; j++) { - a[j] += 0x100; - // make sure to replace a different byte - bt[j] = orig; - result = arrenc.encode(a, 0, i, bt); - if (result != i) { - failed = true; - System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: result = " + result + ", expected " + i); - } - if (bt[j] != repl) { - failed = true; - System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl); - } - bt[j] = b[j]; // Restore to compare whole array - for (int k = 0; k < i; k++) { - if (bt[k] != b[k]) { - failed = true; - System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]); - } - } - a[j] -= 0x100; // Restore - } - } - - // Test sun.nio.cs.ISO_8859_1$Encode.encodeArrayLoop() performance. 
- - int itrs = Integer.getInteger("iterations", 1000000); - int size = Integer.getInteger("size", 256); - a = new char[size]; - b = new byte[size]; - bt = new byte[size]; - for (int i = 0; i < size; i++) { - char c = (char) rnd.nextInt(maxchar); - if (!enc.canEncode(c)) { - System.out.printf("Something wrong: can't encode c=%03x\n", (int)c); - System.exit(97); - } - a[i] = c; - b[i] = (byte)-1; - bt[i] = (byte)c; - } - ba = CharBuffer.wrap(a); - bb = ByteBuffer.wrap(b); - boolean enc_res = enc.encode(ba, bb, true).isUnderflow(); - if (!enc_res || !Arrays.equals(b, bt)) { - failed = true; - System.out.println("Failed 1: Encoder.encode char[" + size + "]"); - } - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - - // Make sure to recompile method if needed before performance run. - for (int i = 0; i < 10000; i++) { - ba.clear(); bb.clear(); - enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow(); - } - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - for (int i = 0; i < 10000; i++) { - ba.clear(); bb.clear(); - enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow(); - } - if (!enc_res || !Arrays.equals(b, bt)) { - failed = true; - System.out.println("Failed 2: Encoder.encode char[" + size + "]"); - } - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - - System.out.println("Testing ISO_8859_1$Encode.encodeArrayLoop() performance"); - long start = System.currentTimeMillis(); - for (int i = 0; i < itrs; i++) { - ba.clear(); bb.clear(); - enc_res = enc_res && enc.encode(ba, bb, true).isUnderflow(); - } - long end = System.currentTimeMillis(); - if (!enc_res || !Arrays.equals(b, bt)) { - failed = true; - System.out.println("Failed 3: Encoder.encode char[" + size + "]"); - } else { - System.out.println("size: " + size + " time: " + (end - start)); - } - - // Test sun.nio.cs.ISO_8859_1$Encode.encode() performance. - - // Make sure to recompile method if needed before performance run. 
- result = 0; - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - for (int i = 0; i < 10000; i++) { - result += arrenc.encode(a, 0, size, b); - } - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - for (int i = 0; i < 10000; i++) { - result += arrenc.encode(a, 0, size, b); - } - if (result != size*20000 || !Arrays.equals(b, bt)) { - failed = true; - System.out.println("Failed 1: ArrayEncoder.encode char[" + SIZE + "]"); - } - for (int i = 0; i < size; i++) { - b[i] = (byte)-1; - } - - System.out.println("Testing ISO_8859_1$Encode.encode() performance"); - result = 0; - start = System.currentTimeMillis(); - for (int i = 0; i < itrs; i++) { - result += arrenc.encode(a, 0, size, b); - } - end = System.currentTimeMillis(); - if (!Arrays.equals(b, bt)) { - failed = true; - System.out.println("Failed 2: ArrayEncoder.encode char[" + size + "]"); - } else { - System.out.println("size: " + size + " time: " + (end - start)); - } - - if (failed) { - System.out.println("FAILED"); - System.exit(97); - } - System.out.println("PASSED"); - } -}
diff --git a/test/hotspot/jtreg/compiler/intrinsics/string/TestEncodeIntrinsics.java b/test/hotspot/jtreg/compiler/intrinsics/string/TestEncodeIntrinsics.java
new file mode 100644
index 0000000000000..38a516e7521a6
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/intrinsics/string/TestEncodeIntrinsics.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2013, 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @key randomness
+ * @bug 6896617 8274242
+ * @summary Verify potentially intrinsified encoders behave well before and after compilation
+ * @library /test/lib
+ *
+ * @run main/othervm/timeout=1200 --add-opens=java.base/sun.nio.cs=ALL-UNNAMED -Xbatch -Xmx256m compiler.intrinsics.string.TestEncodeIntrinsics
+ */
+
+package compiler.intrinsics.string;
+
+import jdk.test.lib.Utils;
+
+import java.lang.reflect.Method;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * Drives the CharsetEncoder and CharsetDecoder of several charsets through
+ * repeated encode/decode round trips and checks the results both before
+ * and after the 10000-iteration warm-up loops.
+ */
+public class TestEncodeIntrinsics {
+    final static int SIZE = 256; // number of chars generated as test input
+
+    public static void main(String[] args) {
+
+        test("ISO-8859-1", false);
+        test("UTF-8", true);
+        test("US-ASCII", true);
+        test("CESU-8", true);
+    }
+
+    /**
+     * Runs all checks for one charset. Setup failures exit the JVM with
+     * status 97 at once; later failures are printed and lead to the same
+     * exit status at the end of the method.
+     *
+     * @param csn       charset name passed to Charset.forName
+     * @param asciiOnly true to generate valid input chars <= 0x7F,
+     *                  false to generate chars <= 0xFF
+     */
+    private static void test(String csn, boolean asciiOnly) {
+        try {
+            System.out.println("Testing " + csn);
+            Charset cs = Charset.forName(csn);
+            CharsetEncoder enc = cs.newEncoder();
+            enc.onMalformedInput(CodingErrorAction.REPLACE)
+               .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            CharsetDecoder dec = cs.newDecoder();
+            dec.onMalformedInput(CodingErrorAction.REPLACE)
+               .onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+            byte repl = (byte) '?';
+            enc.replaceWith(new byte[]{repl});
+
+            // Populate char[] with chars which can be encoded by ISO_8859_1 (<= 0xFF)
+            // - or ASCII (<= 0x7F) if requested
+            Random rnd = Utils.getRandomInstance();
+            int maxchar = asciiOnly ? 0x7F : 0xFF;
+            char[] a = new char[SIZE];
+            byte[] b = new byte[SIZE];
+            char[] at = new char[SIZE];
+            byte[] bt = new byte[SIZE];
+            for (int i = 0; i < SIZE; i++) {
+                // nextInt's bound is exclusive, so add 1 to make maxchar itself reachable
+                char c = (char) rnd.nextInt(maxchar + 1);
+                if (!enc.canEncode(c)) {
+                    System.out.printf("Something wrong: can't encode c=%03x\n", (int) c);
+                    System.exit(97);
+                }
+                a[i] = c;
+                b[i] = (byte) c;
+                at[i] = (char) -1;
+                bt[i] = (byte) -1;
+            }
+
+            Method encodeArray = null;
+            if (csn.equals("ISO-8859-1")) {
+                // Use internal API for tests
+                encodeArray = enc.getClass().getDeclaredMethod("encodeISOArray",
+                        char[].class, int.class, byte[].class, int.class, int.class);
+                encodeArray.setAccessible(true);
+                if ((int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE || !Arrays.equals(b, bt)) {
+                    System.out.println("Something wrong: ArrayEncoder.encode failed");
+                    System.exit(97);
+                }
+                // Reset bt (not at, which is still untouched here) so that the
+                // Encoder.encode check below cannot pass on this call's output
+                for (int i = 0; i < SIZE; i++) {
+                    bt[i] = (byte) -1;
+                }
+            }
+
+            ByteBuffer bb = ByteBuffer.wrap(b);
+            CharBuffer ba = CharBuffer.wrap(a);
+            ByteBuffer bbt = ByteBuffer.wrap(bt);
+            CharBuffer bat = CharBuffer.wrap(at);
+            if (!enc.encode(ba, bbt, true).isUnderflow() || !Arrays.equals(b, bt)) {
+                System.out.println("Something wrong: Encoder.encode failed");
+                System.exit(97);
+            }
+            if (!dec.decode(bb, bat, true).isUnderflow() || !Arrays.equals(a, at)) {
+                System.out.println("Something wrong: Decoder.decode failed (a == at: " + Arrays.equals(a, at) + ")");
+                System.exit(97);
+            }
+            for (int i = 0; i < SIZE; i++) {
+                at[i] = (char) -1;
+                bt[i] = (byte) -1;
+            }
+
+            // Warm up
+            boolean failed = false;
+
+            if (csn.equals("ISO-8859-1")) {
+                for (int i = 0; i < 10000; i++) {
+                    failed |= (int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE;
+                }
+                for (int i = 0; i < 10000; i++) {
+                    failed |= (int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE;
+                }
+                for (int i = 0; i < 10000; i++) {
+                    failed |= (int) encodeArray.invoke(enc, a, 0, bt, 0, SIZE) != SIZE;
+                }
+                if (failed || !Arrays.equals(b, bt)) {
+                    failed = true;
+                    System.out.println("Failed: ISO_8859_1$Encoder.encode char[" + SIZE + "]");
+                }
+            }
+
+            for (int i = 0; i < SIZE; i++) {
+                at[i] = (char) -1;
+                bt[i] = (byte) -1;
+            }
+
+            boolean is_underflow = true;
+            for (int i = 0; i < 10000; i++) {
+                ba.clear();
+                bb.clear();
+                bat.clear();
+                bbt.clear();
+                boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+                boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+                is_underflow = is_underflow && enc_res && dec_res;
+            }
+            for (int i = 0; i < SIZE; i++) {
+                at[i] = (char) -1;
+                bt[i] = (byte) -1;
+            }
+            for (int i = 0; i < 10000; i++) {
+                ba.clear();
+                bb.clear();
+                bat.clear();
+                bbt.clear();
+                boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+                boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+                is_underflow = is_underflow && enc_res && dec_res;
+            }
+            for (int i = 0; i < SIZE; i++) {
+                at[i] = (char) -1;
+                bt[i] = (byte) -1;
+            }
+            for (int i = 0; i < 10000; i++) {
+                ba.clear();
+                bb.clear();
+                bat.clear();
+                bbt.clear();
+                boolean enc_res = enc.encode(ba, bbt, true).isUnderflow();
+                boolean dec_res = dec.decode(bb, bat, true).isUnderflow();
+                is_underflow = is_underflow && enc_res && dec_res;
+            }
+            if (!is_underflow) {
+                failed = true;
+                System.out.println("Failed: got a non-underflow");
+            }
+            if (!Arrays.equals(b, bt)) {
+                failed = true;
+                System.out.println("Failed: b != bt");
+            }
+            if (!Arrays.equals(a, at)) {
+                failed = true;
+                System.out.println("Failed: a != at");
+            }
+
+            // Test encoder with chars outside of the range the intrinsic deals with
+            System.out.println("Testing big char");
+
+            bt = new byte[SIZE + 10]; // add some spare room to deal with encoding multi-byte
+            ba = CharBuffer.wrap(a);
+            bbt = ByteBuffer.wrap(bt);
+            for (int i = 1; i <= SIZE; i++) {
+                for (int j = 0; j < i; j++) {
+                    char bigChar = (char)((asciiOnly ? 0x7F : 0xFF) + 1 + rnd.nextInt(0x100));
+                    char aOrig = a[j];
+                    a[j] = bigChar;
+                    // make sure to replace with a different byte
+                    bt[j] = (byte)(bt[j] + 1);
+                    ba.clear();
+                    ba.limit(i);
+                    bbt.clear();
+                    if (!enc.encode(ba, bbt, true).isUnderflow()) {
+                        failed = true;
+                        System.out.println("Failed: encode char[" + i + "] to byte[" + i + "]: expected underflow");
+                    }
+                    if (bt[j] == b[j] && b[j] != repl) { // b[j] can be equal to repl; ignore
+                        failed = true;
+                        System.out.println("Failed: different byte expected at pos bt[" + j + "]");
+                    }
+                    if (!enc.canEncode(bigChar) && bt[j] != repl) {
+                        failed = true;
+                        System.out.println("Failed: encoded replace byte[" + j + "] (" + bt[j] + ") != " + repl);
+                    }
+
+                    // Check that all bytes prior to the replaced one were encoded properly
+                    for (int k = 0; k < j; k++) {
+                        if (bt[k] != b[k]) {
+                            failed = true;
+                            System.out.println("Failed: encoded byte[" + k + "] (" + bt[k] + ") != " + b[k]);
+                        }
+                    }
+                    a[j] = aOrig; // Restore
+                }
+            }
+
+            if (failed) {
+                System.out.println("FAILED");
+                System.exit(97);
+            }
+            System.out.println("PASSED");
+        } catch (Exception e) {
+            e.printStackTrace();
+            System.out.println("FAILED");
+            System.exit(97);
+        }
+    }
+}
diff --git a/test/micro/org/openjdk/bench/java/nio/CharsetEncodeDecode.java b/test/micro/org/openjdk/bench/java/nio/CharsetEncodeDecode.java index 51ad531e685a3..6e129a5466e89 100644 --- a/test/micro/org/openjdk/bench/java/nio/CharsetEncodeDecode.java +++ b/test/micro/org/openjdk/bench/java/nio/CharsetEncodeDecode.java @@ -24,12 +24,15 @@ import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; import org.openjdk.jmh.annotations.Mode; import org.openjdk.jmh.annotations.OutputTimeUnit; import org.openjdk.jmh.annotations.Param; import org.openjdk.jmh.annotations.Scope; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.State; +import
org.openjdk.jmh.annotations.Warmup; import java.nio.ByteBuffer; import java.nio.CharBuffer; @@ -45,8 +48,11 @@ * char and byte arrays. */ @BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @OutputTimeUnit(TimeUnit.MICROSECONDS) @State(Scope.Thread) +@Fork(3) public class CharsetEncodeDecode { private byte[] BYTES; @@ -55,7 +61,7 @@ public class CharsetEncodeDecode { private CharsetEncoder encoder; private CharsetDecoder decoder; - @Param({"BIG5", "ISO-8859-15", "ASCII", "UTF-16"}) + @Param({"UTF-8", "BIG5", "ISO-8859-15", "ASCII", "UTF-16"}) private String type; @Param("16384")