From 83761ad6d1182a59b507a73a71aef63005f2d12e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicent=20Mart=C3=AD?= <42793+vmg@users.noreply.github.com> Date: Thu, 6 Jul 2023 11:27:16 +0200 Subject: [PATCH] mysql: introduce icuregex package (#13391) * mysql: introduce icuregex package Co-authored-by: Dirkjan Bussink Signed-off-by: Vicent Marti * icuregex: implement freeze set optimization Signed-off-by: Vicent Marti * evalengine: wire up regex Signed-off-by: Vicent Marti * Fix remaining TODOs and fix a bunch of bugs Signed-off-by: Dirkjan Bussink * Update sizegen Signed-off-by: Dirkjan Bussink * icuregex: Fix invalid slice creation Parse the structure so we can create buffers with the proper size and never with infinite sizes. While this was not the immediate cause of the race error, it's better to create with the right slice size also for debugging when digging into it. The real fix here is that the size of `algorithmicRange` includes the size of the struct itself, so if we want to get the remaining slice size it needs to subtract this value. Signed-off-by: Dirkjan Bussink * icuregex: Create valid slice length for algorithmicRange We also want to create a valid slice length for the additional data, this was too long if an offset was given and would read into the next entry. Signed-off-by: Dirkjan Bussink * icuregex: Clean up more unsafe usage This reduces unsafe usage to just udata and doesn't use it anywhere outside of it. Makes it more Go idiomatic this way. Signed-off-by: Dirkjan Bussink * icuregex: Use more Go like naming and reduce exposed API Signed-off-by: Dirkjan Bussink * regex: Implement additional regular expression functions This implements the additional MySQL regular expression functions in the evalengine. The evaluator is only implementing this for now, but the compiler is up next. 
Signed-off-by: Dirkjan Bussink * regexp: Update generated data Signed-off-by: Dirkjan Bussink * Revert accidentally committed test Signed-off-by: Dirkjan Bussink * evalengine: Add compilation for regular expressions Also fixes a whole slew of bugs identified. Signed-off-by: Dirkjan Bussink * icuregex: Allow for setting explicit dumper Remove the usage of a global variable here. Signed-off-by: Dirkjan Bussink * evalengine: Add a whole bunch of regex tests This adds a bunch of tests and fixes the bugs exposed through them. Signed-off-by: Dirkjan Bussink * Fix license Signed-off-by: Dirkjan Bussink * More license fixes Signed-off-by: Dirkjan Bussink * evalengine: Improve handling of constant expression regexps Signed-off-by: Dirkjan Bussink --------- Signed-off-by: Vicent Marti Signed-off-by: Dirkjan Bussink Co-authored-by: Dirkjan Bussink --- go/mysql/collations/charset/convert.go | 74 + go/mysql/collations/env.go | 9 +- go/mysql/constants.go | 25 + go/mysql/icuregex/compiler.go | 3647 +++++++++++++++++ go/mysql/icuregex/compiler_table.go | 357 ++ go/mysql/icuregex/debug.go | 151 + go/mysql/icuregex/error.go | 149 + go/mysql/icuregex/errors/error.go | 27 + go/mysql/icuregex/icu_test.go | 422 ++ .../icuregex/internal/bytestrie/bytes_trie.go | 354 ++ go/mysql/icuregex/internal/icudata/README.md | 46 + go/mysql/icuregex/internal/icudata/char.brk | Bin 0 -> 13680 bytes go/mysql/icuregex/internal/icudata/embed.go | 96 + go/mysql/icuregex/internal/icudata/nfc.nrm | Bin 0 -> 35124 bytes go/mysql/icuregex/internal/icudata/nfkc.nrm | Bin 0 -> 54136 bytes .../icuregex/internal/icudata/nfkc_cf.nrm | Bin 0 -> 51472 bytes go/mysql/icuregex/internal/icudata/pnames.icu | Bin 0 -> 42682 bytes go/mysql/icuregex/internal/icudata/ubidi.icu | Bin 0 -> 26636 bytes go/mysql/icuregex/internal/icudata/ucase.icu | Bin 0 -> 28898 bytes .../icuregex/internal/icudata/ulayout.icu | Bin 0 -> 13488 bytes go/mysql/icuregex/internal/icudata/unames.icu | Bin 0 -> 283932 bytes 
go/mysql/icuregex/internal/icudata/uprops.icu | Bin 0 -> 135656 bytes go/mysql/icuregex/internal/icudata/word.brk | Bin 0 -> 22232 bytes .../icuregex/internal/normalizer/constants.go | 122 + .../internal/normalizer/normalizer.go | 482 +++ .../icuregex/internal/pattern/unescape.go | 314 ++ .../internal/pattern/unescape_test.go | 48 + go/mysql/icuregex/internal/pattern/utils.go | 111 + go/mysql/icuregex/internal/ubidi/ubidi.go | 461 +++ go/mysql/icuregex/internal/ucase/fold.go | 243 ++ go/mysql/icuregex/internal/ucase/ucase.go | 425 ++ go/mysql/icuregex/internal/uchar/constants.go | 240 ++ go/mysql/icuregex/internal/uchar/uchar.go | 405 ++ go/mysql/icuregex/internal/udata/udata.go | 155 + go/mysql/icuregex/internal/ulayout/ulayout.go | 128 + go/mysql/icuregex/internal/unames/unames.go | 471 +++ .../icuregex/internal/unames/unames_test.go | 64 + .../icuregex/internal/uprops/constants.go | 613 +++ .../icuregex/internal/uprops/properties.go | 472 +++ go/mysql/icuregex/internal/uprops/uprops.go | 269 ++ .../icuregex/internal/uprops/uprops_binary.go | 239 ++ .../icuregex/internal/uprops/uprops_int.go | 265 ++ go/mysql/icuregex/internal/uprops/uscript.go | 505 +++ go/mysql/icuregex/internal/uset/close.go | 96 + go/mysql/icuregex/internal/uset/frozen.go | 339 ++ go/mysql/icuregex/internal/uset/pattern.go | 107 + .../icuregex/internal/uset/unicode_set.go | 694 ++++ .../internal/uset/unicode_set_test.go | 43 + go/mysql/icuregex/internal/utf16/helpers.go | 65 + go/mysql/icuregex/internal/utrie/ucptrie.go | 708 ++++ go/mysql/icuregex/internal/utrie/utrie2.go | 440 ++ go/mysql/icuregex/matcher.go | 1655 ++++++++ go/mysql/icuregex/ops.go | 414 ++ go/mysql/icuregex/pattern.go | 149 + go/mysql/icuregex/perl_test.go | 216 + go/mysql/icuregex/sets.go | 104 + go/mysql/icuregex/sets_test.go | 66 + go/mysql/icuregex/testdata/re_tests.txt | 923 +++++ go/mysql/icuregex/testdata/regextst.txt | 2793 +++++++++++++ .../icuregex/testdata/regextst_extended.txt | 126 + go/mysql/sql_error.go | 22 
+ go/vt/vterrors/state.go | 25 + go/vt/vtgate/evalengine/cached_size.go | 48 + go/vt/vtgate/evalengine/compare.go | 2 +- go/vt/vtgate/evalengine/compiler_asm.go | 445 +- go/vt/vtgate/evalengine/compiler_test.go | 4 + go/vt/vtgate/evalengine/expr_collate.go | 18 +- go/vt/vtgate/evalengine/expr_compare.go | 8 +- go/vt/vtgate/evalengine/fn_regexp.go | 1062 +++++ .../evalengine/integration/fuzz_test.go | 5 + go/vt/vtgate/evalengine/mysql_test.go | 2 +- go/vt/vtgate/evalengine/testcases/cases.go | 288 ++ go/vt/vtgate/evalengine/testcases/inputs.go | 35 + go/vt/vtgate/evalengine/translate.go | 8 + go/vt/vtgate/evalengine/translate_builtin.go | 161 + .../tabletmanager/vreplication/utils.go | 20 + 76 files changed, 22460 insertions(+), 20 deletions(-) create mode 100644 go/mysql/icuregex/compiler.go create mode 100644 go/mysql/icuregex/compiler_table.go create mode 100644 go/mysql/icuregex/debug.go create mode 100644 go/mysql/icuregex/error.go create mode 100644 go/mysql/icuregex/errors/error.go create mode 100644 go/mysql/icuregex/icu_test.go create mode 100644 go/mysql/icuregex/internal/bytestrie/bytes_trie.go create mode 100644 go/mysql/icuregex/internal/icudata/README.md create mode 100644 go/mysql/icuregex/internal/icudata/char.brk create mode 100644 go/mysql/icuregex/internal/icudata/embed.go create mode 100644 go/mysql/icuregex/internal/icudata/nfc.nrm create mode 100644 go/mysql/icuregex/internal/icudata/nfkc.nrm create mode 100644 go/mysql/icuregex/internal/icudata/nfkc_cf.nrm create mode 100644 go/mysql/icuregex/internal/icudata/pnames.icu create mode 100644 go/mysql/icuregex/internal/icudata/ubidi.icu create mode 100644 go/mysql/icuregex/internal/icudata/ucase.icu create mode 100644 go/mysql/icuregex/internal/icudata/ulayout.icu create mode 100644 go/mysql/icuregex/internal/icudata/unames.icu create mode 100644 go/mysql/icuregex/internal/icudata/uprops.icu create mode 100644 go/mysql/icuregex/internal/icudata/word.brk create mode 100644 
go/mysql/icuregex/internal/normalizer/constants.go create mode 100644 go/mysql/icuregex/internal/normalizer/normalizer.go create mode 100644 go/mysql/icuregex/internal/pattern/unescape.go create mode 100644 go/mysql/icuregex/internal/pattern/unescape_test.go create mode 100644 go/mysql/icuregex/internal/pattern/utils.go create mode 100644 go/mysql/icuregex/internal/ubidi/ubidi.go create mode 100644 go/mysql/icuregex/internal/ucase/fold.go create mode 100644 go/mysql/icuregex/internal/ucase/ucase.go create mode 100644 go/mysql/icuregex/internal/uchar/constants.go create mode 100644 go/mysql/icuregex/internal/uchar/uchar.go create mode 100644 go/mysql/icuregex/internal/udata/udata.go create mode 100644 go/mysql/icuregex/internal/ulayout/ulayout.go create mode 100644 go/mysql/icuregex/internal/unames/unames.go create mode 100644 go/mysql/icuregex/internal/unames/unames_test.go create mode 100644 go/mysql/icuregex/internal/uprops/constants.go create mode 100644 go/mysql/icuregex/internal/uprops/properties.go create mode 100644 go/mysql/icuregex/internal/uprops/uprops.go create mode 100644 go/mysql/icuregex/internal/uprops/uprops_binary.go create mode 100644 go/mysql/icuregex/internal/uprops/uprops_int.go create mode 100644 go/mysql/icuregex/internal/uprops/uscript.go create mode 100644 go/mysql/icuregex/internal/uset/close.go create mode 100644 go/mysql/icuregex/internal/uset/frozen.go create mode 100644 go/mysql/icuregex/internal/uset/pattern.go create mode 100644 go/mysql/icuregex/internal/uset/unicode_set.go create mode 100644 go/mysql/icuregex/internal/uset/unicode_set_test.go create mode 100644 go/mysql/icuregex/internal/utf16/helpers.go create mode 100644 go/mysql/icuregex/internal/utrie/ucptrie.go create mode 100644 go/mysql/icuregex/internal/utrie/utrie2.go create mode 100644 go/mysql/icuregex/matcher.go create mode 100644 go/mysql/icuregex/ops.go create mode 100644 go/mysql/icuregex/pattern.go create mode 100644 go/mysql/icuregex/perl_test.go create mode 
100644 go/mysql/icuregex/sets.go create mode 100644 go/mysql/icuregex/sets_test.go create mode 100644 go/mysql/icuregex/testdata/re_tests.txt create mode 100644 go/mysql/icuregex/testdata/regextst.txt create mode 100644 go/mysql/icuregex/testdata/regextst_extended.txt create mode 100644 go/vt/vtgate/evalengine/fn_regexp.go diff --git a/go/mysql/collations/charset/convert.go b/go/mysql/collations/charset/convert.go index 1c0ced27e4e..6054ae33559 100644 --- a/go/mysql/collations/charset/convert.go +++ b/go/mysql/collations/charset/convert.go @@ -19,6 +19,8 @@ package charset import ( "fmt" "unicode/utf8" + + "vitess.io/vitess/go/hack" ) func failedConversionError(from, to Charset, input []byte) error { @@ -126,6 +128,78 @@ func Convert(dst []byte, dstCharset Charset, src []byte, srcCharset Charset) ([] } } +func Expand(dst []rune, src []byte, srcCharset Charset) []rune { + switch srcCharset := srcCharset.(type) { + case Charset_utf8mb3, Charset_utf8mb4: + if dst == nil { + return []rune(string(src)) + } + dst = make([]rune, 0, len(src)) + for _, cp := range string(src) { + dst = append(dst, cp) + } + return dst + case Charset_binary: + if dst == nil { + dst = make([]rune, 0, len(src)) + } + for _, c := range src { + dst = append(dst, rune(c)) + } + return dst + default: + if dst == nil { + dst = make([]rune, 0, len(src)) + } + for len(src) > 0 { + cp, width := srcCharset.DecodeRune(src) + src = src[width:] + dst = append(dst, cp) + } + return dst + } +} + +func Collapse(dst []byte, src []rune, dstCharset Charset) []byte { + switch dstCharset := dstCharset.(type) { + case Charset_utf8mb3, Charset_utf8mb4: + if dst == nil { + return hack.StringBytes(string(src)) + } + return append(dst, hack.StringBytes(string(src))...) 
+ case Charset_binary: + if dst == nil { + dst = make([]byte, 0, len(src)) + } + for _, b := range src { + dst = append(dst, byte(b)) + } + return dst + default: + nDst := 0 + if dst == nil { + dst = make([]byte, len(src)*dstCharset.MaxWidth()) + } else { + dst = dst[:cap(dst)] + } + for _, c := range src { + if len(dst)-nDst < 4 { + newDst := make([]byte, len(dst)*2) + copy(newDst, dst[:nDst]) + dst = newDst + } + w := dstCharset.EncodeRune(dst[nDst:], c) + if w < 0 { + if w = dstCharset.EncodeRune(dst[nDst:], '?'); w < 0 { + break + } + } + nDst += w + } + return dst[:nDst] + } +} + func ConvertFromUTF8(dst []byte, dstCharset Charset, src []byte) ([]byte, error) { return Convert(dst, dstCharset, src, Charset_utf8mb4{}) } diff --git a/go/mysql/collations/env.go b/go/mysql/collations/env.go index 52a255b6f41..0c063e140d5 100644 --- a/go/mysql/collations/env.go +++ b/go/mysql/collations/env.go @@ -194,10 +194,11 @@ func makeEnv(version collver) *Environment { // A few interesting character set values. 
// See http://dev.mysql.com/doc/internals/en/character-set.html#packet-Protocol::CharacterSet const ( - CollationUtf8ID = 33 - CollationUtf8mb4ID = 255 - CollationBinaryID = 63 - CollationUtf8mb4BinID = 46 + CollationUtf8ID = 33 + CollationUtf8mb4ID = 255 + CollationBinaryID = 63 + CollationUtf8mb4BinID = 46 + CollationLatin1Swedish = 8 ) // Binary is the default Binary collation diff --git a/go/mysql/constants.go b/go/mysql/constants.go index b2c9b4d49a5..bedc9871426 100644 --- a/go/mysql/constants.go +++ b/go/mysql/constants.go @@ -565,6 +565,31 @@ const ( ERJSONValueTooBig = ErrorCode(3150) ERJSONDocumentTooDeep = ErrorCode(3157) + ERRegexpStringNotTerminated = ErrorCode(3684) + ERRegexpBufferOverflow = ErrorCode(3684) + ERRegexpIllegalArgument = ErrorCode(3685) + ERRegexpIndexOutOfBounds = ErrorCode(3686) + ERRegexpInternal = ErrorCode(3687) + ERRegexpRuleSyntax = ErrorCode(3688) + ERRegexpBadEscapeSequence = ErrorCode(3689) + ERRegexpUnimplemented = ErrorCode(3690) + ERRegexpMismatchParen = ErrorCode(3691) + ERRegexpBadInterval = ErrorCode(3692) + ERRRegexpMaxLtMin = ErrorCode(3693) + ERRegexpInvalidBackRef = ErrorCode(3694) + ERRegexpLookBehindLimit = ErrorCode(3695) + ERRegexpMissingCloseBracket = ErrorCode(3696) + ERRegexpInvalidRange = ErrorCode(3697) + ERRegexpStackOverflow = ErrorCode(3698) + ERRegexpTimeOut = ErrorCode(3699) + ERRegexpPatternTooBig = ErrorCode(3700) + ERRegexpInvalidCaptureGroup = ErrorCode(3887) + ERRegexpInvalidFlag = ErrorCode(3900) + + ERCharacterSetMismatch = ErrorCode(3995) + + ERWrongParametersToNativeFct = ErrorCode(1583) + // max execution time exceeded ERQueryTimeout = ErrorCode(3024) diff --git a/go/mysql/icuregex/compiler.go b/go/mysql/icuregex/compiler.go new file mode 100644 index 00000000000..eba297d0f21 --- /dev/null +++ b/go/mysql/icuregex/compiler.go @@ -0,0 +1,3647 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. 
+Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "math" + "strings" + "unicode/utf8" + + "golang.org/x/exp/slices" + + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/unames" + "vitess.io/vitess/go/mysql/icuregex/internal/uprops" + "vitess.io/vitess/go/mysql/icuregex/internal/uset" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +const BreakIteration = false +const stackSize = 100 + +type reChar struct { + char rune + quoted bool +} + +const ( + parenPlain = -1 + parenCapturing = -2 + parenAtomic = -3 + parenLookahead = -4 + parenNegLookahead = -5 + parenFlags = -6 + parenLookBehind = -7 + parenLookBehindN = -8 +) + +type setOperation uint32 + +const ( + setStart setOperation = 0<<16 | 1 + setEnd setOperation = 1<<16 | 2 + setNegation setOperation = 2<<16 | 3 + setCaseClose setOperation = 2<<16 | 9 + setDifference2 setOperation = 3<<16 | 4 // '--' set difference operator + setIntersection2 setOperation = 3<<16 | 5 // '&&' set intersection operator + setUnion setOperation = 4<<16 | 6 // implicit union of adjacent items + setDifference1 setOperation = 4<<16 | 7 // '-', single dash difference 
op, for compatibility with old UnicodeSet. + setIntersection1 setOperation = 4<<16 | 8 // '&', single amp intersection op, for compatibility with old UnicodeSet. +) + +type compiler struct { + err error + out *Pattern + p []rune + + scanIndex int + quoteMode bool + inBackslashQuote bool + eolComments bool + + lineNum int + charNum int + lastChar rune + peekChar rune + + c reChar + stack [stackSize]uint16 + stackPtr int + + modeFlags RegexpFlag + newModeFlags RegexpFlag + setModeFlag bool + + literalChars []rune + + parenStack []int + matchOpenParen int + matchCloseParen int + + intervalLow int + intervalUpper int + + setStack []*uset.UnicodeSet + setOpStack []setOperation + + lastSetLiteral rune + captureName *strings.Builder +} + +func newCompiler(pat *Pattern) *compiler { + return &compiler{ + out: pat, + scanIndex: 0, + eolComments: true, + lineNum: 1, + charNum: 0, + lastChar: -1, + peekChar: -1, + modeFlags: RegexpFlag(uint32(pat.flags) | 0x80000000), + matchOpenParen: -1, + matchCloseParen: -1, + lastSetLiteral: -1, + } +} + +func (c *compiler) nextCharLL() (ch rune) { + if c.peekChar != -1 { + ch, c.peekChar = c.peekChar, -1 + return + } + if len(c.p) == 0 { + return -1 + } + + ch = c.p[0] + c.p = c.p[1:] + if ch == utf8.RuneError { + return -1 + } + + if ch == chCR || ch == chNEL || ch == chLS || (ch == chLF && c.lastChar != chCR) { + c.lineNum++ + c.charNum = 0 + } else { + if ch != chLF { + c.charNum++ + } + } + c.lastChar = ch + return +} + +func (c *compiler) peekCharLL() rune { + if c.peekChar == -1 { + c.peekChar = c.nextCharLL() + } + return c.peekChar +} + +func (c *compiler) nextChar(ch *reChar) { + c.scanIndex++ + ch.char = c.nextCharLL() + ch.quoted = false + + if c.quoteMode { + ch.quoted = true + if (ch.char == chBackSlash && c.peekCharLL() == chE && ((c.modeFlags & Literal) == 0)) || + ch.char == -1 { + c.quoteMode = false // Exit quote mode, + c.nextCharLL() // discard the E + c.nextChar(ch) + return + } + } else if c.inBackslashQuote { + // 
The current character immediately follows a '\' + // Don't check for any further escapes, just return it as-is. + // Don't set c.fQuoted, because that would prevent the state machine from + // dispatching on the character. + c.inBackslashQuote = false + } else { + // We are not in a \Q quoted region \E of the source. + // + if (c.modeFlags & Comments) != 0 { + // + // We are in free-spacing and comments mode. + // Scan through any white space and comments, until we + // reach a significant character or the end of input. + for { + if ch.char == -1 { + break // End of Input + } + if ch.char == chPound && c.eolComments { + // Start of a comment. Consume the rest of it, until EOF or a new line + for { + ch.char = c.nextCharLL() + if ch.char == -1 || // EOF + ch.char == chCR || + ch.char == chLF || + ch.char == chNEL || + ch.char == chLS { + break + } + } + } + // TODO: check what Java & Perl do with non-ASCII white spaces. Ticket 6061. + if !pattern.IsWhitespace(ch.char) { + break + } + ch.char = c.nextCharLL() + } + } + + // + // check for backslash escaped characters. + // + if ch.char == chBackSlash { + beforeEscape := c.p + if staticSetUnescape.ContainsRune(c.peekCharLL()) { + // + // A '\' sequence that is handled by ICU's standard unescapeAt function. + // Includes \uxxxx, \n, \r, many others. + // Return the single equivalent character. + // + c.nextCharLL() // get & discard the peeked char. + ch.quoted = true + + ch.char, c.p = pattern.UnescapeAtRunes(beforeEscape) + if ch.char < 0 { + c.error(BadEscapeSequence) + } + c.charNum += len(beforeEscape) - len(c.p) + } else if c.peekCharLL() == chDigit0 { + // Octal Escape, using Java Regexp Conventions + // which are \0 followed by 1-3 octal digits. + // Different from ICU Unescape handling of Octal, which does not + // require the leading 0. + // Java also has the convention of only consuming 2 octal digits if + // the three digit number would be > 0xff + // + ch.char = 0 + c.nextCharLL() // Consume the initial 0. 
+ for index := 0; index < 3; index++ { + ch2 := c.peekCharLL() + if ch2 < chDigit0 || ch2 > chDigit7 { + if index == 0 { + // \0 is not followed by any octal digits. + c.error(BadEscapeSequence) + } + break + } + ch.char <<= 3 + ch.char += ch2 & 7 + if ch.char <= 255 { + c.nextCharLL() + } else { + // The last digit made the number too big. Forget we saw it. + ch.char >>= 3 + } + } + ch.quoted = true + } else if c.peekCharLL() == chQ { + // "\Q" enter quote mode, which will continue until "\E" + c.quoteMode = true + c.nextCharLL() // discard the 'Q'. + c.nextChar(ch) // recurse to get the real next char. + return + } else { + // We are in a '\' escape that will be handled by the state table scanner. + // Just return the backslash, but remember that the following char is to + // be taken literally. + c.inBackslashQuote = true + } + } + } + + // re-enable # to end-of-line comments, in case they were disabled. + // They are disabled by the parser upon seeing '(?', but this lasts for + // the fetching of the next character only. + c.eolComments = true +} + +const ( + chCR = 0x0d // New lines, for terminating comments. + chLF = 0x0a // Line Feed + chPound = 0x23 // '#', introduces a comment. 
+ chDigit0 = 0x30 // '0' + chDigit7 = 0x37 // '7' + chColon = 0x3A // ':' + chE = 0x45 // 'E' + chQ = 0x51 // 'Q' + chN = 0x4E // 'N' + chP = 0x50 // 'P' + chBackSlash = 0x5c // '\' introduces a char escape + chLBracket = 0x5b // '[' + chRBracket = 0x5d // ']' + chUp = 0x5e // '^' + chLowerP = 0x70 + chLBrace = 0x7b // '{' + chRBrace = 0x7d // '}' + chNEL = 0x85 // NEL newline variant + chLS = 0x2028 // Unicode Line Separator + chAmp = 0x26 // '&' + chDash = 0x2d // '-' +) + +func (c *compiler) compile(pat []rune) error { + if c.err != nil { + return c.err + } + if c.out.pattern != "" { + panic("cannot reuse pattern") + } + + c.out.pattern = string(pat) + c.p = pat + + var state uint16 = 1 + var table []regexTableEl + + // UREGEX_LITERAL force entire pattern to be treated as a literal string. + if c.modeFlags&Literal != 0 { + c.quoteMode = true + } + + c.nextChar(&c.c) + + // Main loop for the regex pattern parsing state machine. + // Runs once per state transition. + // Each time through optionally performs, depending on the state table, + // - an advance to the next pattern char + // - an action to be performed. + // - pushing or popping a state to/from the local state return stack. + // file regexcst.txt is the source for the state table. The logic behind + // recognizing the pattern syntax is there, not here. 
+ for { + if c.err != nil { + break + } + + if state == 0 { + panic("bad state?") + } + + table = parseStateTable[state:] + for len(table) > 0 { + if table[0].charClass < 127 && !c.c.quoted && rune(table[0].charClass) == c.c.char { + break + } + if table[0].charClass == 255 { + break + } + if table[0].charClass == 254 && c.c.quoted { + break + } + if table[0].charClass == 253 && c.c.char == -1 { + break + } + if table[0].charClass >= 128 && table[0].charClass < 240 && !c.c.quoted && c.c.char != -1 { + if staticRuleSet[table[0].charClass-128].ContainsRune(c.c.char) { + break + } + } + + table = table[1:] + } + + if !c.doParseActions(table[0].action) { + break + } + + if table[0].pushState != 0 { + c.stackPtr++ + if c.stackPtr >= stackSize { + c.error(InternalError) + c.stackPtr-- + } + c.stack[c.stackPtr] = uint16(table[0].pushState) + } + + if table[0].nextChar { + c.nextChar(&c.c) + } + + if table[0].nextState != 255 { + state = uint16(table[0].nextState) + } else { + state = c.stack[c.stackPtr] + c.stackPtr-- + if c.stackPtr < 0 { + c.stackPtr++ + c.error(MismatchedParen) + } + } + } + + if c.err != nil { + return c.err + } + + c.allocateStackData(restackframeHdrCount) + c.stripNOPs() + + c.out.minMatchLen = c.minMatchLength(3, len(c.out.compiledPat)-1) + + c.matchStartType() + return c.err +} + +func (c *compiler) doParseActions(action patternParseAction) bool { + switch action { + case doPatStart: + // Start of pattern compiles to: + //0 SAVE 2 Fall back to position of FAIL + //1 jmp 3 + //2 FAIL Stop if we ever reach here. + //3 NOP Dummy, so start of pattern looks the same as + // the start of an ( grouping. + //4 NOP Resreved, will be replaced by a save if there are + // OR | operators at the top level + c.appendOp(urxStateSave, 2) + c.appendOp(urxJmp, 3) + c.appendOp(urxFail, 0) + + // Standard open nonCapture paren action emits the two NOPs and + // sets up the paren stack frame. 
+ c.doParseActions(doOpenNonCaptureParen) + + case doPatFinish: + // We've scanned to the end of the pattern + // The end of pattern compiles to: + // URX_END + // which will stop the runtime match engine. + // Encountering end of pattern also behaves like a close paren, + // and forces fixups of the State Save at the beginning of the compiled pattern + // and of any OR operations at the top level. + // + c.handleCloseParen() + if len(c.parenStack) > 0 { + // Missing close paren in pattern. + c.error(MismatchedParen) + } + + // add the END operation to the compiled pattern. + c.appendOp(urxEnd, 0) + + // Terminate the pattern compilation state machine. + return false + + case doOrOperator: + // Scanning a '|', as in (A|B) + // Generate code for any pending literals preceding the '|' + c.fixLiterals(false) + + // Insert a SAVE operation at the start of the pattern section preceding + // this OR at this level. This SAVE will branch the match forward + // to the right hand side of the OR in the event that the left hand + // side fails to match and backtracks. Locate the position for the + // save from the location on the top of the parentheses stack. + var savePosition int + savePosition, c.parenStack = stackPop(c.parenStack) + op := c.out.compiledPat[savePosition] + + if op.typ() != urxNop { + panic("expected a NOP placeholder") + } + + op = c.buildOp(urxStateSave, len(c.out.compiledPat)+1) + c.out.compiledPat[savePosition] = op + + // Append an JMP operation into the compiled pattern. The operand for + // the JMP will eventually be the location following the ')' for the + // group. This will be patched in later, when the ')' is encountered. + c.appendOp(urxJmp, 0) + + // Push the position of the newly added JMP op onto the parentheses stack. + // This registers if for fixup when this block's close paren is encountered. + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Append a NOP to the compiled pattern. 
This is the slot reserved + // for a SAVE in the event that there is yet another '|' following + // this one. + c.appendOp(urxNop, 0) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doBeginNamedCapture: + // Scanning (? + // Compile to a + // - NOP, which later may be replaced if the parenthesized group + // has a quantifier, followed by + // - STO_SP save state stack position, so it can be restored at the ")" + // - NOP, which may later be replaced by a save-state if there + // is an '|' alternation within the parens. + c.fixLiterals(false) + c.appendOp(urxNop, 0) + varLoc := c.allocateData(1) // Reserve a data location for saving the state stack ptr. + c.appendOp(urxStoSp, varLoc) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the two NOPs. Depending on what follows in the pattern, the + // NOPs may be changed to SAVE_STATE or JMP ops, with a target + // address of the end of the parenthesized group. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenAtomic) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-3) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doOpenLookAhead: + // Positive Look-ahead (?= stuff ) + // + // Note: Addition of transparent input regions, with the need to + // restore the original regions when failing out of a lookahead + // block, complicated this sequence. Some combined opcodes + // might make sense - or might not, lookahead aren't that common. + // + // Caution: min match length optimization knows about this + // sequence; don't change without making updates there too. + // + // Compiles to + // 1 LA_START dataLoc Saves SP, Input Pos, Active input region. + // 2. STATE_SAVE 4 on failure of lookahead, goto 4 + // 3 JMP 6 continue ... + // + // 4. LA_END Look Ahead failed. Restore regions. + // 5. BACKTRACK and back track again. + // + // 6. 
NOP reserved for use by quantifiers on the block. + // Look-ahead can't have quantifiers, but paren stack + // compile time conventions require the slot anyhow. + // 7. NOP may be replaced if there is are '|' ops in the block. + // 8. code for parenthesized stuff. + // 9. LA_END + // + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + c.fixLiterals(false) + dataLoc := c.allocateData(4) + c.appendOp(urxLaStart, dataLoc) + c.appendOp(urxStateSave, len(c.out.compiledPat)+2) + c.appendOp(urxJmp, len(c.out.compiledPat)+3) + c.appendOp(urxLaEnd, dataLoc) + c.appendOp(urxBacktrack, 0) + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the NOPs. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookahead) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + case doOpenLookAheadNeg: + // Negated Lookahead. (?! stuff ) + // Compiles to + // 1. LA_START dataloc + // 2. SAVE_STATE 7 // Fail within look-ahead block restores to this state, + // // which continues with the match. + // 3. NOP // Std. Open Paren sequence, for possible '|' + // 4. code for parenthesized stuff. + // 5. LA_END // Cut back stack, remove saved state from step 2. + // 6. BACKTRACK // code in block succeeded, so neg. lookahead fails. + // 7. END_LA // Restore match region, in case look-ahead was using + // an alternate (transparent) region. + // Four data slots are reserved, for saving state on entry to the look-around + // 0: stack pointer on entry. + // 1: input position on entry. + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. 
+ c.fixLiterals(false) + dataLoc := c.allocateData(4) + c.appendOp(urxLaStart, dataLoc) + c.appendOp(urxStateSave, 0) // dest address will be patched later. + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the StateSave and NOP. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenNegLookahead) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Instructions #5 - #7 will be added when the ')' is encountered. + + case doOpenLookBehind: + // Compile a (?<= look-behind open paren. + // + // Compiles to + // 0 URX_LB_START dataLoc + // 1 URX_LB_CONT dataLoc + // 2 MinMatchLen + // 3 MaxMatchLen + // 4 URX_NOP Standard '(' boilerplate. + // 5 URX_NOP Reserved slot for use with '|' ops within (block). + // 6 + // 7 URX_LB_END dataLoc # Check match len, restore input len + // 8 URX_LA_END dataLoc # Restore stack, input pos + // + // Allocate a block of matcher data, to contain (when running a match) + // 0: Stack ptr on entry + // 1: Input Index on entry + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. + // The first four items must match the layout of data for LA_START / LA_END + + // Generate match code for any pending literals. + c.fixLiterals(false) + + // Allocate data space + dataLoc := c.allocateData(5) + + // Emit URX_LB_START + c.appendOp(urxLbStart, dataLoc) + + // Emit URX_LB_CONT + c.appendOp(urxLbCont, dataLoc) + c.appendOp(urxReservedOp, 0) // MinMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // MaxMatchLength. To be filled later. + + // Emit the NOPs + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the URX_LB_CONT and the NOP. 
+ c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookBehind) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // The final two instructions will be added when the ')' is encountered. + + case doOpenLookBehindNeg: + // Compile a (? + // 8 URX_LBN_END dataLoc # Check match len, cause a FAIL + // 9 ... + // + // Allocate a block of matcher data, to contain (when running a match) + // 0: Stack ptr on entry + // 1: Input Index on entry + // 2: fActiveStart, the active bounds start on entry. + // 3: fActiveLimit, the active bounds limit on entry. + // 4: Start index of match current match attempt. + // The first four items must match the layout of data for LA_START / LA_END + + // Generate match code for any pending literals. + c.fixLiterals(false) + + // Allocate data space + dataLoc := c.allocateData(5) + + // Emit URX_LB_START + c.appendOp(urxLbStart, dataLoc) + + // Emit URX_LBN_CONT + c.appendOp(urxLbnCount, dataLoc) + c.appendOp(urxReservedOp, 0) // MinMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // MaxMatchLength. To be filled later. + c.appendOp(urxReservedOp, 0) // Continue Loc. To be filled later. + + // Emit the NOPs + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the URX_LB_CONT and the NOP. + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenLookBehindN) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // The final two instructions will be added when the ')' is encountered. + + case doConditionalExpr, doPerlInline: + // Conditionals such as (?(1)a:b) + // Perl inline-condtionals. (?{perl code}a|b) We're not perl, no way to do them. 
+ c.error(Unimplemented) + + case doCloseParen: + c.handleCloseParen() + if len(c.parenStack) == 0 { + // Extra close paren, or missing open paren. + c.error(MismatchedParen) + } + + case doNOP: + + case doBadOpenParenType, doRuleError: + c.error(RuleSyntax) + + case doMismatchedParenErr: + c.error(MismatchedParen) + + case doPlus: + // Normal '+' compiles to + // 1. stuff to be repeated (already built) + // 2. jmp-sav 1 + // 3. ... + // + // Or, if the item to be repeated can match a zero length string, + // 1. STO_INP_LOC data-loc + // 2. body of stuff to be repeated + // 3. JMP_SAV_X 2 + // 4. ... + + // + // Or, if the item to be repeated is simple + // 1. Item to be repeated. + // 2. LOOP_SR_I set number (assuming repeated item is a set ref) + // 3. LOOP_C stack location + topLoc := c.blockTopLoc(false) // location of item #1 + + // Check for simple constructs, which may get special optimized code. + if topLoc == len(c.out.compiledPat)-1 { + repeatedOp := c.out.compiledPat[topLoc] + + if repeatedOp.typ() == urxSetref { + // Emit optimized code for [char set]+ + c.appendOp(urxLoopSrI, repeatedOp.value()) + frameLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, frameLoc) + break + } + + if repeatedOp.typ() == urxDotany || repeatedOp.typ() == urxDotanyAll || repeatedOp.typ() == urxDotanyUnix { + // Emit Optimized code for .+ operations. + loopOpI := c.buildOp(urxLoopDotI, 0) + if repeatedOp.typ() == urxDotanyAll { + // URX_LOOP_DOT_I operand is a flag indicating ". matches any" mode. + loopOpI |= 1 + } + if c.modeFlags&UnixLines != 0 { + loopOpI |= 2 + } + c.appendIns(loopOpI) + frameLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, frameLoc) + break + } + } + + // General case. + + // Check for minimum match length of zero, which requires + // extra loop-breaking code. + if c.minMatchLength(topLoc, len(c.out.compiledPat)-1) == 0 { + // Zero length match is possible. + // Emit the code sequence that can handle it. 
+ c.insertOp(topLoc) + frameLoc := c.allocateStackData(1) + op := c.buildOp(urxStoInpLoc, frameLoc) + c.out.compiledPat[topLoc] = op + + c.appendOp(urxJmpSavX, topLoc+1) + } else { + // Simpler code when the repeated body must match something non-empty + c.appendOp(urxJmpSav, topLoc) + } + + case doNGPlus: + // Non-greedy '+?' compiles to + // 1. stuff to be repeated (already built) + // 2. state-save 1 + // 3. ... + topLoc := c.blockTopLoc(false) + c.appendOp(urxStateSave, topLoc) + + case doOpt: + // Normal (greedy) ? quantifier. + // Compiles to + // 1. state save 3 + // 2. body of optional block + // 3. ... + // Insert the state save into the compiled pattern, and we're done. + saveStateLoc := c.blockTopLoc(true) + saveStateOp := c.buildOp(urxStateSave, len(c.out.compiledPat)) + c.out.compiledPat[saveStateLoc] = saveStateOp + + case doNGOpt: + // Non-greedy ?? quantifier + // compiles to + // 1. jmp 4 + // 2. body of optional block + // 3 jmp 5 + // 4. state save 2 + // 5 ... + // This code is less than ideal, with two jmps instead of one, because we can only + // insert one instruction at the top of the block being iterated. + jmp1Loc := c.blockTopLoc(true) + jmp2Loc := len(c.out.compiledPat) + + jmp1Op := c.buildOp(urxJmp, jmp2Loc+1) + c.out.compiledPat[jmp1Loc] = jmp1Op + + c.appendOp(urxJmp, jmp2Loc+2) + c.appendOp(urxStateSave, jmp1Loc+1) + + case doStar: + // Normal (greedy) * quantifier. + // Compiles to + // 1. STATE_SAVE 4 + // 2. body of stuff being iterated over + // 3. JMP_SAV 2 + // 4. ... + // + // Or, if the body is a simple [Set], + // 1. LOOP_SR_I set number + // 2. LOOP_C stack location + // ... + // + // Or if this is a .* + // 1. LOOP_DOT_I (. matches all mode flag) + // 2. LOOP_C stack location + // + // Or, if the body can match a zero-length string, to inhibit infinite loops, + // 1. STATE_SAVE 5 + // 2. STO_INP_LOC data-loc + // 3. body of stuff + // 4. JMP_SAV_X 2 + // 5. ... 
+ // location of item #1, the STATE_SAVE + topLoc := c.blockTopLoc(false) + + // Check for simple *, where the construct being repeated + // compiled to single opcode, and might be optimizable. + if topLoc == len(c.out.compiledPat)-1 { + repeatedOp := c.out.compiledPat[topLoc] + + if repeatedOp.typ() == urxSetref { + // Emit optimized code for a [char set]* + loopOpI := c.buildOp(urxLoopSrI, repeatedOp.value()) + c.out.compiledPat[topLoc] = loopOpI + dataLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, dataLoc) + break + } + + if repeatedOp.typ() == urxDotany || repeatedOp.typ() == urxDotanyAll || repeatedOp.typ() == urxDotanyUnix { + // Emit Optimized code for .* operations. + loopOpI := c.buildOp(urxLoopDotI, 0) + if repeatedOp.typ() == urxDotanyAll { + // URX_LOOP_DOT_I operand is a flag indicating . matches any mode. + loopOpI |= 1 + } + if (c.modeFlags & UnixLines) != 0 { + loopOpI |= 2 + } + c.out.compiledPat[topLoc] = loopOpI + dataLoc := c.allocateStackData(1) + c.appendOp(urxLoopC, dataLoc) + break + } + } + + // Emit general case code for this * + // The optimizations did not apply. + + saveStateLoc := c.blockTopLoc(true) + jmpOp := c.buildOp(urxJmpSav, saveStateLoc+1) + + // Check for minimum match length of zero, which requires + // extra loop-breaking code. + if c.minMatchLength(saveStateLoc, len(c.out.compiledPat)-1) == 0 { + c.insertOp(saveStateLoc) + dataLoc := c.allocateStackData(1) + + op := c.buildOp(urxStoInpLoc, dataLoc) + c.out.compiledPat[saveStateLoc+1] = op + jmpOp = c.buildOp(urxJmpSavX, saveStateLoc+2) + } + + // Locate the position in the compiled pattern where the match will continue + // after completing the *. (4 or 5 in the comment above) + continueLoc := len(c.out.compiledPat) + 1 + + // Put together the save state op and store it into the compiled code. 
+ saveStateOp := c.buildOp(urxStateSave, continueLoc) + c.out.compiledPat[saveStateLoc] = saveStateOp + + // Append the URX_JMP_SAV or URX_JMPX operation to the compiled pattern. + c.appendIns(jmpOp) + + case doNGStar: + // Non-greedy *? quantifier + // compiles to + // 1. JMP 3 + // 2. body of stuff being iterated over + // 3. STATE_SAVE 2 + // 4 ... + jmpLoc := c.blockTopLoc(true) // loc 1. + saveLoc := len(c.out.compiledPat) // loc 3. + jmpOp := c.buildOp(urxJmp, saveLoc) + c.out.compiledPat[jmpLoc] = jmpOp + c.appendOp(urxStateSave, jmpLoc+1) + + case doIntervalInit: + // The '{' opening an interval quantifier was just scanned. + // Init the counter varaiables that will accumulate the values as the digits + // are scanned. + c.intervalLow = 0 + c.intervalUpper = -1 + + case doIntevalLowerDigit: + // Scanned a digit from the lower value of an {lower,upper} interval + digitValue := uCharDigitValue(c.c.char) + val := int64(c.intervalLow)*10 + digitValue + if val > math.MaxInt32 { + c.error(NumberTooBig) + } else { + c.intervalLow = int(val) + } + + case doIntervalUpperDigit: + // Scanned a digit from the upper value of an {lower,upper} interval + if c.intervalUpper < 0 { + c.intervalUpper = 0 + } + digitValue := uCharDigitValue(c.c.char) + val := int64(c.intervalUpper)*10 + digitValue + if val > math.MaxInt32 { + c.error(NumberTooBig) + } else { + c.intervalUpper = int(val) + } + + case doIntervalSame: + // Scanned a single value interval like {27}. Upper = Lower. + c.intervalUpper = c.intervalLow + + case doInterval: + // Finished scanning a normal {lower,upper} interval. Generate the code for it. + if !c.compileInlineInterval() { + c.compileInterval(urxCtrInit, utxCtrLoop) + } + + case doPossessiveInterval: + // Finished scanning a Possessive {lower,upper}+ interval. Generate the code for it. + + // Remember the loc for the top of the block being looped over. 
+ // (Can not reserve a slot in the compiled pattern at this time, because + // compileInterval needs to reserve also, and blockTopLoc can only reserve + // once per block.) + topLoc := c.blockTopLoc(false) + + // Produce normal looping code. + c.compileInterval(urxCtrInit, utxCtrLoop) + + // Surround the just-emitted normal looping code with a STO_SP ... LD_SP + // just as if the loop was inclosed in atomic parentheses. + + // First the STO_SP before the start of the loop + c.insertOp(topLoc) + + varLoc := c.allocateData(1) // Reserve a data location for saving the + op := c.buildOp(urxStoSp, varLoc) + c.out.compiledPat[topLoc] = op + + var loopOp instruction + loopOp, c.out.compiledPat = stackPop(c.out.compiledPat) + if loopOp.typ() != utxCtrLoop || loopOp.value() != topLoc { + panic("bad instruction at the end of compiled pattern") + } + + loopOp++ // point LoopOp after the just-inserted STO_SP + c.appendIns(loopOp) + + // Then the LD_SP after the end of the loop + c.appendOp(urxLdSp, varLoc) + + case doNGInterval: + // Finished scanning a non-greedy {lower,upper}? interval. Generate the code for it. + c.compileInterval(urxCtrInitNg, urxCtrLoopNg) + + case doIntervalError: + c.error(BadInterval) + + case doLiteralChar: + // We've just scanned a "normal" character from the pattern, + c.literalChar(c.c.char) + + case doEscapedLiteralChar: + // We've just scanned an backslashed escaped character with no + // special meaning. It represents itself. + if (c.modeFlags&ErrorOnUnknownEscapes) != 0 && ((c.c.char >= 0x41 && c.c.char <= 0x5A) || /* in [A-Z] */ (c.c.char >= 0x61 && c.c.char <= 0x7a)) { // in [a-z] + c.error(BadEscapeSequence) + } + c.literalChar(c.c.char) + + case doDotAny: + // scanned a ".", match any single character. 
+ c.fixLiterals(false) + if (c.modeFlags & DotAll) != 0 { + c.appendOp(urxDotanyAll, 0) + } else if (c.modeFlags & UnixLines) != 0 { + c.appendOp(urxDotanyUnix, 0) + } else { + c.appendOp(urxDotany, 0) + } + + case doCaret: + c.fixLiterals(false) + if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxCaret, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxCaretM, 0) + } else if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxCaret, 0) // Only testing true start of input. + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxCaretMUnix, 0) + } + + case doDollar: + c.fixLiterals(false) + if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxDollar, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) == 0 { + c.appendOp(urxDollarM, 0) + } else if (c.modeFlags&Multiline) == 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxDollarD, 0) + } else if (c.modeFlags&Multiline) != 0 && (c.modeFlags&UnixLines) != 0 { + c.appendOp(urxDollarMd, 0) + } + + case doBackslashA: + c.fixLiterals(false) + c.appendOp(urxCaret, 0) + + case doBackslashB: + if !BreakIteration { + if (c.modeFlags & UWord) != 0 { + c.error(Unimplemented) + } + } + c.fixLiterals(false) + if c.modeFlags&UWord != 0 { + c.appendOp(urxBackslashBu, 1) + } else { + c.appendOp(urxBackslashB, 1) + } + + case doBackslashb: + if !BreakIteration { + if (c.modeFlags & UWord) != 0 { + c.error(Unimplemented) + } + } + c.fixLiterals(false) + if c.modeFlags&UWord != 0 { + c.appendOp(urxBackslashBu, 0) + } else { + c.appendOp(urxBackslashB, 0) + } + + case doBackslashD: + c.fixLiterals(false) + c.appendOp(urxBackslashD, 1) + + case doBackslashd: + c.fixLiterals(false) + c.appendOp(urxBackslashD, 0) + + case doBackslashG: + c.fixLiterals(false) + c.appendOp(urxBackslashG, 0) + + case doBackslashH: + c.fixLiterals(false) + 
c.appendOp(urxBackslashH, 1) + + case doBackslashh: + c.fixLiterals(false) + c.appendOp(urxBackslashH, 0) + + case doBackslashR: + c.fixLiterals(false) + c.appendOp(urxBackslashR, 0) + + case doBackslashS: + c.fixLiterals(false) + c.appendOp(urxStatSetrefN, urxIsspaceSet) + + case doBackslashs: + c.fixLiterals(false) + c.appendOp(urxStaticSetref, urxIsspaceSet) + + case doBackslashV: + c.fixLiterals(false) + c.appendOp(urxBackslashV, 1) + + case doBackslashv: + c.fixLiterals(false) + c.appendOp(urxBackslashV, 0) + + case doBackslashW: + c.fixLiterals(false) + c.appendOp(urxStatSetrefN, urxIswordSet) + + case doBackslashw: + c.fixLiterals(false) + c.appendOp(urxStaticSetref, urxIswordSet) + + case doBackslashX: + if !BreakIteration { + // Grapheme Cluster Boundary requires ICU break iteration. + c.error(Unimplemented) + } + c.fixLiterals(false) + c.appendOp(urxBackslashX, 0) + + case doBackslashZ: + c.fixLiterals(false) + c.appendOp(urxDollar, 0) + + case doBackslashz: + c.fixLiterals(false) + c.appendOp(urxBackslashZ, 0) + + case doEscapeError: + c.error(BadEscapeSequence) + + case doExit: + c.fixLiterals(false) + return false + + case doProperty: + c.fixLiterals(false) + theSet := c.scanProp() + c.compileSet(theSet) + + case doNamedChar: + ch := c.scanNamedChar() + c.literalChar(ch) + + case doBackRef: + // BackReference. Somewhat unusual in that the front-end can not completely parse + // the regular expression, because the number of digits to be consumed + // depends on the number of capture groups that have been defined. So + // we have to do it here instead. + numCaptureGroups := len(c.out.groupMap) + groupNum := int64(0) + ch := c.c.char + + for { + // Loop once per digit, for max allowed number of digits in a back reference. 
+ digit := uCharDigitValue(ch) + groupNum = groupNum*10 + digit + if groupNum >= int64(numCaptureGroups) { + break + } + ch = c.peekCharLL() + if !staticRuleSet[ruleSetDigitChar-128].ContainsRune(ch) { + break + } + c.nextCharLL() + } + + // Scan of the back reference in the source regexp is complete. Now generate + // the compiled code for it. + // Because capture groups can be forward-referenced by back-references, + // we fill the operand with the capture group number. At the end + // of compilation, it will be changed to the variable's location. + if groupNum == 0 { + panic("\\0 begins an octal escape sequence, and shouldn't enter this code path at all") + } + c.fixLiterals(false) + if (c.modeFlags & CaseInsensitive) != 0 { + c.appendOp(urxBackrefI, int(groupNum)) + } else { + c.appendOp(urxBackref, int(groupNum)) + } + + case doBeginNamedBackRef: + if c.captureName != nil { + panic("should not replace capture name") + } + c.captureName = &strings.Builder{} + + case doContinueNamedBackRef: + c.captureName.WriteRune(c.c.char) + + case doCompleteNamedBackRef: + { + groupNumber := c.out.namedCaptureMap[c.captureName.String()] + if groupNumber == 0 { + // Group name has not been defined. + // Could be a forward reference. If we choose to support them at some + // future time, extra mechanism will be required at this point. + c.error(InvalidCaptureGroupName) + } else { + // Given the number, handle identically to a \n numbered back reference. + // See comments above, under doBackRef + c.fixLiterals(false) + if (c.modeFlags & CaseInsensitive) != 0 { + c.appendOp(urxBackrefI, groupNumber) + } else { + c.appendOp(urxBackref, groupNumber) + } + } + c.captureName = nil + } + + case doPossessivePlus: + // Possessive ++ quantifier. + // Compiles to + // 1. STO_SP + // 2. body of stuff being iterated over + // 3. STATE_SAVE 5 + // 4. JMP 2 + // 5. LD_SP + // 6. ... + // + // Note: TODO: This is pretty inefficient. 
A mass of saved state is built up + // then unconditionally discarded. Perhaps introduce a new opcode. Ticket 6056 + // + // Emit the STO_SP + topLoc := c.blockTopLoc(true) + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. + op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the STATE_SAVE + c.appendOp(urxStateSave, len(c.out.compiledPat)+2) + + // Emit the JMP + c.appendOp(urxJmp, topLoc+1) + + // Emit the LD_SP + c.appendOp(urxLdSp, stoLoc) + + case doPossessiveStar: + // Possessive *+ quantifier. + // Compiles to + // 1. STO_SP loc + // 2. STATE_SAVE 5 + // 3. body of stuff being iterated over + // 4. JMP 2 + // 5. LD_SP loc + // 6 ... + // TODO: do something to cut back the state stack each time through the loop. + // Reserve two slots at the top of the block. + topLoc := c.blockTopLoc(true) + c.insertOp(topLoc) + + // emit STO_SP loc + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. + op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the SAVE_STATE 5 + L7 := len(c.out.compiledPat) + 1 + op = c.buildOp(urxStateSave, L7) + c.out.compiledPat[topLoc+1] = op + + // Append the JMP operation. + c.appendOp(urxJmp, topLoc+1) + + // Emit the LD_SP loc + c.appendOp(urxLdSp, stoLoc) + + case doPossessiveOpt: + // Possessive ?+ quantifier. + // Compiles to + // 1. STO_SP loc + // 2. SAVE_STATE 5 + // 3. body of optional block + // 4. LD_SP loc + // 5. ... + // + // Reserve two slots at the top of the block. + topLoc := c.blockTopLoc(true) + c.insertOp(topLoc) + + // Emit the STO_SP + stoLoc := c.allocateData(1) // Reserve the data location for storing save stack ptr. 
+ op := c.buildOp(urxStoSp, stoLoc) + c.out.compiledPat[topLoc] = op + + // Emit the SAVE_STATE + continueLoc := len(c.out.compiledPat) + 1 + op = c.buildOp(urxStateSave, continueLoc) + c.out.compiledPat[topLoc+1] = op + + // Emit the LD_SP + c.appendOp(urxLdSp, stoLoc) + + case doBeginMatchMode: + c.newModeFlags = c.modeFlags + c.setModeFlag = true + case doMatchMode: // (?i) and similar + var bit RegexpFlag + switch c.c.char { + case 0x69: /* 'i' */ + bit = CaseInsensitive + case 0x64: /* 'd' */ + bit = UnixLines + case 0x6d: /* 'm' */ + bit = Multiline + case 0x73: /* 's' */ + bit = DotAll + case 0x75: /* 'u' */ + bit = 0 /* Unicode casing */ + case 0x77: /* 'w' */ + bit = UWord + case 0x78: /* 'x' */ + bit = Comments + case 0x2d: /* '-' */ + c.setModeFlag = false + default: + // Should never happen. Other chars are filtered out by the scanner. + panic("unreachable") + } + if c.setModeFlag { + c.newModeFlags |= bit + } else { + c.newModeFlags &= ^bit + } + + case doSetMatchMode: + // Emit code to match any pending literals, using the not-yet changed match mode. + c.fixLiterals(false) + + // We've got a (?i) or similar. The match mode is being changed, but + // the change is not scoped to a parenthesized block. + if c.newModeFlags >= 0 { + panic("cNewModeFlags not properly initialized") + } + c.modeFlags = c.newModeFlags + + case doMatchModeParen: + // We've got a (?i: or similar. Begin a parenthesized block, save old + // mode flags so they can be restored at the close of the block. + // + // Compile to a + // - NOP, which later may be replaced by a save-state if the + // parenthesized group gets a * quantifier, followed by + // - NOP, which may later be replaced by a save-state if there + // is an '|' alternation within the parens. 
+ c.fixLiterals(false) + c.appendOp(urxNop, 0) + c.appendOp(urxNop, 0) + + // On the Parentheses stack, start a new frame and add the postions + // of the two NOPs (a normal non-capturing () frame, except for the + // saving of the orignal mode flags.) + c.parenStack = append(c.parenStack, int(c.modeFlags)) + c.parenStack = append(c.parenStack, parenFlags) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-2) + c.parenStack = append(c.parenStack, len(c.out.compiledPat)-1) + + // Set the current mode flags to the new values. + if c.newModeFlags >= 0 { + panic("cNewModeFlags not properly initialized") + } + c.modeFlags = c.newModeFlags + + case doBadModeFlag: + c.error(InvalidFlag) + + case doSuppressComments: + // We have just scanned a '(?'. We now need to prevent the character scanner from + // treating a '#' as a to-the-end-of-line comment. + // (This Perl compatibility just gets uglier and uglier to do...) + c.eolComments = false + + case doSetAddAmp: + set := c.setStack[len(c.setStack)-1] + set.AddRune(chAmp) + + case doSetAddDash: + set := c.setStack[len(c.setStack)-1] + set.AddRune(chDash) + + case doSetBackslashs: + set := c.setStack[len(c.setStack)-1] + set.AddAll(staticPropertySets[urxIsspaceSet]) + + case doSetBackslashS: + sset := uset.New() + sset.AddAll(staticPropertySets[urxIsspaceSet]) // TODO: add latin1 spaces + sset.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(sset) + + case doSetBackslashd: + set := c.setStack[len(c.setStack)-1] + c.err = uprops.AddCategory(set, uchar.GcNdMask) + + case doSetBackslashD: + digits := uset.New() + c.err = uprops.ApplyIntPropertyValue(digits, uprops.UCharGeneralCategoryMask, int32(uchar.GcNdMask)) + digits.Complement() + set := c.setStack[len(c.setStack)-1] + set.AddAll(digits) + + case doSetBackslashh: + h := uset.New() + c.err = uprops.ApplyIntPropertyValue(h, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + h.AddRune(9) // Tab + + set := c.setStack[len(c.setStack)-1] + 
set.AddAll(h) + + case doSetBackslashH: + h := uset.New() + c.err = uprops.ApplyIntPropertyValue(h, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + h.AddRune(9) // Tab + h.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(h) + + case doSetBackslashv: + set := c.setStack[len(c.setStack)-1] + set.AddRuneRange(0x0a, 0x0d) // add range + set.AddRune(0x85) + set.AddRuneRange(0x2028, 0x2029) + + case doSetBackslashV: + v := uset.New() + v.AddRuneRange(0x0a, 0x0d) // add range + v.AddRune(0x85) + v.AddRuneRange(0x2028, 0x2029) + v.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(v) + + case doSetBackslashw: + set := c.setStack[len(c.setStack)-1] + set.AddAll(staticPropertySets[urxIswordSet]) + + case doSetBackslashW: + sset := uset.New() + sset.AddAll(staticPropertySets[urxIswordSet]) + sset.Complement() + + set := c.setStack[len(c.setStack)-1] + set.AddAll(sset) + + case doSetBegin: + c.fixLiterals(false) + c.setStack = append(c.setStack, uset.New()) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginDifference1: + // We have scanned something like [[abc]-[ + // Set up a new UnicodeSet for the set beginning with the just-scanned '[' + // Push a Difference operator, which will cause the new set to be subtracted from what + // went before once it is created. + c.setPushOp(setDifference1) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginIntersection1: + // We have scanned something like [[abc]&[ + // Need both the '&' operator and the open '[' operator. 
+ c.setPushOp(setIntersection1) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetBeginUnion: + // We have scanned something like [[abc][ + // Need to handle the union operation explicitly [[abc] | [ + c.setPushOp(setUnion) + c.setOpStack = append(c.setOpStack, setStart) + if (c.modeFlags & CaseInsensitive) != 0 { + c.setOpStack = append(c.setOpStack, setCaseClose) + } + + case doSetDifference2: + // We have scanned something like [abc-- + // Consider this to unambiguously be a set difference operator. + c.setPushOp(setDifference2) + + case doSetEnd: + // Have encountered the ']' that closes a set. + // Force the evaluation of any pending operations within this set, + // leave the completed set on the top of the set stack. + c.setEval(setEnd) + var start setOperation + start, c.setOpStack = stackPop(c.setOpStack) + if start != setStart { + panic("bad set operation in stack") + } + + case doSetFinish: + // Finished a complete set expression, including all nested sets. + // The close bracket has already triggered clearing out pending set operators, + // the operator stack should be empty and the operand stack should have just + // one entry, the result set. + if len(c.setOpStack) > 0 { + panic("expected setOpStack to be empty") + } + var set *uset.UnicodeSet + set, c.setStack = stackPop(c.setStack) + c.compileSet(set) + + case doSetIntersection2: + // Have scanned something like [abc&& + c.setPushOp(setIntersection2) + + case doSetLiteral: + // Union the just-scanned literal character into the set being built. + // This operation is the highest precedence set operation, so we can always do + // it immediately, without waiting to see what follows. 
It is necessary to perform + // any pending '-' or '&' operation first, because these have the same precedence + // as union-ing in a literal' + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(c.c.char) + c.lastSetLiteral = c.c.char + + case doSetLiteralEscaped: + // A back-slash escaped literal character was encountered. + // Processing is the same as with setLiteral, above, with the addition of + // the optional check for errors on escaped ASCII letters. + if (c.modeFlags&ErrorOnUnknownEscapes) != 0 && + ((c.c.char >= 0x41 && c.c.char <= 0x5A) || // in [A-Z] + (c.c.char >= 0x61 && c.c.char <= 0x7a)) { // in [a-z] + c.error(BadEscapeSequence) + } + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(c.c.char) + c.lastSetLiteral = c.c.char + + case doSetNamedChar: + // Scanning a \N{UNICODE CHARACTER NAME} + // Aside from the source of the character, the processing is identical to doSetLiteral, + // above. + ch := c.scanNamedChar() + c.setEval(setUnion) + set := c.setStack[len(c.setStack)-1] + set.AddRune(ch) + c.lastSetLiteral = ch + + case doSetNamedRange: + // We have scanned literal-\N{CHAR NAME}. Add the range to the set. + // The left character is already in the set, and is saved in fLastSetLiteral. + // The right side needs to be picked up, the scan is at the 'N'. + // Lower Limit > Upper limit being an error matches both Java + // and ICU UnicodeSet behavior. + ch := c.scanNamedChar() + if c.err == nil && (c.lastSetLiteral == -1 || c.lastSetLiteral > ch) { + c.error(InvalidRange) + } + set := c.setStack[len(c.setStack)-1] + set.AddRuneRange(c.lastSetLiteral, ch) + c.lastSetLiteral = ch + + case doSetNegate: + // Scanned a '^' at the start of a set. + // Push the negation operator onto the set op stack. + // A twist for case-insensitive matching: + // the case closure operation must happen _before_ negation. + // But the case closure operation will already be on the stack if it's required. 
+ // This requires checking for case closure, and swapping the stack order + // if it is present. + tosOp := c.setOpStack[len(c.setOpStack)-1] + if tosOp == setCaseClose { + _, c.setOpStack = stackPop(c.setOpStack) + c.setOpStack = append(c.setOpStack, setNegation) + c.setOpStack = append(c.setOpStack, setCaseClose) + } else { + c.setOpStack = append(c.setOpStack, setNegation) + } + + case doSetNoCloseError: + c.error(MissingCloseBracket) + + case doSetOpError: + c.error(RuleSyntax) // -- or && at the end of a set. Illegal. + + case doSetPosixProp: + if set := c.scanPosixProp(); set != nil { + c.setStack[len(c.setStack)-1].AddAll(set) + } + + case doSetProp: + // Scanned a \p \P within [brackets]. + if set := c.scanProp(); set != nil { + c.setStack[len(c.setStack)-1].AddAll(set) + } + + case doSetRange: + // We have scanned literal-literal. Add the range to the set. + // The left character is already in the set, and is saved in fLastSetLiteral. + // The right side is the current character. + // Lower Limit > Upper limit being an error matches both Java + // and ICU UnicodeSet behavior. 
+
+		if c.lastSetLiteral == -1 || c.lastSetLiteral > c.c.char {
+			c.error(InvalidRange)
+		}
+		c.setStack[len(c.setStack)-1].AddRuneRange(c.lastSetLiteral, c.c.char)
+
+	default:
+		panic("unexpected OP in parser")
+	}
+
+	return c.err == nil
+}
+
+// uCharDigitValue returns the numeric value of char when it is one of the
+// ASCII decimal digits '0' through '9', and -1 for every other rune.
+func uCharDigitValue(char rune) int64 {
+	if char >= '0' && char <= '9' {
+		return int64(char - '0')
+	}
+	return -1
+}
+
+// stackPop removes the last element of stack and returns it together with the
+// shortened slice. The returned slice is a re-slice of the input, not a copy.
+// When stack is empty, the zero value of T is returned and the slice is
+// handed back unchanged.
+func stackPop[T any](stack []T) (T, []T) {
+	var out T
+	if len(stack) > 0 {
+		out = stack[len(stack)-1]
+		stack = stack[:len(stack)-1]
+	}
+	return out, stack
+}
+
+// error records a compilation failure on the compiler. It stores a new
+// *CompileError in c.err holding the given code, the current c.lineNum and
+// c.charNum, and c.out.pattern as context.
+//
+// The assignment is unconditional: an error already stored in c.err is
+// replaced by the new one.
+func (c *compiler) error(e CompileErrorCode) {
+	c.err = &CompileError{
+		Code:    e,
+		Line:    c.lineNum,
+		Offset:  c.charNum,
+		Context: c.out.pattern,
+	}
+}
+
+// stripNOPs removes every urxNop instruction from c.out.compiledPat. The
+// remaining instructions are compacted in place, and operands that refer to
+// code locations are rewritten to point at the shifted positions.
+//
+// Back-reference instructions (urxBackref, urxBackrefI) are resolved here as
+// well: their operand is used as a 1-based index into c.out.groupMap and is
+// replaced by the value stored there. An operand larger than
+// len(c.out.groupMap) records InvalidBackRef.
+//
+// The function does nothing when c.err is already set, and panics if it meets
+// an opcode that none of the cases below lists.
+func (c *compiler) stripNOPs() {
+	if c.err != nil {
+		return
+	}
+
+	end := len(c.out.compiledPat)
+	// deltas[i] is the number of NOPs that precede instruction i, i.e. how far
+	// instruction i moves towards the front once the NOPs are gone.
+	deltas := make([]int, 0, end)
+
+	// Make a first pass over the code, computing the amount that things
+	// will be offset at each location in the original code.
+	var loc, d int
+	for loc = 0; loc < end; loc++ {
+		deltas = append(deltas, d)
+		op := c.out.compiledPat[loc]
+		if op.typ() == urxNop {
+			d++
+		}
+	}
+
+	// Make a second pass over the code, removing the NOPs by moving following
+	// code up, and patching operands that refer to code locations that
+	// are being moved. The array of offsets from the first step is used
+	// to compute the new operand values.
+	// src walks the original instructions; dst is the next free slot in the
+	// compacted output and never runs ahead of src.
+	var src, dst int
+	for src = 0; src < end; src++ {
+		op := c.out.compiledPat[src]
+		opType := op.typ()
+
+		switch opType {
+		case urxNop:
+			// skip
+
+		case urxStateSave,
+			urxJmp,
+			utxCtrLoop,
+			urxCtrLoopNg,
+			urxRelocOprnd,
+			urxJmpx,
+			urxJmpSav,
+			urxJmpSavX:
+			// These are instructions with operands that refer to code locations.
+			// The target moves up by the number of NOPs in front of it.
+			operandAddress := op.value()
+			fixedOperandAddress := operandAddress - deltas[operandAddress]
+			op = c.buildOp(opType, fixedOperandAddress)
+			c.out.compiledPat[dst] = op
+			dst++
+
+		case urxBackref, urxBackrefI:
+			where := op.value()
+			if where > len(c.out.groupMap) {
+				c.error(InvalidBackRef)
+				// break leaves only the switch: this instruction is not copied
+				// (dst is not advanced) and the loop goes on with the next op.
+				break
+			}
+
+			// groupMap is indexed by group number minus one.
+			where = int(c.out.groupMap[where-1])
+			op = c.buildOp(opType, where)
+			c.out.compiledPat[dst] = op
+			dst++
+			// Set whenever a back-reference instruction is kept in the pattern.
+			c.out.needsAltInput = true
+
+		case urxReservedOp,
+			urxReservedOpN,
+			urxBacktrack,
+			urxEnd,
+			urxOnechar,
+			urxString,
+			urxStringLen,
+			urxStartCapture,
+			urxEndCapture,
+			urxStaticSetref,
+			urxStatSetrefN,
+			urxSetref,
+			urxDotany,
+			urxFail,
+			urxBackslashB,
+			urxBackslashBu,
+			urxBackslashG,
+			urxBackslashX,
+			urxBackslashZ,
+			urxDotanyAll,
+			urxBackslashD,
+			urxCaret,
+			urxDollar,
+			urxCtrInit,
+			urxCtrInitNg,
+			urxDotanyUnix,
+			urxStoSp,
+			urxLdSp,
+			urxStoInpLoc,
+			urxLaStart,
+			urxLaEnd,
+			urcOnecharI,
+			urxStringI,
+			urxDollarM,
+			urxCaretM,
+			urxCaretMUnix,
+			urxLbStart,
+			urxLbCont,
+			urxLbEnd,
+			urxLbnCount,
+			urxLbnEnd,
+			urxLoopSrI,
+			urxLoopDotI,
+			urxLoopC,
+			urxDollarD,
+			urxDollarMd,
+			urxBackslashH,
+			urxBackslashR,
+			urxBackslashV:
+			// These instructions are unaltered by the relocation.
+			c.out.compiledPat[dst] = op
+			dst++
+
+		default:
+			// Some op is unaccounted for.
+			panic("unreachable")
+		}
+	}
+
+	// Drop the stale tail left behind by the in-place compaction.
+	c.out.compiledPat = c.out.compiledPat[:dst]
+}
+
+func (c *compiler) matchStartType() {
+	var loc int               // Location in the pattern of the current op being processed.
+	var currentLen int32      // Minimum length of a match to this point (loc) in the pattern
+	var numInitialStrings int // Number of strings encountered that could match at start.
+	var atStart = true        // True if no part of the pattern yet encountered
+	// could have advanced the position in a match.
+ // (Maximum match length so far == 0) + + // forwardedLength is a vector holding minimum-match-length values that + // are propagated forward in the pattern by JMP or STATE_SAVE operations. + // It must be one longer than the pattern being checked because some ops + // will jmp to a end-of-block+1 location from within a block, and we must + // count those when checking the block. + end := len(c.out.compiledPat) + forwardedLength := make([]int32, end+1) + + for loc = 3; loc < end; loc++ { + forwardedLength[loc] = math.MaxInt32 + } + + for loc = 3; loc < end; loc++ { + op := c.out.compiledPat[loc] + opType := op.typ() + + // The loop is advancing linearly through the pattern. + // If the op we are now at was the destination of a branch in the pattern, + // and that path has a shorter minimum length than the current accumulated value, + // replace the current accumulated value. + if forwardedLength[loc] < currentLen { + currentLen = forwardedLength[loc] + } + + switch opType { + // Ops that don't change the total length matched + case urxReservedOp, + urxEnd, + urxFail, + urxStringLen, + urxNop, + urxStartCapture, + urxEndCapture, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashZ, + urxDollar, + urxDollarM, + urxDollarD, + urxDollarMd, + urxRelocOprnd, + urxStoInpLoc, + urxBackref, // BackRef. Must assume that it might be a zero length match + urxBackrefI, + urxStoSp, // Setup for atomic or possessive blocks. Doesn't change what can match. + urxLdSp: + // skip + + case urxCaret: + if atStart { + c.out.startType = startStart + } + + case urxCaretM, urxCaretMUnix: + if atStart { + c.out.startType = startLine + } + + case urxOnechar: + if currentLen == 0 { + // This character could appear at the start of a match. + // Add it to the set of possible starting characters. 
+ c.out.initialChars.AddRune(op.value32()) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxSetref: + if currentLen == 0 { + sn := op.value() + set := c.out.sets[sn] + c.out.initialChars.AddAll(set) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxLoopSrI: + // [Set]*, like a SETREF, above, in what it can match, + // but may not match at all, so currentLen is not incremented. + if currentLen == 0 { + sn := op.value() + set := c.out.sets[sn] + c.out.initialChars.AddAll(set) + numInitialStrings += 2 + } + atStart = false + + case urxLoopDotI: + if currentLen == 0 { + // .* at the start of a pattern. + // Any character can begin the match. + c.out.initialChars.Clear() + c.out.initialChars.Complement() + numInitialStrings += 2 + } + atStart = false + + case urxStaticSetref: + if currentLen == 0 { + sn := op.value() + c.out.initialChars.AddAll(staticPropertySets[sn]) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxStatSetrefN: + if currentLen == 0 { + sn := op.value() + sc := uset.New() + sc.AddAll(staticPropertySets[sn]) + sc.Complement() + + c.out.initialChars.AddAll(sc) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashD: + // Digit Char + if currentLen == 0 { + s := uset.New() + c.err = uprops.ApplyIntPropertyValue(s, uprops.UCharGeneralCategoryMask, int32(uchar.GcNdMask)) + if op.value() != 0 { + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashH: + // Horiz white space + if currentLen == 0 { + s := uset.New() + c.err = uprops.ApplyIntPropertyValue(s, uprops.UCharGeneralCategoryMask, int32(uchar.GcZsMask)) + s.AddRune(9) // Tab + if op.value() != 0 { + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + 
currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashR, // Any line ending sequence + urxBackslashV: // Any line ending code point, with optional negation + if currentLen == 0 { + s := uset.New() + s.AddRuneRange(0x0a, 0x0d) // add range + s.AddRune(0x85) + s.AddRuneRange(0x2028, 0x2029) + if op.value() != 0 { + // Complement option applies to URX_BACKSLASH_V only. + s.Complement() + } + c.out.initialChars.AddAll(s) + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urcOnecharI: + // Case Insensitive Single Character. + if currentLen == 0 { + ch := op.value32() + if uprops.HasBinaryProperty(ch, uprops.UCharCaseSensitive) { + starters := uset.New() + starters.AddRuneRange(ch, ch) + starters.CloseOver(uset.CaseInsensitive) + // findCaseInsensitiveStarters(c, &starters); + // For ONECHAR_I, no need to worry about text chars that expand on folding into + // strings. The expanded folding can't match the pattern. + c.out.initialChars.AddAll(starters) + } else { + // Char has no case variants. Just add it as-is to the + // set of possible starting chars. + c.out.initialChars.AddRune(ch) + } + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxBackslashX, // Grahpeme Cluster. Minimum is 1, max unbounded. + urxDotanyAll, // . matches one or two. + urxDotany, + urxDotanyUnix: + if currentLen == 0 { + // These constructs are all bad news when they appear at the start + // of a match. Any character can begin the match. + c.out.initialChars.Clear() + c.out.initialChars.Complement() + numInitialStrings += 2 + } + currentLen = safeIncrement(currentLen, 1) + atStart = false + + case urxJmpx: + loc++ // Except for extra operand on URX_JMPX, same as URX_JMP. + fallthrough + + case urxJmp: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. 
Can safely ignore, the worst that will happen + // is that we understate the true minimum length + currentLen = forwardedLength[loc+1] + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] > currentLen { + forwardedLength[jmpDest] = currentLen + } + } + atStart = false + + case urxJmpSav, + urxJmpSavX: + // Combo of state save to the next loc, + jmp backwards. + // Net effect on min. length computation is nothing. + atStart = false + + case urxBacktrack: + // Fails are kind of like a branch, except that the min length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + atStart = false + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = (currentLen) + } + } + atStart = false + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + stringLen := stringLenOp.value() + if currentLen == 0 { + // Add the starting character of this string to the set of possible starting + // characters for this pattern. + stringStartIdx := op.value() + ch := c.out.literalText[stringStartIdx] + c.out.initialChars.AddRune(ch) + + // Remember this string. After the entire pattern has been checked, + // if nothing else is identified that can start a match, we'll use it. + numInitialStrings++ + c.out.initialStringIdx = stringStartIdx + c.out.initialStringLen = stringLen + } + + currentLen = safeIncrement(currentLen, stringLen) + atStart = false + + case urxStringI: + // Case-insensitive string. Unlike exact-match strings, we won't + // attempt a string search for possible match positions. But we + // do update the set of possible starting characters. 
+ loc++ + stringLenOp := c.out.compiledPat[loc] + stringLen := stringLenOp.value() + if currentLen == 0 { + // Add the starting character of this string to the set of possible starting + // characters for this pattern. + stringStartIdx := op.value() + ch := c.out.literalText[stringStartIdx] + s := uset.New() + c.findCaseInsensitiveStarters(ch, s) + c.out.initialChars.AddAll(s) + numInitialStrings += 2 // Matching on an initial string not possible. + } + currentLen = safeIncrement(currentLen, stringLen) + atStart = false + + case urxCtrInit, + urxCtrInitNg: + // Loop Init Ops. These don't change the min length, but they are 4 word ops + // so location must be updated accordingly. + // Loop Init Ops. + // If the min loop count == 0 + // move loc forwards to the end of the loop, skipping over the body. + // If the min count is > 0, + // continue normal processing of the body of the loop. + loopEndLoc := c.out.compiledPat[loc+1].value() + minLoopCount := int(c.out.compiledPat[loc+2]) + if minLoopCount == 0 { + // Min Loop Count of 0, treat like a forward branch and + // move the current minimum length up to the target + // (end of loop) location. + if forwardedLength[loopEndLoc] > currentLen { + forwardedLength[loopEndLoc] = currentLen + } + } + loc += 3 // Skips over operands of CTR_INIT + atStart = false + + case utxCtrLoop, + urxCtrLoopNg: + // Loop ops. + // The jump is conditional, backwards only. + atStart = false + + case urxLoopC: + // More loop ops. These state-save to themselves. + // don't change the minimum match + atStart = false + + case urxLaStart, + urxLbStart: + // Look-around. Scan forward until the matching look-ahead end, + // without processing the look-around block. This is overly pessimistic. + + // Keep track of the nesting depth of look-around blocks. Boilerplate code for + // lookahead contains two LA_END instructions, so count goes up by two + // for each LA_START. 
+ var depth int + if opType == urxLaStart { + depth = 2 + } else { + depth = 1 + } + for { + loc++ + op = c.out.compiledPat[loc] + if op.typ() == urxLaStart { + depth += 2 + } + if op.typ() == urxLbStart { + depth++ + } + if op.typ() == urxLaEnd || op.typ() == urxLbnEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxStateSave { + // Need this because neg lookahead blocks will FAIL to outside + // of the block. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = (currentLen) + } + } + } + } + + case urxLaEnd, + urxLbCont, + urxLbEnd, + urxLbnCount, + urxLbnEnd: + panic("should be consumed in URX_LA_START") + + default: + panic("unreachable") + } + } + + // Sort out what we should check for when looking for candidate match start positions. + // In order of preference, + // 1. Start of input text buffer. + // 2. A literal string. + // 3. Start of line in multi-line mode. + // 4. A single literal character. + // 5. A character from a set of characters. + // + if c.out.startType == startStart { + // Match only at the start of an input text string. + // start type is already set. We're done. + } else if numInitialStrings == 1 && c.out.minMatchLen > 0 { + // Match beginning only with a literal string. + ch := c.out.literalText[c.out.initialStringIdx] + c.out.startType = startString + c.out.initialChar = ch + } else if c.out.startType == startLine { + // Match at start of line in Multi-Line mode. + // Nothing to do here; everything is already set. + } else if c.out.minMatchLen == 0 { + // Zero length match possible. We could start anywhere. + c.out.startType = startNoInfo + } else if c.out.initialChars.Len() == 1 { + // All matches begin with the same char. 
+ c.out.startType = startChar + c.out.initialChar = c.out.initialChars.RuneAt(0) + } else if !c.out.initialChars.ContainsRuneRange(0, 0x10ffff) && c.out.minMatchLen > 0 { + // Matches start with a set of character smaller than the set of all chars. + c.out.startType = startSet + } else { + // Matches can start with anything + c.out.startType = startNoInfo + } +} + +func (c *compiler) appendOp(typ opcode, arg int) { + c.appendIns(c.buildOp(typ, arg)) +} + +func (c *compiler) appendIns(ins instruction) { + if c.err != nil { + return + } + c.out.compiledPat = append(c.out.compiledPat, ins) +} + +func (c *compiler) buildOp(typ opcode, val int) instruction { + if c.err != nil { + return 0 + } + if val > 0x00ffffff { + panic("bad argument to buildOp") + } + if val < 0 { + if !(typ == urxReservedOpN || typ == urxReservedOp) { + panic("bad value to buildOp") + } + typ = urxReservedOpN + } + return instruction(int32(typ)<<24 | int32(val)) +} + +func (c *compiler) handleCloseParen() { + if len(c.parenStack) == 0 { + c.error(MismatchedParen) + return + } + + c.fixLiterals(false) + + var patIdx int + var patOp instruction + + for { + patIdx, c.parenStack = stackPop(c.parenStack) + if patIdx < 0 { + break + } + + patOp = c.out.compiledPat[patIdx] + if patOp.value() != 0 { + panic("branch target for JMP should not be set") + } + patOp |= instruction(len(c.out.compiledPat)) + c.out.compiledPat[patIdx] = patOp + c.matchOpenParen = patIdx + } + + var modeFlags int + modeFlags, c.parenStack = stackPop(c.parenStack) + if modeFlags >= 0 { + panic("modeFlags in paren stack was not negated") + } + + c.modeFlags = RegexpFlag(modeFlags) + + switch patIdx { + case parenPlain, parenFlags: + // No additional fixups required. + // (Grouping-only parentheses) + case parenCapturing: + // Capturing Parentheses. + // Insert a End Capture op into the pattern. + // The frame offset of the variables for this cg is obtained from the + // start capture op and put it into the end-capture op. 
+ + captureOp := c.out.compiledPat[c.matchOpenParen+1] + if captureOp.typ() != urxStartCapture { + panic("bad type in capture op (expected URX_START_CAPTURE)") + } + frameVarLocation := captureOp.value() + c.appendOp(urxEndCapture, frameVarLocation) + + case parenAtomic: + // Atomic Parenthesis. + // Insert a LD_SP operation to restore the state stack to the position + // it was when the atomic parens were entered. + stoOp := c.out.compiledPat[c.matchOpenParen+1] + if stoOp.typ() != urxStoSp { + panic("bad type in capture op (expected URX_STO_SP)") + } + stoLoc := stoOp.value() + c.appendOp(urxLdSp, stoLoc) + + case parenLookahead: + startOp := c.out.compiledPat[c.matchOpenParen-5] + if startOp.typ() != urxLaStart { + panic("bad type in capture op (expected URX_LA_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLaEnd, dataLoc) + + case parenNegLookahead: + startOp := c.out.compiledPat[c.matchOpenParen-1] + if startOp.typ() != urxLaStart { + panic("bad type in capture op (expected URX_LA_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLaEnd, dataLoc) + c.appendOp(urxBacktrack, 0) + c.appendOp(urxLaEnd, dataLoc) + + // Patch the URX_SAVE near the top of the block. + // The destination of the SAVE is the final LA_END that was just added. + saveOp := c.out.compiledPat[c.matchOpenParen] + if saveOp.typ() != urxStateSave { + panic("bad type in capture op (expected URX_STATE_SAVE)") + } + saveOp = c.buildOp(urxStateSave, len(c.out.compiledPat)-1) + c.out.compiledPat[c.matchOpenParen] = saveOp + + case parenLookBehind: + startOp := c.out.compiledPat[c.matchOpenParen-4] + if startOp.typ() != urxLbStart { + panic("bad type in capture op (expected URX_LB_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLbEnd, dataLoc) + c.appendOp(urxLaEnd, dataLoc) + + // Determine the min and max bounds for the length of the + // string that the pattern can match. + // An unbounded upper limit is an error. 
+ patEnd := len(c.out.compiledPat) - 1 + minML := c.minMatchLength(c.matchOpenParen, patEnd) + maxML := c.maxMatchLength(c.matchOpenParen, patEnd) + + if maxML == math.MaxInt32 { + c.error(LookBehindLimit) + break + } + if minML == math.MaxInt32 { + // This condition happens when no match is possible, such as with a + // [set] expression containing no elements. + // In principle, the generated code to evaluate the expression could be deleted, + // but it's probably not worth the complication. + minML = 0 + } + + c.out.compiledPat[c.matchOpenParen-2] = instruction(minML) + c.out.compiledPat[c.matchOpenParen-1] = instruction(maxML) + + case parenLookBehindN: + startOp := c.out.compiledPat[c.matchOpenParen-5] + if startOp.typ() != urxLbStart { + panic("bad type in capture op (expected URX_LB_START)") + } + dataLoc := startOp.value() + c.appendOp(urxLbnEnd, dataLoc) + + // Determine the min and max bounds for the length of the + // string that the pattern can match. + // An unbounded upper limit is an error. + patEnd := len(c.out.compiledPat) - 1 + minML := c.minMatchLength(c.matchOpenParen, patEnd) + maxML := c.maxMatchLength(c.matchOpenParen, patEnd) + + if instruction(maxML).typ() != 0 { + c.error(LookBehindLimit) + break + } + if maxML == math.MaxInt32 { + c.error(LookBehindLimit) + break + } + if minML == math.MaxInt32 { + // This condition happens when no match is possible, such as with a + // [set] expression containing no elements. + // In principle, the generated code to evaluate the expression could be deleted, + // but it's probably not worth the complication. 
+ minML = 0 + } + + c.out.compiledPat[c.matchOpenParen-3] = instruction(minML) + c.out.compiledPat[c.matchOpenParen-2] = instruction(maxML) + + op := c.buildOp(urxRelocOprnd, len(c.out.compiledPat)) + c.out.compiledPat[c.matchOpenParen-1] = op + + default: + panic("unexpected opcode in parenStack") + } + + c.matchCloseParen = len(c.out.compiledPat) +} + +func (c *compiler) fixLiterals(split bool) { + if len(c.literalChars) == 0 { + return + } + + lastCodePoint := c.literalChars[len(c.literalChars)-1] + + // Split: We need to ensure that the last item in the compiled pattern + // refers only to the last literal scanned in the pattern, so that + // quantifiers (*, +, etc.) affect only it, and not a longer string. + // Split before case folding for case insensitive matches. + if split { + c.literalChars = c.literalChars[:len(c.literalChars)-1] + c.fixLiterals(false) + + c.literalChar(lastCodePoint) + c.fixLiterals(false) + return + } + + if c.modeFlags&CaseInsensitive != 0 { + c.literalChars = ucase.FoldRunes(c.literalChars) + lastCodePoint = c.literalChars[len(c.literalChars)-1] + } + + if len(c.literalChars) == 1 { + if c.modeFlags&CaseInsensitive != 0 && uprops.HasBinaryProperty(lastCodePoint, uprops.UCharCaseSensitive) { + c.appendOp(urcOnecharI, int(lastCodePoint)) + } else { + c.appendOp(urxOnechar, int(lastCodePoint)) + } + } else { + if len(c.literalChars) > 0x00ffffff || len(c.out.literalText) > 0x00ffffff { + c.error(PatternTooBig) + } + if c.modeFlags&CaseInsensitive != 0 { + c.appendOp(urxStringI, len(c.out.literalText)) + } else { + c.appendOp(urxString, len(c.out.literalText)) + } + c.appendOp(urxStringLen, len(c.literalChars)) + c.out.literalText = append(c.out.literalText, c.literalChars...) 
+ } + + c.literalChars = c.literalChars[:0] +} + +func (c *compiler) literalChar(point rune) { + c.literalChars = append(c.literalChars, point) +} + +func (c *compiler) allocateData(size int) int { + if c.err != nil { + return 0 + } + if size <= 0 || size > 0x100 || c.out.dataSize < 0 { + c.error(InternalError) + return 0 + } + + dataIndex := c.out.dataSize + c.out.dataSize += size + if c.out.dataSize >= 0x00fffff0 { + c.error(InternalError) + } + return dataIndex +} + +func (c *compiler) allocateStackData(size int) int { + if c.err != nil { + return 0 + } + if size <= 0 || size > 0x100 || c.out.frameSize < 0 { + c.error(InternalError) + return 0 + } + dataIndex := c.out.frameSize + c.out.frameSize += size + if c.out.frameSize >= 0x00fffff0 { + c.error(InternalError) + } + return dataIndex +} + +func (c *compiler) insertOp(where int) { + if where < 0 || where >= len(c.out.compiledPat) { + panic("insertOp: out of bounds") + } + + nop := c.buildOp(urxNop, 0) + c.out.compiledPat = slices.Insert(c.out.compiledPat, where, nop) + + // Walk through the pattern, looking for any ops with targets that + // were moved down by the insert. Fix them. + for loc, op := range c.out.compiledPat { + switch op.typ() { + case urxJmp, urxJmpx, urxStateSave, utxCtrLoop, urxCtrLoopNg, urxJmpSav, urxJmpSavX, urxRelocOprnd: + if op.value() > where { + op = c.buildOp(op.typ(), op.value()+1) + c.out.compiledPat[loc] = op + } + } + } + + // Now fix up the parentheses stack. All positive values in it are locations in + // the compiled pattern. (Negative values are frame boundaries, and don't need fixing.) 
+ for loc, x := range c.parenStack { + if x > where { + c.parenStack[loc] = x + 1 + } + } + + if c.matchCloseParen > where { + c.matchCloseParen++ + } + if c.matchOpenParen > where { + c.matchOpenParen++ + } +} + +func (c *compiler) blockTopLoc(reserve bool) int { + var loc int + c.fixLiterals(true) + + if len(c.out.compiledPat) == c.matchCloseParen { + // The item just processed is a parenthesized block. + loc = c.matchOpenParen + } else { + // Item just compiled is a single thing, a ".", or a single char, a string or a set reference. + // No slot for STATE_SAVE was pre-reserved in the compiled code. + // We need to make space now. + loc = len(c.out.compiledPat) - 1 + op := c.out.compiledPat[loc] + if op.typ() == urxStringLen { + // Strings take two opcode, we want the position of the first one. + // We can have a string at this point if a single character case-folded to two. + loc-- + } + if reserve { + nop := c.buildOp(urxNop, 0) + c.out.compiledPat = slices.Insert(c.out.compiledPat, loc, nop) + } + } + return loc +} + +func (c *compiler) compileInlineInterval() bool { + if c.intervalUpper > 10 || c.intervalUpper < c.intervalLow { + return false + } + + topOfBlock := c.blockTopLoc(false) + if c.intervalUpper == 0 { + // Pathological case. Attempt no matches, as if the block doesn't exist. + // Discard the generated code for the block. + // If the block included parens, discard the info pertaining to them as well. + c.out.compiledPat = c.out.compiledPat[:topOfBlock] + if c.matchOpenParen >= topOfBlock { + c.matchOpenParen = -1 + } + if c.matchCloseParen >= topOfBlock { + c.matchCloseParen = -1 + } + return true + } + + if topOfBlock != len(c.out.compiledPat)-1 && c.intervalUpper != 1 { + // The thing being repeated is not a single op, but some + // more complex block. Do it as a loop, not inlines. + // Note that things "repeated" a max of once are handled as inline, because + // the one copy of the code already generated is just fine. 
+ return false + } + + // Pick up the opcode that is to be repeated + // + op := c.out.compiledPat[topOfBlock] + + // Compute the pattern location where the inline sequence + // will end, and set up the state save op that will be needed. + // + endOfSequenceLoc := len(c.out.compiledPat) - 1 + c.intervalUpper + (c.intervalUpper - c.intervalLow) + + saveOp := c.buildOp(urxStateSave, endOfSequenceLoc) + if c.intervalLow == 0 { + c.insertOp(topOfBlock) + c.out.compiledPat[topOfBlock] = saveOp + } + + // Loop, emitting the op for the thing being repeated each time. + // Loop starts at 1 because one instance of the op already exists in the pattern, + // it was put there when it was originally encountered. + for i := 1; i < c.intervalUpper; i++ { + if i >= c.intervalLow { + c.appendIns(saveOp) + } + c.appendIns(op) + } + return true +} + +func (c *compiler) compileInterval(init opcode, loop opcode) { + // The CTR_INIT op at the top of the block with the {n,m} quantifier takes + // four slots in the compiled code. Reserve them. + topOfBlock := c.blockTopLoc(true) + c.insertOp(topOfBlock) + c.insertOp(topOfBlock) + c.insertOp(topOfBlock) + + // The operands for the CTR_INIT opcode include the index in the matcher data + // of the counter. Allocate it now. There are two data items + // counterLoc --> Loop counter + // +1 --> Input index (for breaking non-progressing loops) + // (Only present if unbounded upper limit on loop) + var dataSize int + if c.intervalUpper < 0 { + dataSize = 2 + } else { + dataSize = 1 + } + counterLoc := c.allocateStackData(dataSize) + + op := c.buildOp(init, counterLoc) + c.out.compiledPat[topOfBlock] = op + + // The second operand of CTR_INIT is the location following the end of the loop. + // Must put in as a URX_RELOC_OPRND so that the value will be adjusted if the + // compilation of something later on causes the code to grow and the target + // position to move. 
+ loopEnd := len(c.out.compiledPat) + op = c.buildOp(urxRelocOprnd, loopEnd) + c.out.compiledPat[topOfBlock+1] = op + + // Followed by the min and max counts. + c.out.compiledPat[topOfBlock+2] = instruction(c.intervalLow) + c.out.compiledPat[topOfBlock+3] = instruction(c.intervalUpper) + + // Append the CTR_LOOP op. The operand is the location of the CTR_INIT op. + // Goes at end of the block being looped over, so just append to the code so far. + c.appendOp(loop, topOfBlock) + + if (c.intervalLow&0xff000000) != 0 || (c.intervalUpper > 0 && (c.intervalUpper&0xff000000) != 0) { + c.error(NumberTooBig) + } + + if c.intervalLow > c.intervalUpper && c.intervalUpper != -1 { + c.error(MaxLtMin) + } +} + +func (c *compiler) scanNamedChar() rune { + c.nextChar(&c.c) + if c.c.char != chLBrace { + c.error(PropertySyntax) + return 0 + } + + var charName []rune + for { + c.nextChar(&c.c) + if c.c.char == chRBrace { + break + } + if c.c.char == -1 { + c.error(PropertySyntax) + return 0 + } + charName = append(charName, c.c.char) + } + + if !isInvariantUString(charName) { + // All Unicode character names have only invariant characters. 
// isInvariantUString reports whether every code point of name is in the
// invariant subset of ASCII described by invariantChars. An empty or nil
// slice is trivially invariant.
func isInvariantUString(name []rune) bool {
	for _, c := range name {
		// No assertions here: this is legitimately called for strings that
		// contain variant characters.
		if !ucharIsInvariant(c) {
			return false // found a variant char
		}
	}
	return true
}

// invariantChars is a bitmap over the 128 ASCII code points, 32 per word,
// least significant bit first. A set bit marks the code point as invariant.
var invariantChars = [...]uint32{
	0xfffffbff, /* 00..1f but not 0a */
	0xffffffe5, /* 20..3f but not 21 23 24 */
	0x87fffffe, /* 40..5f but not 40 5b..5e */
	0x87fffffe, /* 60..7f but not 60 7b..7e */
}

// ucharIsInvariant reports whether c is an invariant ASCII character
// according to invariantChars. Anything outside 0x00..0x7f is not invariant.
//
// rune is a signed type, so the lower bound must be checked explicitly:
// without it a negative value (such as a -1 end-of-input marker) would pass
// the upper-bound test and index the table out of range, panicking.
func ucharIsInvariant(c rune) bool {
	if c < 0 || c > 0x7f {
		return false
	}
	word := invariantChars[c>>5]
	bit := uint32(1) << uint(c&0x1f)
	return word&bit != 0
}
// safeIncrement returns val+delta saturated to the int32 range: sums at or
// above math.MaxInt32 return math.MaxInt32, sums at or below math.MinInt32
// return math.MinInt32.
//
// The addition is done in int64 so that neither a negative val nor a delta
// outside the int32 range can wrap around. math.MaxInt32 is the "unbounded"
// sentinel used by the match-length computations, so adding a non-negative
// delta to it keeps it at math.MaxInt32.
func safeIncrement(val int32, delta int) int32 {
	// Clamp delta first so the int64 sum itself cannot overflow; any delta
	// beyond ±2^32 saturates the result no matter what val is.
	const bound = int64(1) << 32
	d := int64(delta)
	if d > bound {
		d = bound
	} else if d < -bound {
		d = -bound
	}

	sum := int64(val) + d
	if sum >= math.MaxInt32 {
		return math.MaxInt32
	}
	if sum <= math.MinInt32 {
		return math.MinInt32
	}
	return int32(sum)
}
Doesn't change what can match. + urxLdSp, + urxJmpSav, + urxJmpSavX: + // no-op + + // Ops that match a minimum of one character (one or two 16 bit code units.) + // + case urxOnechar, + urxStaticSetref, + urxStatSetrefN, + urxSetref, + urxBackslashD, + urxBackslashH, + urxBackslashR, + urxBackslashV, + urcOnecharI, + urxBackslashX, // Grahpeme Cluster. Minimum is 1, max unbounded. + urxDotanyAll, // . matches one or two. + urxDotany, + urxDotanyUnix: + currentLen = safeIncrement(currentLen, 1) + + case urxJmpx: + loc++ // URX_JMPX has an extra operand, ignored here, otherwise processed identically to URX_JMP. + fallthrough + + case urxJmp: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. Can safely ignore, the worst that will happen + // is that we understate the true minimum length + currentLen = forwardedLength[loc+1] + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] > currentLen { + forwardedLength[jmpDest] = currentLen + } + } + + case urxBacktrack: + // Back-tracks are kind of like a branch, except that the min length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxStringI: + loc++ + // TODO: with full case folding, matching input text may be shorter than + // the string we have here. More smarts could put some bounds on it. + // Assume a min length of one for now. 
A min length of zero causes + // optimization failures for a pattern like "string"+ + // currentLen += URX_VAL(stringLenOp); + currentLen = safeIncrement(currentLen, 1) + + case urxCtrInit, urxCtrInitNg: + // Loop Init Ops. + // If the min loop count == 0 + // move loc forwards to the end of the loop, skipping over the body. + // If the min count is > 0, + // continue normal processing of the body of the loop. + loopEndOp := c.out.compiledPat[loc+1] + loopEndLoc := loopEndOp.value() + minLoopCount := c.out.compiledPat[loc+2] + if minLoopCount == 0 { + loc = loopEndLoc + } else { + loc += 3 // Skips over operands of CTR_INIT + } + + case utxCtrLoop, urxCtrLoopNg: + // Loop ops. The jump is conditional, backwards only. + + case urxLoopSrI, urxLoopDotI, urxLoopC: + // More loop ops. These state-save to themselves. don't change the minimum match - could match nothing at all. + + case urxLaStart, urxLbStart: + // Look-around. Scan forward until the matching look-ahead end, + // without processing the look-around block. This is overly pessimistic for look-ahead, + // it assumes that the look-ahead match might be zero-length. + // TODO: Positive lookahead could recursively do the block, then continue + // with the longer of the block or the value coming in. Ticket 6060 + var depth int32 + if opType == urxLaStart { + depth = 2 + } else { + depth = 1 + } + + for { + loc++ + op = c.out.compiledPat[loc] + if op.typ() == urxLaStart { + // The boilerplate for look-ahead includes two LA_END insturctions, + // Depth will be decremented by each one when it is seen. + depth += 2 + } + if op.typ() == urxLbStart { + depth++ + } + if op.typ() == urxLaEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxLbnEnd { + depth-- + if depth == 0 { + break + } + } + if op.typ() == urxStateSave { + // Need this because neg lookahead blocks will FAIL to outside of the block. 
+ jmpDest := op.value() + if jmpDest > loc { + if currentLen < forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } + } + } + + case urxLaEnd, urxLbCont, urxLbEnd, urxLbnCount, urxLbnEnd: + // Only come here if the matching URX_LA_START or URX_LB_START was not in the + // range being sized, which happens when measuring size of look-behind blocks. + + default: + panic("unreachable") + } + } + + // We have finished walking through the ops. Check whether some forward jump + // propagated a shorter length to location end+1. + if forwardedLength[end+1] < currentLen { + currentLen = forwardedLength[end+1] + } + + return currentLen +} + +func (c *compiler) maxMatchLength(start, end int) int32 { + if c.err != nil { + return 0 + } + var loc int + var currentLen int32 + + forwardedLength := make([]int32, end+2) // end+2 slots: index end+1 is read by urxBacktrack at loc == end and written by forward jumps that land just past the range + + for loc = start; loc <= end; loc++ { + op := c.out.compiledPat[loc] + opType := op.typ() + + // The loop is advancing linearly through the pattern. + // If the op we are now at was the destination of a branch in the pattern, + // and that path has a longer maximum length than the current accumulated value, + // replace the current accumulated value. + if forwardedLength[loc] > currentLen { + currentLen = forwardedLength[loc] + } + + switch opType { + // Ops that don't change the total length matched + case urxReservedOp, + urxEnd, + urxStringLen, + urxNop, + urxStartCapture, + urxEndCapture, + urxBackslashB, + urxBackslashBu, + urxBackslashG, + urxBackslashZ, + urxCaret, + urxDollar, + urxDollarM, + urxDollarD, + urxDollarMd, + urxRelocOprnd, + urxStoInpLoc, + urxCaretM, + urxCaretMUnix, + urxStoSp, // Setup for atomic or possessive blocks. Doesn't change what can match. + urxLdSp, + urxLbEnd, + urxLbCont, + urxLbnCount, + urxLbnEnd: + // no-op + + // Ops that cause an unbounded increase in the length + // of a matched string, or that increase it in a hard to characterize way.
+ // Call the max length unbounded, and stop further checking. + case urxBackref, // BackRef. Must assume that it might be a zero length match + urxBackrefI, + urxBackslashX: // Grahpeme Cluster. Minimum is 1, max unbounded. + currentLen = math.MaxInt32 + + // Ops that match a max of one character (possibly two 16 bit code units.) + // + case urxStaticSetref, + urxStatSetrefN, + urxSetref, + urxBackslashD, + urxBackslashH, + urxBackslashR, + urxBackslashV, + urcOnecharI, + urxDotanyAll, + urxDotany, + urxDotanyUnix: + currentLen = safeIncrement(currentLen, 2) + + // Single literal character. Increase current max length by one or two, + // depending on whether the char is in the supplementary range. + case urxOnechar: + currentLen = safeIncrement(currentLen, 1) + if op.value() > 0x10000 { + currentLen = safeIncrement(currentLen, 1) + } + + // Jumps. + // + case urxJmp, urxJmpx, urxJmpSav, urxJmpSavX: + jmpDest := op.value() + if jmpDest < loc { + // Loop of some kind. Max match length is unbounded. + currentLen = math.MaxInt32 + } else { + // Forward jump. Propagate the current min length to the target loc of the jump. + if forwardedLength[jmpDest] < currentLen { + forwardedLength[jmpDest] = currentLen + } + currentLen = 0 + } + + case urxBacktrack: + // back-tracks are kind of like a branch, except that the max length was + // propagated already, by the state save. + currentLen = forwardedLength[loc+1] + + case urxStateSave: + // State Save, for forward jumps, propagate the current minimum. + // of the state save. + // For backwards jumps, they create a loop, maximum + // match length is unbounded. 
+ jmpDest := op.value() + if jmpDest > loc { + if currentLen > forwardedLength[jmpDest] { + forwardedLength[jmpDest] = currentLen + } + } else { + currentLen = math.MaxInt32 + } + + case urxString: + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxStringI: + // TODO: This code assumes that any user string that matches will be no longer + // than our compiled string, with case insensitive matching. + // Our compiled string has been case-folded already. + // + // Any matching user string will have no more code points than our + // compiled (folded) string. Folding may add code points, but + // not remove them. + // + // There is a potential problem if a supplemental code point + // case-folds to a BMP code point. In this case our compiled string + // could be shorter (in code units) than a matching user string. + // + // At this time (Unicode 6.1) there are no such characters, and this case + // is not being handled. A test, intltest regex/Bug9283, will fail if + // any problematic characters are added to Unicode. + // + // If this happens, we can make a set of the BMP chars that the + // troublesome supplementals fold to, scan our string, and bump the + // currentLen one extra for each that is found. + // + loc++ + stringLenOp := c.out.compiledPat[loc] + currentLen = safeIncrement(currentLen, stringLenOp.value()) + + case urxCtrInit, urxCtrInitNg: + // For Loops, recursively call this function on the pattern for the loop body, + // then multiply the result by the maximum loop count. + loopEndLoc := c.out.compiledPat[loc+1].value() + if loopEndLoc == loc+4 { + // Loop has an empty body. No affect on max match length. + // Continue processing with code after the loop end. + loc = loopEndLoc + break + } + + maxLoopCount := int(c.out.compiledPat[loc+3]) + if maxLoopCount == -1 { + // Unbounded Loop. No upper bound on match length. 
+ currentLen = math.MaxInt32 + break + } + + blockLen := c.maxMatchLength(loc+4, loopEndLoc-1) // Recursive call. + updatedLen := int(currentLen) + int(blockLen)*maxLoopCount + if updatedLen >= math.MaxInt32 { + currentLen = math.MaxInt32 + break + } + currentLen = int32(updatedLen) + loc = loopEndLoc + + case utxCtrLoop, urxCtrLoopNg: + panic("should not encounter this opcode") + + case urxLoopSrI, urxLoopDotI, urxLoopC: + // For anything to do with loops, make the match length unbounded. + currentLen = math.MaxInt32 + + case urxLaStart, urxLaEnd: + // Look-ahead. Just ignore, treat the look-ahead block as if + // it were normal pattern. Gives a too-long match length, + // but good enough for now. + + case urxLbStart: + // Look-behind. Scan forward until the matching look-around end, + // without processing the look-behind block. + dataLoc := op.value() + for loc = loc + 1; loc <= end; loc++ { + op = c.out.compiledPat[loc] + if (op.typ() == urxLaEnd || op.typ() == urxLbnEnd) && (op.value() == dataLoc) { + break + } + } + + default: + panic("unreachable") + } + + if currentLen == math.MaxInt32 { + // The maximum length is unbounded. + // Stop further processing of the pattern. + break + } + } + + return currentLen +} + +// Machine Generated below. +// It may need updating with new versions of Unicode. +// Intltest test RegexTest::TestCaseInsensitiveStarters will fail if an update is needed. +// The update tool is here: +// svn+ssh://source.icu-project.org/repos/icu/tools/trunk/unicode/c/genregexcasing + +// Machine Generated Data. Do not hand edit. 
+var reCaseFixCodePoints = [...]rune{ + 0x61, 0x66, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x77, 0x79, 0x2bc, + 0x3ac, 0x3ae, 0x3b1, 0x3b7, 0x3b9, 0x3c1, 0x3c5, 0x3c9, 0x3ce, 0x565, + 0x574, 0x57e, 0x1f00, 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, + 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, 0x1f25, 0x1f26, 0x1f27, 0x1f60, 0x1f61, + 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1f70, 0x1f74, 0x1f7c, 0x110000} + +var reCaseFixStringOffsets = [...]int16{ + 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xd, 0xe, 0xf, 0x10, 0x11, 0x12, 0x13, + 0x17, 0x1b, 0x20, 0x21, 0x2a, 0x2e, 0x2f, 0x30, 0x34, 0x35, 0x37, 0x39, 0x3b, + 0x3d, 0x3f, 0x41, 0x43, 0x45, 0x47, 0x49, 0x4b, 0x4d, 0x4f, 0x51, 0x53, 0x55, + 0x57, 0x59, 0x5b, 0x5d, 0x5f, 0x61, 0x63, 0x65, 0x66, 0x67, 0} + +var reCaseFixCounts = [...]int16{ + 0x1, 0x5, 0x1, 0x1, 0x1, 0x4, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x4, 0x4, 0x5, 0x1, 0x9, + 0x4, 0x1, 0x1, 0x4, 0x1, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, + 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x1, 0x1, 0x1, 0} + +var reCaseFixData = [...]uint16{ + 0x1e9a, 0xfb00, 0xfb01, 0xfb02, 0xfb03, 0xfb04, 0x1e96, 0x130, 0x1f0, 0xdf, 0x1e9e, 0xfb05, + 0xfb06, 0x1e97, 0x1e98, 0x1e99, 0x149, 0x1fb4, 0x1fc4, 0x1fb3, 0x1fb6, 0x1fb7, 0x1fbc, 0x1fc3, + 0x1fc6, 0x1fc7, 0x1fcc, 0x390, 0x1fd2, 0x1fd3, 0x1fd6, 0x1fd7, 0x1fe4, 0x3b0, 0x1f50, 0x1f52, + 0x1f54, 0x1f56, 0x1fe2, 0x1fe3, 0x1fe6, 0x1fe7, 0x1ff3, 0x1ff6, 0x1ff7, 0x1ffc, 0x1ff4, 0x587, + 0xfb13, 0xfb14, 0xfb15, 0xfb17, 0xfb16, 0x1f80, 0x1f88, 0x1f81, 0x1f89, 0x1f82, 0x1f8a, 0x1f83, + 0x1f8b, 0x1f84, 0x1f8c, 0x1f85, 0x1f8d, 0x1f86, 0x1f8e, 0x1f87, 0x1f8f, 0x1f90, 0x1f98, 0x1f91, + 0x1f99, 0x1f92, 0x1f9a, 0x1f93, 0x1f9b, 0x1f94, 0x1f9c, 0x1f95, 0x1f9d, 0x1f96, 0x1f9e, 0x1f97, + 0x1f9f, 0x1fa0, 0x1fa8, 0x1fa1, 0x1fa9, 0x1fa2, 0x1faa, 0x1fa3, 0x1fab, 0x1fa4, 0x1fac, 0x1fa5, + 0x1fad, 0x1fa6, 0x1fae, 0x1fa7, 0x1faf, 0x1fb2, 0x1fc2, 0x1ff2, 0} + +func (c *compiler) findCaseInsensitiveStarters(ch rune, 
starterChars *uset.UnicodeSet) { + if uprops.HasBinaryProperty(ch, uprops.UCharCaseSensitive) { + caseFoldedC := ucase.Fold(ch) + starterChars.Clear() + starterChars.AddRune(caseFoldedC) + + var i int + for i = 0; reCaseFixCodePoints[i] < ch; i++ { + // Simple linear search through the sorted list of interesting code points. + } + + if reCaseFixCodePoints[i] == ch { + data := reCaseFixData[reCaseFixStringOffsets[i]:] + numCharsToAdd := reCaseFixCounts[i] + for j := int16(0); j < numCharsToAdd; j++ { + var cpToAdd rune + cpToAdd, data = utf16.NextUnsafe(data) + starterChars.AddRune(cpToAdd) + } + } + + starterChars.CloseOver(uset.CaseInsensitive) + } else { + // Not a cased character. Just return it alone. + starterChars.Clear() + starterChars.AddRune(ch) + } +} + +func (c *compiler) scanProp() *uset.UnicodeSet { + if c.err != nil { + return nil + } + negated := c.c.char == chP + + c.nextChar(&c.c) + if c.c.char != chLBrace { + c.error(PropertySyntax) + return nil + } + + var propertyName strings.Builder + for { + c.nextChar(&c.c) + if c.c.char == chRBrace { + break + } + if c.c.char == -1 { + c.error(PropertySyntax) + return nil + } + propertyName.WriteRune(c.c.char) + } + + ss := c.createSetForProperty(propertyName.String(), negated) + c.nextChar(&c.c) + return ss +} + +func (c *compiler) createSetForProperty(propName string, negated bool) *uset.UnicodeSet { + if c.err != nil { + return nil + } + + var set *uset.UnicodeSet + + var usetFlags uset.USet + if c.modeFlags&CaseInsensitive != 0 { + usetFlags |= uset.CaseInsensitive + } + + var err error + set, err = uprops.NewUnicodeSetFomPattern("\\p{"+propName+"}", usetFlags) + if err == nil { + goto done + } + + // + // The incoming property wasn't directly recognized by ICU. + + // Check [:word:] and [:all:]. These are not recognized as a properties by ICU UnicodeSet. + // Java accepts 'word' with mixed case. + // Java accepts 'all' only in all lower case. 
+ if strings.EqualFold(propName, "word") { + set = staticPropertySets[urxIswordSet].Clone() + goto done + } + if propName == "all" { + set = uset.New() + set.AddRuneRange(0, 0x10ffff) + goto done + } + + // Do Java InBlock expressions + // + if strings.HasPrefix(propName, "In") && len(propName) >= 3 { + set = uset.New() + if uprops.ApplyPropertyAlias(set, "Block", propName[2:]) != nil { + c.error(PropertySyntax) + } + goto done + } + + // Check for the Java form "IsBooleanPropertyValue", which we will recast + // as "BooleanPropertyValue". The property value can be either a + // a General Category or a Script Name. + if strings.HasPrefix(propName, "Is") && len(propName) >= 3 { + mPropName := propName[2:] + if strings.IndexByte(mPropName, '=') >= 0 { + c.error(PropertySyntax) + goto done + } + + if strings.EqualFold(mPropName, "assigned") { + mPropName = "unassigned" + negated = !negated + } else if strings.EqualFold(mPropName, "TitleCase") { + mPropName = "Titlecase_Letter" + } + + set, err = uprops.NewUnicodeSetFomPattern("\\p{"+mPropName+"}", 0) + if err != nil { + c.error(PropertySyntax) + } else if !set.IsEmpty() && (usetFlags&uset.CaseInsensitive) != 0 { + set.CloseOver(uset.CaseInsensitive) + } + goto done + } + + if strings.HasPrefix(propName, "java") { + set = uset.New() + + // + // Try the various Java specific properties. 
+ // These all begin with "java" + // + if propName == "javaDefined" { + c.err = uprops.AddCategory(set, uchar.GcCnMask) + set.Complement() + } else if propName == "javaDigit" { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } else if propName == "javaIdentifierIgnorable" { + c.err = addIdentifierIgnorable(set) + } else if propName == "javaISOControl" { + set.AddRuneRange(0, 0x1F) + set.AddRuneRange(0x7F, 0x9F) + } else if propName == "javaJavaIdentifierPart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcScMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMnMask) + } + if c.err == nil { + c.err = addIdentifierIgnorable(set) + } + } else if propName == "javaJavaIdentifierStart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcScMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + } else if propName == "javaLetter" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + } else if propName == "javaLetterOrDigit" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + } else if propName == "javaLowerCase" { + c.err = uprops.AddCategory(set, uchar.GcLlMask) + } else if propName == "javaMirrored" { + c.err = uprops.ApplyIntPropertyValue(set, uprops.UCharBidiMirrored, 1) + } else if propName == "javaSpaceChar" { + c.err = uprops.AddCategory(set, uchar.GcZMask) + } else if propName == "javaSupplementaryCodePoint" { + set.AddRuneRange(0x10000, 
uset.MaxValue) + } else if propName == "javaTitleCase" { + c.err = uprops.AddCategory(set, uchar.GcLtMask) + } else if propName == "javaUnicodeIdentifierStart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + } else if propName == "javaUnicodeIdentifierPart" { + c.err = uprops.AddCategory(set, uchar.GcLMask) + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcPcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNdMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcNlMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMcMask) + } + if c.err == nil { + c.err = uprops.AddCategory(set, uchar.GcMnMask) + } + if c.err == nil { + c.err = addIdentifierIgnorable(set) + } + } else if propName == "javaUpperCase" { + c.err = uprops.AddCategory(set, uchar.GcLuMask) + } else if propName == "javaValidCodePoint" { + set.AddRuneRange(0, uset.MaxValue) + } else if propName == "javaWhitespace" { + c.err = uprops.AddCategory(set, uchar.GcZMask) + excl := uset.New() + excl.AddRune(0xa0) // U+00A0 NBSP: Java whitespace excludes the non-breaking spaces U+00A0, U+2007 and U+202F + excl.AddRune(0x2007) + excl.AddRune(0x202f) + set.RemoveAll(excl) + set.AddRuneRange(9, 0x0d) + set.AddRuneRange(0x1c, 0x1f) + } else { + c.error(PropertySyntax) + } + + if c.err == nil && !set.IsEmpty() && (usetFlags&uset.CaseInsensitive) != 0 { + set.CloseOver(uset.CaseInsensitive) + } + goto done + } + + // Unrecognized property. ICU didn't like it as it was, and none of the Java compatibility + // extensions matched it.
+ c.error(PropertySyntax) + +done: + if c.err != nil { + return nil + } + if negated { + set.Complement() + } + return set +} + +func addIdentifierIgnorable(set *uset.UnicodeSet) error { + set.AddRuneRange(0, 8) + set.AddRuneRange(0x0e, 0x1b) + set.AddRuneRange(0x7f, 0x9f) + + return uprops.AddCategory(set, uchar.GcCfMask) +} + +func (c *compiler) scanPosixProp() *uset.UnicodeSet { + var set *uset.UnicodeSet + + if !(c.c.char == chColon) { + panic("assertion failed: c.lastChar == ':'") + } + + savedScanIndex := c.scanIndex + savedScanPattern := c.p + savedQuoteMode := c.quoteMode + savedInBackslashQuote := c.inBackslashQuote + savedEOLComments := c.eolComments + savedLineNum := c.lineNum + savedCharNum := c.charNum + savedLastChar := c.lastChar + savedPeekChar := c.peekChar + savedC := c.c + + // Scan for a closing ]. A little tricky because there are some perverse + // edge cases possible. "[:abc\Qdef:] \E]" is a valid non-property expression, + // ending on the second closing ]. + var propName []rune + negated := false + + // Check for and consume the '^' in a negated POSIX property, e.g. [:^Letter:] + c.nextChar(&c.c) + if c.c.char == chUp { + negated = true + c.nextChar(&c.c) + } + + // Scan for the closing ":]", collecting the property name along the way. + sawPropSetTerminator := false + for { + propName = append(propName, c.c.char) + c.nextChar(&c.c) + if c.c.quoted || c.c.char == -1 { + // Escaped characters or end of input - either says this isn't a [:Property:] + break + } + if c.c.char == chColon { + c.nextChar(&c.c) + if c.c.char == chRBracket { + sawPropSetTerminator = true + break + } + } + } + + if sawPropSetTerminator { + set = c.createSetForProperty(string(propName), negated) + } else { + // No closing ']' - not a [:Property:] + // Restore the original scan position. + // The main scanner will retry the input as a normal set expression, + // not a [:Property:] expression. 
+ c.scanIndex = savedScanIndex + c.p = savedScanPattern + c.quoteMode = savedQuoteMode + c.inBackslashQuote = savedInBackslashQuote + c.eolComments = savedEOLComments + c.lineNum = savedLineNum + c.charNum = savedCharNum + c.lastChar = savedLastChar + c.peekChar = savedPeekChar + c.c = savedC + } + + return set +} + +func (c *compiler) compileSet(set *uset.UnicodeSet) { + if set == nil { + return + } + // Remove any strings from the set. + // There shoudn't be any, but just in case. + // (Case Closure can add them; if we had a simple case closure available that + // ignored strings, that would be better.) + setSize := set.Len() + + switch setSize { + case 0: + // Set of no elements. Always fails to match. + c.appendOp(urxBacktrack, 0) + + case 1: + // The set contains only a single code point. Put it into + // the compiled pattern as a single char operation rather + // than a set, and discard the set itself. + c.literalChar(set.RuneAt(0)) + + default: + // The set contains two or more chars. (the normal case) + // Put it into the compiled pattern as a set. + // theSet->freeze(); + setNumber := len(c.out.sets) + c.out.sets = append(c.out.sets, set) + c.appendOp(urxSetref, setNumber) + } +} diff --git a/go/mysql/icuregex/compiler_table.go b/go/mysql/icuregex/compiler_table.go new file mode 100644 index 00000000000..e8cfe0d5e55 --- /dev/null +++ b/go/mysql/icuregex/compiler_table.go @@ -0,0 +1,357 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +type patternParseAction uint8 + +const ( + doSetBackslashD patternParseAction = iota + doBackslashh + doBackslashH + doSetLiteralEscaped + doOpenLookAheadNeg + doCompleteNamedBackRef + doPatStart + doBackslashS + doBackslashD + doNGStar + doNOP + doBackslashX + doSetLiteral + doContinueNamedCapture + doBackslashG + doBackslashR + doSetBegin + doSetBackslashv + doPossessivePlus + doPerlInline + doBackslashZ + doSetAddAmp + doSetBeginDifference1 + doIntervalError + doSetNegate + doIntervalInit + doSetIntersection2 + doPossessiveInterval + doRuleError + doBackslashW + doContinueNamedBackRef + doOpenNonCaptureParen + doExit + doSetNamedChar + doSetBackslashV + doConditionalExpr + doEscapeError + doBadOpenParenType + doPossessiveStar + doSetAddDash + doEscapedLiteralChar + doSetBackslashw + doIntervalUpperDigit + doBackslashv + doSetBackslashS + doSetNoCloseError + doSetProp + doBackslashB + doSetEnd + doSetRange + doMatchModeParen + doPlus + doBackslashV + doSetMatchMode + doBackslashz + doSetNamedRange + doOpenLookBehindNeg + doInterval + doBadNamedCapture + doBeginMatchMode + doBackslashd + doPatFinish + doNamedChar + doNGPlus + doSetDifference2 + doSetBackslashH + doCloseParen + doDotAny + doOpenCaptureParen + doEnterQuoteMode + doOpenAtomicParen + doBadModeFlag + doSetBackslashd + doSetFinish + doProperty + doBeginNamedBackRef + doBackRef + doOpt + doDollar + doBeginNamedCapture + doNGInterval + doSetOpError + doSetPosixProp + doSetBeginIntersection1 + doBackslashb + doSetBeginUnion + doIntevalLowerDigit + doSetBackslashh + doStar + doMatchMode 
+ doBackslashA + doOpenLookBehind + doPossessiveOpt + doOrOperator + doBackslashw + doBackslashs + doLiteralChar + doSuppressComments + doCaret + doIntervalSame + doNGOpt + doOpenLookAhead + doSetBackslashW + doMismatchedParenErr + doSetBackslashs + rbbiLastAction +) + +// ------------------------------------------------------------------------------- +// +// RegexTableEl represents the structure of a row in the transition table +// for the pattern parser state machine. +// +// ------------------------------------------------------------------------------- +type regexTableEl struct { + action patternParseAction + charClass uint8 + nextState uint8 + pushState uint8 + nextChar bool +} + +var parseStateTable = []regexTableEl{ + {doNOP, 0, 0, 0, true}, + {doPatStart, 255, 2, 0, false}, // 1 start + {doLiteralChar, 254, 14, 0, true}, // 2 term + {doLiteralChar, 130, 14, 0, true}, // 3 + {doSetBegin, 91 /* [ */, 123, 205, true}, // 4 + {doNOP, 40 /* ( */, 27, 0, true}, // 5 + {doDotAny, 46 /* . */, 14, 0, true}, // 6 + {doCaret, 94 /* ^ */, 14, 0, true}, // 7 + {doDollar, 36 /* $ */, 14, 0, true}, // 8 + {doNOP, 92 /* \ */, 89, 0, true}, // 9 + {doOrOperator, 124 /* | */, 2, 0, true}, // 10 + {doCloseParen, 41 /* ) */, 255, 0, true}, // 11 + {doPatFinish, 253, 2, 0, false}, // 12 + {doRuleError, 255, 206, 0, false}, // 13 + {doNOP, 42 /* * */, 68, 0, true}, // 14 expr-quant + {doNOP, 43 /* + */, 71, 0, true}, // 15 + {doNOP, 63 /* ? */, 74, 0, true}, // 16 + {doIntervalInit, 123 /* { */, 77, 0, true}, // 17 + {doNOP, 40 /* ( */, 23, 0, true}, // 18 + {doNOP, 255, 20, 0, false}, // 19 + {doOrOperator, 124 /* | */, 2, 0, true}, // 20 expr-cont + {doCloseParen, 41 /* ) */, 255, 0, true}, // 21 + {doNOP, 255, 2, 0, false}, // 22 + {doSuppressComments, 63 /* ? */, 25, 0, true}, // 23 open-paren-quant + {doNOP, 255, 27, 0, false}, // 24 + {doNOP, 35 /* # */, 50, 14, true}, // 25 open-paren-quant2 + {doNOP, 255, 29, 0, false}, // 26 + {doSuppressComments, 63 /* ? 
*/, 29, 0, true}, // 27 open-paren + {doOpenCaptureParen, 255, 2, 14, false}, // 28 + {doOpenNonCaptureParen, 58 /* : */, 2, 14, true}, // 29 open-paren-extended + {doOpenAtomicParen, 62 /* > */, 2, 14, true}, // 30 + {doOpenLookAhead, 61 /* = */, 2, 20, true}, // 31 + {doOpenLookAheadNeg, 33 /* ! */, 2, 20, true}, // 32 + {doNOP, 60 /* < */, 46, 0, true}, // 33 + {doNOP, 35 /* # */, 50, 2, true}, // 34 + {doBeginMatchMode, 105 /* i */, 53, 0, false}, // 35 + {doBeginMatchMode, 100 /* d */, 53, 0, false}, // 36 + {doBeginMatchMode, 109 /* m */, 53, 0, false}, // 37 + {doBeginMatchMode, 115 /* s */, 53, 0, false}, // 38 + {doBeginMatchMode, 117 /* u */, 53, 0, false}, // 39 + {doBeginMatchMode, 119 /* w */, 53, 0, false}, // 40 + {doBeginMatchMode, 120 /* x */, 53, 0, false}, // 41 + {doBeginMatchMode, 45 /* - */, 53, 0, false}, // 42 + {doConditionalExpr, 40 /* ( */, 206, 0, true}, // 43 + {doPerlInline, 123 /* { */, 206, 0, true}, // 44 + {doBadOpenParenType, 255, 206, 0, false}, // 45 + {doOpenLookBehind, 61 /* = */, 2, 20, true}, // 46 open-paren-lookbehind + {doOpenLookBehindNeg, 33 /* ! 
*/, 2, 20, true}, // 47 + {doBeginNamedCapture, 129, 64, 0, false}, // 48 + {doBadOpenParenType, 255, 206, 0, false}, // 49 + {doNOP, 41 /* ) */, 255, 0, true}, // 50 paren-comment + {doMismatchedParenErr, 253, 206, 0, false}, // 51 + {doNOP, 255, 50, 0, true}, // 52 + {doMatchMode, 105 /* i */, 53, 0, true}, // 53 paren-flag + {doMatchMode, 100 /* d */, 53, 0, true}, // 54 + {doMatchMode, 109 /* m */, 53, 0, true}, // 55 + {doMatchMode, 115 /* s */, 53, 0, true}, // 56 + {doMatchMode, 117 /* u */, 53, 0, true}, // 57 + {doMatchMode, 119 /* w */, 53, 0, true}, // 58 + {doMatchMode, 120 /* x */, 53, 0, true}, // 59 + {doMatchMode, 45 /* - */, 53, 0, true}, // 60 + {doSetMatchMode, 41 /* ) */, 2, 0, true}, // 61 + {doMatchModeParen, 58 /* : */, 2, 14, true}, // 62 + {doBadModeFlag, 255, 206, 0, false}, // 63 + {doContinueNamedCapture, 129, 64, 0, true}, // 64 named-capture + {doContinueNamedCapture, 128, 64, 0, true}, // 65 + {doOpenCaptureParen, 62 /* > */, 2, 14, true}, // 66 + {doBadNamedCapture, 255, 206, 0, false}, // 67 + {doNGStar, 63 /* ? */, 20, 0, true}, // 68 quant-star + {doPossessiveStar, 43 /* + */, 20, 0, true}, // 69 + {doStar, 255, 20, 0, false}, // 70 + {doNGPlus, 63 /* ? */, 20, 0, true}, // 71 quant-plus + {doPossessivePlus, 43 /* + */, 20, 0, true}, // 72 + {doPlus, 255, 20, 0, false}, // 73 + {doNGOpt, 63 /* ? */, 20, 0, true}, // 74 quant-opt + {doPossessiveOpt, 43 /* + */, 20, 0, true}, // 75 + {doOpt, 255, 20, 0, false}, // 76 + {doNOP, 128, 79, 0, false}, // 77 interval-open + {doIntervalError, 255, 206, 0, false}, // 78 + {doIntevalLowerDigit, 128, 79, 0, true}, // 79 interval-lower + {doNOP, 44 /* , */, 83, 0, true}, // 80 + {doIntervalSame, 125 /* } */, 86, 0, true}, // 81 + {doIntervalError, 255, 206, 0, false}, // 82 + {doIntervalUpperDigit, 128, 83, 0, true}, // 83 interval-upper + {doNOP, 125 /* } */, 86, 0, true}, // 84 + {doIntervalError, 255, 206, 0, false}, // 85 + {doNGInterval, 63 /* ? 
*/, 20, 0, true}, // 86 interval-type + {doPossessiveInterval, 43 /* + */, 20, 0, true}, // 87 + {doInterval, 255, 20, 0, false}, // 88 + {doBackslashA, 65 /* A */, 2, 0, true}, // 89 backslash + {doBackslashB, 66 /* B */, 2, 0, true}, // 90 + {doBackslashb, 98 /* b */, 2, 0, true}, // 91 + {doBackslashd, 100 /* d */, 14, 0, true}, // 92 + {doBackslashD, 68 /* D */, 14, 0, true}, // 93 + {doBackslashG, 71 /* G */, 2, 0, true}, // 94 + {doBackslashh, 104 /* h */, 14, 0, true}, // 95 + {doBackslashH, 72 /* H */, 14, 0, true}, // 96 + {doNOP, 107 /* k */, 115, 0, true}, // 97 + {doNamedChar, 78 /* N */, 14, 0, false}, // 98 + {doProperty, 112 /* p */, 14, 0, false}, // 99 + {doProperty, 80 /* P */, 14, 0, false}, // 100 + {doBackslashR, 82 /* R */, 14, 0, true}, // 101 + {doEnterQuoteMode, 81 /* Q */, 2, 0, true}, // 102 + {doBackslashS, 83 /* S */, 14, 0, true}, // 103 + {doBackslashs, 115 /* s */, 14, 0, true}, // 104 + {doBackslashv, 118 /* v */, 14, 0, true}, // 105 + {doBackslashV, 86 /* V */, 14, 0, true}, // 106 + {doBackslashW, 87 /* W */, 14, 0, true}, // 107 + {doBackslashw, 119 /* w */, 14, 0, true}, // 108 + {doBackslashX, 88 /* X */, 14, 0, true}, // 109 + {doBackslashZ, 90 /* Z */, 2, 0, true}, // 110 + {doBackslashz, 122 /* z */, 2, 0, true}, // 111 + {doBackRef, 128, 14, 0, true}, // 112 + {doEscapeError, 253, 206, 0, false}, // 113 + {doEscapedLiteralChar, 255, 14, 0, true}, // 114 + {doBeginNamedBackRef, 60 /* < */, 117, 0, true}, // 115 named-backref + {doBadNamedCapture, 255, 206, 0, false}, // 116 + {doContinueNamedBackRef, 129, 119, 0, true}, // 117 named-backref-2 + {doBadNamedCapture, 255, 206, 0, false}, // 118 + {doContinueNamedBackRef, 129, 119, 0, true}, // 119 named-backref-3 + {doContinueNamedBackRef, 128, 119, 0, true}, // 120 + {doCompleteNamedBackRef, 62 /* > */, 14, 0, true}, // 121 + {doBadNamedCapture, 255, 206, 0, false}, // 122 + {doSetNegate, 94 /* ^ */, 126, 0, true}, // 123 set-open + {doSetPosixProp, 58 /* : */, 128, 0, 
false}, // 124 + {doNOP, 255, 126, 0, false}, // 125 + {doSetLiteral, 93 /* ] */, 141, 0, true}, // 126 set-open2 + {doNOP, 255, 131, 0, false}, // 127 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 128 set-posix + {doNOP, 58 /* : */, 131, 0, false}, // 129 + {doRuleError, 255, 206, 0, false}, // 130 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 131 set-start + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 132 + {doNOP, 92 /* \ */, 191, 0, true}, // 133 + {doNOP, 45 /* - */, 137, 0, true}, // 134 + {doNOP, 38 /* & */, 139, 0, true}, // 135 + {doSetLiteral, 255, 141, 0, true}, // 136 + {doRuleError, 45 /* - */, 206, 0, false}, // 137 set-start-dash + {doSetAddDash, 255, 141, 0, false}, // 138 + {doRuleError, 38 /* & */, 206, 0, false}, // 139 set-start-amp + {doSetAddAmp, 255, 141, 0, false}, // 140 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 141 set-after-lit + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 142 + {doNOP, 45 /* - */, 178, 0, true}, // 143 + {doNOP, 38 /* & */, 169, 0, true}, // 144 + {doNOP, 92 /* \ */, 191, 0, true}, // 145 + {doSetNoCloseError, 253, 206, 0, false}, // 146 + {doSetLiteral, 255, 141, 0, true}, // 147 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 148 set-after-set + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 149 + {doNOP, 45 /* - */, 171, 0, true}, // 150 + {doNOP, 38 /* & */, 166, 0, true}, // 151 + {doNOP, 92 /* \ */, 191, 0, true}, // 152 + {doSetNoCloseError, 253, 206, 0, false}, // 153 + {doSetLiteral, 255, 141, 0, true}, // 154 + {doSetEnd, 93 /* ] */, 255, 0, true}, // 155 set-after-range + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 156 + {doNOP, 45 /* - */, 174, 0, true}, // 157 + {doNOP, 38 /* & */, 176, 0, true}, // 158 + {doNOP, 92 /* \ */, 191, 0, true}, // 159 + {doSetNoCloseError, 253, 206, 0, false}, // 160 + {doSetLiteral, 255, 141, 0, true}, // 161 + {doSetBeginUnion, 91 /* [ */, 123, 148, true}, // 162 set-after-op + {doSetOpError, 93 /* ] */, 206, 0, false}, // 163 + {doNOP, 92 /* \ */, 191, 0, true}, // 
164 + {doSetLiteral, 255, 141, 0, true}, // 165 + {doSetBeginIntersection1, 91 /* [ */, 123, 148, true}, // 166 set-set-amp + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 167 + {doSetAddAmp, 255, 141, 0, false}, // 168 + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 169 set-lit-amp + {doSetAddAmp, 255, 141, 0, false}, // 170 + {doSetBeginDifference1, 91 /* [ */, 123, 148, true}, // 171 set-set-dash + {doSetDifference2, 45 /* - */, 162, 0, true}, // 172 + {doSetAddDash, 255, 141, 0, false}, // 173 + {doSetDifference2, 45 /* - */, 162, 0, true}, // 174 set-range-dash + {doSetAddDash, 255, 141, 0, false}, // 175 + {doSetIntersection2, 38 /* & */, 162, 0, true}, // 176 set-range-amp + {doSetAddAmp, 255, 141, 0, false}, // 177 + {doSetDifference2, 45 /* - */, 162, 0, true}, // 178 set-lit-dash + {doSetAddDash, 91 /* [ */, 141, 0, false}, // 179 + {doSetAddDash, 93 /* ] */, 141, 0, false}, // 180 + {doNOP, 92 /* \ */, 183, 0, true}, // 181 + {doSetRange, 255, 155, 0, true}, // 182 + {doSetOpError, 115 /* s */, 206, 0, false}, // 183 set-lit-dash-escape + {doSetOpError, 83 /* S */, 206, 0, false}, // 184 + {doSetOpError, 119 /* w */, 206, 0, false}, // 185 + {doSetOpError, 87 /* W */, 206, 0, false}, // 186 + {doSetOpError, 100 /* d */, 206, 0, false}, // 187 + {doSetOpError, 68 /* D */, 206, 0, false}, // 188 + {doSetNamedRange, 78 /* N */, 155, 0, false}, // 189 + {doSetRange, 255, 155, 0, true}, // 190 + {doSetProp, 112 /* p */, 148, 0, false}, // 191 set-escape + {doSetProp, 80 /* P */, 148, 0, false}, // 192 + {doSetNamedChar, 78 /* N */, 141, 0, false}, // 193 + {doSetBackslashs, 115 /* s */, 155, 0, true}, // 194 + {doSetBackslashS, 83 /* S */, 155, 0, true}, // 195 + {doSetBackslashw, 119 /* w */, 155, 0, true}, // 196 + {doSetBackslashW, 87 /* W */, 155, 0, true}, // 197 + {doSetBackslashd, 100 /* d */, 155, 0, true}, // 198 + {doSetBackslashD, 68 /* D */, 155, 0, true}, // 199 + {doSetBackslashh, 104 /* h */, 155, 0, true}, // 200 + {doSetBackslashH, 
72 /* H */, 155, 0, true}, // 201 + {doSetBackslashv, 118 /* v */, 155, 0, true}, // 202 + {doSetBackslashV, 86 /* V */, 155, 0, true}, // 203 + {doSetLiteralEscaped, 255, 141, 0, true}, // 204 + {doSetFinish, 255, 14, 0, false}, // 205 set-finish + {doExit, 255, 206, 0, true}, // 206 errorDeath +} diff --git a/go/mysql/icuregex/debug.go b/go/mysql/icuregex/debug.go new file mode 100644 index 00000000000..92c43e704d7 --- /dev/null +++ b/go/mysql/icuregex/debug.go @@ -0,0 +1,151 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package icuregex + +import ( + "fmt" + "io" +) + +func (pat *Pattern) Dump(w io.Writer) { + fmt.Fprintf(w, "Original Pattern: \"%s\"\n", pat.pattern) + fmt.Fprintf(w, " Min Match Length: %d\n", pat.minMatchLen) + fmt.Fprintf(w, " Match Start Type: %v\n", pat.startType) + if pat.startType == startString { + fmt.Fprintf(w, " Initial match string: \"%s\"\n", string(pat.literalText[pat.initialStringIdx:pat.initialStringIdx+pat.initialStringLen])) + } else if pat.startType == startSet { + fmt.Fprintf(w, " Match First Chars: %s\n", pat.initialChars.String()) + } else if pat.startType == startChar { + fmt.Fprintf(w, " First char of Match: ") + if pat.initialChar > 0x20 { + fmt.Fprintf(w, "'%c'\n", pat.initialChar) + } else { + fmt.Fprintf(w, "%#x\n", pat.initialChar) + } + } + + fmt.Fprintf(w, "Named Capture Groups:\n") + if len(pat.namedCaptureMap) == 0 { + fmt.Fprintf(w, " None\n") + } else { + for name, number := range pat.namedCaptureMap { + fmt.Fprintf(w, " %d\t%s\n", number, name) + } + } + + fmt.Fprintf(w, "\nIndex Binary Type Operand\n-------------------------------------------\n") + for idx := range pat.compiledPat { + pat.dumpOp(w, idx) + } + fmt.Fprintf(w, "\n\n") +} + +func (pat *Pattern) dumpOp(w io.Writer, index int) { + op := pat.compiledPat[index] + val := op.value() + opType := op.typ() + pinnedType := opType + if int(pinnedType) >= len(urxOpcodeNames) { + pinnedType = 0 + } + + fmt.Fprintf(w, "%4d %08x %-15s ", index, op, urxOpcodeNames[pinnedType]) + + switch opType { + case urxNop, + urxDotany, + urxDotanyAll, + urxFail, + urxCaret, + urxDollar, + urxBackslashG, + urxBackslashX, + urxEnd, + urxDollarM, + urxCaretM: + // Types with no operand field of interest. 
+ + case urxReservedOp, + urxStartCapture, + urxEndCapture, + urxStateSave, + urxJmp, + urxJmpSav, + urxJmpSavX, + urxBackslashB, + urxBackslashBu, + urxBackslashD, + urxBackslashZ, + urxStringLen, + urxCtrInit, + urxCtrInitNg, + utxCtrLoop, + urxCtrLoopNg, + urxRelocOprnd, + urxStoSp, + urxLdSp, + urxBackref, + urxStoInpLoc, + urxJmpx, + urxLaStart, + urxLaEnd, + urxBackrefI, + urxLbStart, + urxLbCont, + urxLbEnd, + urxLbnCount, + urxLbnEnd, + urxLoopC, + urxLoopDotI, + urxBackslashH, + urxBackslashR, + urxBackslashV: + // types with an integer operand field. + fmt.Fprintf(w, "%d", val) + + case urxOnechar, urcOnecharI: + if val < 0x20 { + fmt.Fprintf(w, "%#x", val) + } else { + fmt.Fprintf(w, "'%c'", rune(val)) + } + + case urxString, urxStringI: + lengthOp := pat.compiledPat[index+1] + length := lengthOp.value() + fmt.Fprintf(w, "%q", string(pat.literalText[val:val+length])) + + case urxSetref, urxLoopSrI: + fmt.Fprintf(w, "%s", pat.sets[val].String()) + + case urxStaticSetref, urxStatSetrefN: + if (val & urxNegSet) != 0 { + fmt.Fprintf(w, "NOT ") + val &= ^urxNegSet + } + fmt.Fprintf(w, "%s", staticPropertySets[val].String()) + + default: + fmt.Fprintf(w, "??????") + } + fmt.Fprintf(w, "\n") +} diff --git a/go/mysql/icuregex/error.go b/go/mysql/icuregex/error.go new file mode 100644 index 00000000000..219ddcf602b --- /dev/null +++ b/go/mysql/icuregex/error.go @@ -0,0 +1,149 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// CompileError describes a failure to compile a regular expression. Line and
// Offset locate the problem inside the pattern and Context carries the
// pattern text reported alongside the message.
type CompileError struct {
	Code    CompileErrorCode
	Line    int
	Offset  int
	Context string
}

// Error implements the error interface. The message is a description of
// Code followed by the location and context of the failure. Codes without a
// known description contribute no leading text.
func (e *CompileError) Error() string {
	var out strings.Builder
	switch e.Code {
	case InternalError:
		out.WriteString("Internal error")
	case RuleSyntax:
		out.WriteString("Syntax error")
	case BadEscapeSequence:
		out.WriteString("Bad escape sequence")
	case PropertySyntax:
		out.WriteString("Property syntax error")
	case Unimplemented:
		out.WriteString("Unimplemented")
	case MismatchedParen:
		out.WriteString("Mismatched parentheses")
	case NumberTooBig:
		out.WriteString("Number too big")
	case BadInterval:
		out.WriteString("Bad interval")
	case MaxLtMin:
		out.WriteString("Max less than min")
	case InvalidBackRef:
		out.WriteString("Invalid back reference")
	case InvalidFlag:
		out.WriteString("Invalid flag")
	case LookBehindLimit:
		out.WriteString("Look behind limit")
	case MissingCloseBracket:
		out.WriteString("Missing closing ]")
	case InvalidRange:
		out.WriteString("Invalid range")
	case PatternTooBig:
		out.WriteString("Pattern too big")
	case InvalidCaptureGroupName:
		out.WriteString("Invalid capture group name")
	}
	// Writes to a strings.Builder cannot fail, so the result is discarded.
	_, _ = fmt.Fprintf(&out, " in regular expression on line %d, character %d: `%s`", e.Line, e.Offset, e.Context)

	return out.String()
}

// MatchError describes a failure that happened while matching Input against
// Pattern. Position is an index into Input (in runes).
type MatchError struct {
	Code     MatchErrorCode
	Pattern  string
	Position int
	Input    []rune
}

// maxMatchInputLength is the maximum number of input runes quoted in a
// MatchError message.
const maxMatchInputLength = 20

// Error implements the error interface. Inputs longer than
// maxMatchInputLength runes are abbreviated to a window that starts
// maxMatchInputLength/2 runes before Position; "..." is added on each side
// where input was actually cut off.
func (e *MatchError) Error() string {
	var out strings.Builder
	switch e.Code {
	case StackOverflow:
		out.WriteString("Stack overflow")
	case TimeOut:
		out.WriteString("Timeout")
	}

	input := e.Input
	if len(input) > maxMatchInputLength {
		// Clamp the window to the input so that an out-of-range Position
		// can never produce an invalid slice expression.
		start := e.Position - maxMatchInputLength/2
		if start < 0 {
			start = 0
		} else if start > len(input) {
			start = len(input)
		}
		end := start + maxMatchInputLength
		if end > len(input) {
			end = len(input)
		}

		excerpt := make([]rune, 0, maxMatchInputLength+6)
		// Only mark a side as elided when runes were really dropped there.
		if start > 0 {
			excerpt = append(excerpt, '.', '.', '.')
		}
		excerpt = append(excerpt, input[start:end]...)
		if end < len(input) {
			excerpt = append(excerpt, '.', '.', '.')
		}
		input = excerpt
	}
	// Writes to a strings.Builder cannot fail, so the result is discarded.
	_, _ = fmt.Fprintf(&out, " for expression `%s` at position %d in: %q", e.Pattern, e.Position, string(input))

	return out.String()
}

// Code is a generic numeric error code.
type Code int32

// CompileErrorCode identifies the reason a pattern failed to compile.
type CompileErrorCode int32

const (
	InternalError           CompileErrorCode = iota + 1 // An internal error (bug) was detected.
	RuleSyntax                                          // Syntax error in regexp pattern.
	BadEscapeSequence                                   // Unrecognized backslash escape sequence in pattern.
	PropertySyntax                                      // Incorrect Unicode property.
	Unimplemented                                       // Use of regexp feature that is not yet implemented.
	MismatchedParen                                     // Incorrectly nested parentheses in regexp pattern.
	NumberTooBig                                        // Decimal number is too large.
	BadInterval                                         // Error in {min,max} interval.
	MaxLtMin                                            // In {min,max}, max is less than min.
	InvalidBackRef                                      // Back-reference to a non-existent capture group.
	InvalidFlag                                         // Invalid value for match mode flags.
	LookBehindLimit                                     // Look-behind pattern matches must have a bounded maximum length.
	MissingCloseBracket                                 // Missing closing bracket on a bracket expression.
	InvalidRange                                        // In a character range [x-y], x is greater than y.
	PatternTooBig                                       // Pattern exceeds limits on size or complexity.
	InvalidCaptureGroupName                             // Invalid capture group name.
)

// MatchErrorCode identifies the reason a match operation failed.
type MatchErrorCode int32

const (
	StackOverflow MatchErrorCode = iota // Regular expression backtrack stack overflow.
	TimeOut                             // Maximum allowed match time exceeded.
)
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex_test + +import ( + "bufio" + "errors" + "fmt" + "io" + "os" + "regexp" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/mysql/icuregex" + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" +) + +var ErrSkip = errors.New("ignored test") + +type Matcher int8 + +const ( + FuncFind Matcher = iota + FuncMatches + FuncLookingAt +) + +type Expectation int8 + +const ( + Unknown Expectation = iota + Expected + NotExpected +) + +type TestPattern struct { + Line string + Lineno int + + Pattern string + Flags icuregex.RegexpFlag + Options struct { + MatchFunc Matcher + FindCount int + MatchOnly bool + MustError bool + Dump bool + HitEnd Expectation + RequireEnd Expectation + } + Input string + Groups []TestGroup +} + +type TestGroup struct { + Start, End int +} + +var parsePattern = regexp.MustCompile(`<(/?)(r|[0-9]+)>`) + +func (tp *TestPattern) parseFlags(line string) (string, error) { + for len(line) > 0 { + switch line[0] { + case '"', '\'', '/': + return line, nil + case ' ', '\t': + case 'i': + tp.Flags |= icuregex.CaseInsensitive + case 'x': + tp.Flags |= icuregex.Comments + case 's': + tp.Flags |= icuregex.DotAll + case 'm': + tp.Flags |= icuregex.Multiline + case 'e': + tp.Flags |= icuregex.ErrorOnUnknownEscapes + case 'D': + tp.Flags |= icuregex.UnixLines + case 'Q': + tp.Flags |= icuregex.Literal + case '2', '3', '4', '5', '6', '7', '8', '9': + tp.Options.FindCount = int(line[0] - '0') + case 'G': + tp.Options.MatchOnly = true + case 'E': + tp.Options.MustError = true + case 'd': + 
tp.Options.Dump = true + case 'L': + tp.Options.MatchFunc = FuncLookingAt + case 'M': + tp.Options.MatchFunc = FuncMatches + case 'v': + tp.Options.MustError = !icuregex.BreakIteration + case 'a', 'b': + return "", ErrSkip + case 'z': + tp.Options.HitEnd = Expected + case 'Z': + tp.Options.HitEnd = NotExpected + case 'y': + tp.Options.RequireEnd = Expected + case 'Y': + tp.Options.RequireEnd = NotExpected + default: + return "", fmt.Errorf("unexpected modifier '%c'", line[0]) + } + line = line[1:] + } + return "", io.ErrUnexpectedEOF +} + +func (tp *TestPattern) parseMatch(orig string) error { + input, ok := pattern.Unescape(orig) + if !ok { + return fmt.Errorf("failed to unquote input: %s", orig) + } + + var detagged []rune + var last int + + m := parsePattern.FindAllStringSubmatchIndex(input, -1) + for _, g := range m { + detagged = append(detagged, []rune(input[last:g[0]])...) + last = g[1] + + closing := input[g[2]:g[3]] == "/" + groupNum := input[g[4]:g[5]] + if groupNum == "r" { + return ErrSkip + } + num, err := strconv.Atoi(groupNum) + if err != nil { + return fmt.Errorf("bad group number %q: %w", groupNum, err) + } + + if num >= len(tp.Groups) { + grp := make([]TestGroup, num+1) + for i := range grp { + grp[i].Start = -1 + grp[i].End = -1 + } + copy(grp, tp.Groups) + tp.Groups = grp + } + + if closing { + tp.Groups[num].End = len(detagged) + } else { + tp.Groups[num].Start = len(detagged) + } + } + + detagged = append(detagged, []rune(input[last:])...) 
+ tp.Input = string(detagged) + return nil +} + +func ParseTestFile(t testing.TB, filename string) []TestPattern { + f, err := os.Open(filename) + if err != nil { + t.Fatalf("failed to open test data: %v", err) + } + + defer f.Close() + scanner := bufio.NewScanner(f) + var lineno int + var patterns []TestPattern + + errFunc := func(err error) { + if err == ErrSkip { + return + } + t.Errorf("Parse error: %v\n%03d: %s", err, lineno, scanner.Text()) + } + + for scanner.Scan() { + lineno++ + line := scanner.Text() + line = strings.TrimSpace(line) + + if len(line) == 0 || line[0] == '#' { + continue + } + + var tp TestPattern + tp.Line = line + tp.Lineno = lineno + + idx := strings.IndexByte(line[1:], line[0]) + + tp.Pattern = line[1 : idx+1] + line, err = tp.parseFlags(line[idx+2:]) + if err != nil { + errFunc(err) + continue + } + + idx = strings.IndexByte(line[1:], line[0]) + err = tp.parseMatch(line[1 : idx+1]) + if err != nil { + errFunc(err) + continue + } + + patterns = append(patterns, tp) + } + + if err := scanner.Err(); err != nil { + t.Fatal(err) + } + return patterns +} + +func (tp *TestPattern) fail(t testing.TB, msg string, args ...any) bool { + t.Helper() + msg = fmt.Sprintf(msg, args...) 
+ t.Errorf("%s (in line %d)\nregexp: %s\ninput: %q\noriginal: %s", msg, tp.Lineno, tp.Pattern, tp.Input, tp.Line) + return false +} + +func (tp *TestPattern) Test(t testing.TB) bool { + re, err := func() (re *icuregex.Pattern, err error) { + defer func() { + if r := recover(); r != nil { + err = fmt.Errorf("PANIC: %v", r) + } + }() + re, err = icuregex.CompileString(tp.Pattern, tp.Flags) + return + }() + if err != nil { + if tp.Options.MustError { + return true + } + + return tp.fail(t, "unexpected parser failure: %v", err) + } + if tp.Options.MustError { + return tp.fail(t, "parse failure expected") + } + + matcher := re.Match(tp.Input) + var isMatch bool + var findCount = tp.Options.FindCount + if findCount == 0 { + findCount = 1 + } + + for i := 0; i < findCount; i++ { + isMatch, err = func() (bool, error) { + defer func() { + if r := recover(); r != nil { + tp.fail(t, "unexpected match failure: %v", r) + } + }() + switch tp.Options.MatchFunc { + case FuncMatches: + return matcher.Matches() + case FuncLookingAt: + return matcher.LookingAt() + case FuncFind: + return matcher.Find() + default: + panic("invalid MatchFunc") + } + }() + } + + require.NoError(t, err) + + if !isMatch && len(tp.Groups) > 0 { + return tp.fail(t, "Match expected, but none found.") + } + if isMatch && len(tp.Groups) == 0 { + return tp.fail(t, "No match expected, but found one at position %d", matcher.Start()) + } + if tp.Options.MatchOnly { + return true + } + + for i := 0; i < matcher.GroupCount(); i++ { + expectedStart := -1 + expectedEnd := -1 + + if i < len(tp.Groups) { + expectedStart = tp.Groups[i].Start + expectedEnd = tp.Groups[i].End + } + if gotStart := matcher.StartForGroup(i); gotStart != expectedStart { + return tp.fail(t, "Incorrect start position for group %d. Expected %d, got %d", i, expectedStart, gotStart) + } + if gotEnd := matcher.EndForGroup(i); gotEnd != expectedEnd { + return tp.fail(t, "Incorrect end position for group %d. 
Expected %d, got %d", i, expectedEnd, gotEnd) + } + } + + if matcher.GroupCount()+1 < len(tp.Groups) { + return tp.fail(t, "Expected %d capture groups, found %d", len(tp.Groups)-1, matcher.GroupCount()) + } + + if tp.Options.HitEnd == Expected && !matcher.HitEnd() { + return tp.fail(t, "HitEnd() returned false. Expected true") + } + if tp.Options.HitEnd == NotExpected && matcher.HitEnd() { + return tp.fail(t, "HitEnd() returned true. Expected false") + } + + if tp.Options.RequireEnd == Expected && !matcher.RequireEnd() { + return tp.fail(t, "RequireEnd() returned false. Expected true") + } + if tp.Options.RequireEnd == NotExpected && matcher.RequireEnd() { + return tp.fail(t, "RequireEnd() returned true. Expected false") + } + + return true +} + +func TestICU(t *testing.T) { + pats := ParseTestFile(t, "testdata/regextst.txt") + + var valid int + + for _, p := range pats { + if p.Test(t) { + valid++ + } + } + + t.Logf("%d/%d (%.02f)", valid, len(pats), float64(valid)/float64(len(pats))) +} + +func TestICUExtended(t *testing.T) { + // This tests additional cases that aren't covered in the + // copied ICU test suite. 
+ pats := ParseTestFile(t, "testdata/regextst_extended.txt") + + var valid int + + for _, p := range pats { + if p.Test(t) { + valid++ + } + } + + t.Logf("%d/%d (%.02f)", valid, len(pats), float64(valid)/float64(len(pats))) +} + +func TestCornerCases(t *testing.T) { + var cases = []struct { + Pattern string + Input string + Flags icuregex.RegexpFlag + Match bool + }{ + {`xyz$`, "xyz\n", 0, true}, + {`a*+`, "abbxx", 0, true}, + {`(ABC){1,2}+ABC`, "ABCABCABC", 0, true}, + {`(ABC){2,3}+ABC`, "ABCABCABC", 0, false}, + {`(abc)*+a`, "abcabcabc", 0, false}, + {`(abc)*+a`, "abcabcab", 0, true}, + {`a\N{LATIN SMALL LETTER B}c`, "abc", 0, true}, + {`a.b`, "a\rb", icuregex.UnixLines, true}, + {`a.b`, "a\rb", 0, false}, + {`(?d)abc$`, "abc\r", 0, false}, + {`[ \b]`, "b", 0, true}, + {`[abcd-\N{LATIN SMALL LETTER G}]+`, "xyz-abcdefghij-", 0, true}, + {`[[abcd]&&[ac]]+`, "bacacd", 0, true}, + } + + for _, tc := range cases { + t.Run(tc.Pattern, func(t *testing.T) { + _, err := icuregex.CompileString(tc.Pattern, tc.Flags) + if err != nil { + t.Fatal(err) + } + }) + } +} + +func TestOne(t *testing.T) { + const Pattern = `\p{CaseIgnorable}` + const Input = "foo.bar" + const Flags = 0 + + re, err := icuregex.CompileString(Pattern, Flags) + if err != nil { + t.Fatalf("compilation failed: %v", err) + } + + re.Dump(os.Stderr) + + m := icuregex.NewMatcher(re) + m.Dumper(os.Stderr) + m.ResetString(Input) + found, err := m.Find() + require.NoError(t, err) + t.Logf("match = %v", found) +} diff --git a/go/mysql/icuregex/internal/bytestrie/bytes_trie.go b/go/mysql/icuregex/internal/bytestrie/bytes_trie.go new file mode 100644 index 00000000000..aff80dc3e69 --- /dev/null +++ b/go/mysql/icuregex/internal/bytestrie/bytes_trie.go @@ -0,0 +1,354 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. 
// BytesTrie is a cursor over a serialized byte trie as produced by ICU.
type BytesTrie struct {
	pos                  []byte // trie bytes from the cursor onwards; nil once matching has stopped
	original             []byte // the slice the cursor was created from
	remainingMatchLength int32  // bytes left in the current linear-match node minus 1; negative outside of one
}

// New returns a cursor positioned at the start of the serialized trie pos.
func New(pos []byte) BytesTrie {
	return BytesTrie{pos: pos, original: pos, remainingMatchLength: -1}
}

// result is the outcome of feeding one byte to the trie.
type result int32

const (
	// noMatch: the input did not continue a matching string. Once the
	// cursor has stopped, every further call to next reports noMatch.
	noMatch result = iota
	// noValue: the input continued a matching string but there is no value
	// for the string so far; it is a prefix of a longer string.
	noValue
	// finalValue: the input continued a matching string, there is a value
	// for the string so far and no further byte can continue it.
	finalValue
	// intermediateValue: the input continued a matching string, there is a
	// value for the string so far and another byte can continue it.
	intermediateValue
)

const (
	maxBranchLinearSubNodeLength = 5

	// 10..1f: linear-match node, match 1..16 bytes and continue with the
	// next node.
	minLinearMatch       = 0x10
	maxLinearMatchLength = 0x10

	// 20..ff: variable-length value node. If odd, the value is final
	// (otherwise an intermediate value or jump delta). After shifting right
	// by one bit, the remaining lead byte value gives the number of
	// following bytes (0..4) and contains the value's top bits.
	minValueLead = minLinearMatch + maxLinearMatchLength // 0x20
	// A value is final if bit 0 of its lead byte is set.
	valueIsFinal = 1

	// Compact value: after testing bit 0, shift right by 1 and then use the
	// following thresholds.
	minOneByteValueLead = minValueLead / 2 // 0x10
	maxOneByteValue     = 0x40             // At least 6 bits in the first byte.

	minTwoByteValueLead   = minOneByteValueLead + maxOneByteValue + 1        // 0x51
	maxTwoByteValue       = 0x1aff
	minThreeByteValueLead = minTwoByteValueLead + (maxTwoByteValue >> 8) + 1 // 0x6c
	fourByteValueLead     = 0x7e

	// Compact delta integers.
	maxOneByteDelta       = 0xbf
	minTwoByteDeltaLead   = maxOneByteDelta + 1 // 0xc0
	minThreeByteDeltaLead = 0xf0
	fourByteDeltaLead     = 0xfe
)

// ContainsName reports whether name is stored in the trie with a value.
// ASCII upper-case letters are lowered and '-', '_', ' ' and the bytes
// 0x09..0x0d are skipped before matching. The cursor is advanced by the
// lookup and is not reset.
func (bt *BytesTrie) ContainsName(name string) bool {
	state := noValue
	for i := 0; i < len(name); i++ {
		ch := name[i]
		switch {
		case 'A' <= ch && ch <= 'Z':
			ch += 'a' - 'A'
		case ch == '-', ch == '_', ch == ' ', 0x09 <= ch && ch <= 0x0d:
			continue
		}
		// Even results (noMatch, finalValue) cannot be continued.
		if state&1 == 0 {
			return false
		}
		state = bt.next(int32(ch))
	}
	return state >= finalValue
}

// next advances the cursor by one input byte and reports the outcome.
func (bt *BytesTrie) next(inByte int32) result {
	cur := bt.pos
	if cur == nil {
		return noMatch
	}
	if inByte < 0 {
		inByte += 0x100
	}

	remaining := bt.remainingMatchLength // Actual remaining match length minus 1.
	if remaining < 0 {
		return bt.nextImpl(cur, inByte)
	}

	// Still inside a linear-match node: the next stored byte must match.
	if inByte != int32(cur[0]) {
		bt.stop()
		return noMatch
	}
	cur = cur[1:]
	remaining--
	bt.remainingMatchLength = remaining
	bt.pos = cur
	if remaining < 0 {
		if lead := int32(cur[0]); lead >= minValueLead {
			return bt.valueResult(lead)
		}
	}
	return noValue
}

// nextImpl matches inByte against the node at the start of pos, skipping
// over intermediate value nodes on the way.
func (bt *BytesTrie) nextImpl(pos []byte, inByte int32) result {
	for {
		node := int32(pos[0])
		pos = pos[1:]

		switch {
		case node < minLinearMatch:
			return bt.branchNext(pos, node, inByte)

		case node < minValueLead:
			// Linear-match node: compare the first of its length+1 bytes.
			if inByte != int32(pos[0]) {
				bt.stop()
				return noMatch
			}
			pos = pos[1:]
			left := node - minLinearMatch - 1 // Remaining match length minus 1.
			bt.remainingMatchLength = left
			bt.pos = pos
			if left < 0 {
				if lead := int32(pos[0]); lead >= minValueLead {
					return bt.valueResult(lead)
				}
			}
			return noValue

		case (node & valueIsFinal) != 0:
			// A final value has no further matching bytes.
			bt.stop()
			return noMatch

		default:
			// Skip the intermediate value. The next node must not also be a
			// value node.
			pos = bt.skipValue2(pos, node)
		}
	}
}

// stop marks the cursor as no longer matching anything.
func (bt *BytesTrie) stop() {
	bt.pos = nil
}

// valueResult maps the lead byte of a value node to finalValue when its
// final bit is set and to intermediateValue otherwise.
func (bt *BytesTrie) valueResult(node int32) result {
	return intermediateValue - result(node&valueIsFinal)
}

// branchNext selects the branch for inByte in a branch node with the given
// encoded length, whose data starts at pos.
func (bt *BytesTrie) branchNext(pos []byte, length int32, inByte int32) result {
	// An encoded length of zero means the real length is in the next byte.
	if length == 0 {
		length = int32(pos[0])
		pos = pos[1:]
	}
	length++

	// The length of the branch is the number of units to select from.
	// The data structure encodes a binary search.
	for length > maxBranchLinearSubNodeLength {
		pivot := int32(pos[0])
		pos = pos[1:]
		if inByte < pivot {
			length >>= 1
			pos = bt.jumpByDelta(pos)
		} else {
			length -= length >> 1
			pos = bt.skipDelta(pos)
		}
	}

	// Drop down to linear search for the last few bytes. The body runs at
	// least once; the last candidate is handled after the loop.
	for {
		candidate := int32(pos[0])
		pos = pos[1:]
		if inByte == candidate {
			lead := int32(pos[0])
			if (lead & valueIsFinal) != 0 {
				// Leave the final value for GetValue to read.
				bt.pos = pos
				return finalValue
			}
			// Use the non-final value as the jump delta.
			pos = pos[1:]
			delta := bt.readValue(pos, lead>>1)
			pos = bt.skipValue2(pos, lead)
			pos = pos[delta:]
			bt.pos = pos
			if next := int32(pos[0]); next >= minValueLead {
				return bt.valueResult(next)
			}
			return noValue
		}
		length--
		pos = bt.skipValue1(pos)
		if length <= 1 {
			break
		}
	}

	last := int32(pos[0])
	pos = pos[1:]
	if inByte != last {
		bt.stop()
		return noMatch
	}
	bt.pos = pos
	if lead := int32(pos[0]); lead >= minValueLead {
		return bt.valueResult(lead)
	}
	return noValue
}

// skipValue1 skips the value node that starts at pos[0].
func (bt *BytesTrie) skipValue1(pos []byte) []byte {
	return bt.skipValue2(pos[1:], int32(pos[0]))
}

// skipValue2 skips the bytes that follow a value node's lead byte; pos
// starts right after leadByte.
func (bt *BytesTrie) skipValue2(pos []byte, leadByte int32) []byte {
	switch {
	case leadByte < (minTwoByteValueLead << 1):
		return pos
	case leadByte < (minThreeByteValueLead << 1):
		return pos[1:]
	case leadByte < (fourByteValueLead << 1):
		return pos[2:]
	default:
		return pos[3+((leadByte>>1)&1):]
	}
}

// skipDelta skips the compact delta integer at the start of pos.
func (bt *BytesTrie) skipDelta(pos []byte) []byte {
	lead := int32(pos[0])
	rest := pos[1:]
	switch {
	case lead < minTwoByteDeltaLead:
		return rest
	case lead < minThreeByteDeltaLead:
		return rest[1:]
	case lead < fourByteDeltaLead:
		return rest[2:]
	default:
		return rest[3+(lead&1):]
	}
}

// jumpByDelta reads the compact delta integer at the start of pos and
// returns the position that many bytes past the end of the delta.
func (bt *BytesTrie) jumpByDelta(pos []byte) []byte {
	delta := int32(pos[0])
	pos = pos[1:]
	switch {
	case delta < minTwoByteDeltaLead:
		// The lead byte is the delta itself.
	case delta < minThreeByteDeltaLead:
		delta = ((delta - minTwoByteDeltaLead) << 8) | int32(pos[0])
		pos = pos[1:]
	case delta < fourByteDeltaLead:
		delta = ((delta - minThreeByteDeltaLead) << 16) | (int32(pos[0]) << 8) | int32(pos[1])
		pos = pos[2:]
	case delta == fourByteDeltaLead:
		delta = (int32(pos[0]) << 16) | (int32(pos[1]) << 8) | int32(pos[2])
		pos = pos[3:]
	default:
		delta = (int32(pos[0]) << 24) | (int32(pos[1]) << 16) | (int32(pos[2]) << 8) | int32(pos[3])
		pos = pos[4:]
	}
	return pos[delta:]
}

// GetValue decodes the value node at the cursor. It must only be called
// when the cursor is positioned on a value node.
func (bt *BytesTrie) GetValue() int32 {
	lead := int32(bt.pos[0])
	return bt.readValue(bt.pos[1:], lead>>1)
}

// readValue decodes a compact value. leadByte is the value node's lead byte
// already shifted right by one; pos starts right after the lead byte.
func (bt *BytesTrie) readValue(pos []byte, leadByte int32) int32 {
	switch {
	case leadByte < minTwoByteValueLead:
		return leadByte - minOneByteValueLead
	case leadByte < minThreeByteValueLead:
		return ((leadByte - minTwoByteValueLead) << 8) | int32(pos[0])
	case leadByte < fourByteValueLead:
		return ((leadByte - minThreeByteValueLead) << 16) | (int32(pos[0]) << 8) | int32(pos[1])
	case leadByte == fourByteValueLead:
		return (int32(pos[0]) << 16) | (int32(pos[1]) << 8) | int32(pos[2])
	default:
		return (int32(pos[0]) << 24) | (int32(pos[1]) << 16) | (int32(pos[2]) << 8) | int32(pos[3])
	}
}
b/go/mysql/icuregex/internal/icudata/README.md @@ -0,0 +1,46 @@ +# ICU data files + +These are files copied from the ICU project that contain various types +of data, like character properties. + +## How to update + +Not all data files are immediately available in the source code, but +need to be built first. This applies to the character / word break +tables. + +### Copy from source data + +The `icu4c/source/data/in` directory in the source distribution contains +the following ICU data files we use: + +``` +pnames.icu +ubidi.icu +ucase.icu +unames.icu +ulayout.icu +uprops.icu +nfc.nrm +nfkc.nrm +nfkc_cf.nrm +``` + +The character and word break table need to be compiled before they can +be copied. + +In `icu4c/source` run: + +```bash +./configure --with-data-packaging=files +make +``` + +This will compile the character and word break data into a binary file +that we can use. Once built, the following files we use are available in +`icu4c/source/data/out/build/icudtl/brkitr`: + +``` +char.brk +word.brk +``` diff --git a/go/mysql/icuregex/internal/icudata/char.brk b/go/mysql/icuregex/internal/icudata/char.brk new file mode 100644 index 0000000000000000000000000000000000000000..a243ae6580ac2a4271f4ed78d252ca11bae801b1 GIT binary patch literal 13680 zcmeHOeT-CB6@UBY&6_tf`^~~KJ1B3-Qh~s3LHpru*CN}}0!yT@yGH1;4zttUfnjEu zofTKvstHZ{MKDGZZ9;5}k{JKNkl2O7_c6eCm8+lWP_U`Ww(=2E2TE&D6mzIFGhfb6O;^lyL+i~g zw{O1F%oHchLZ*@{_nV`|e6}>1+i316W_w9cs!Zp~v%ThUKAS7f=FHnn03Mw+OOKdy zv$=kAx>A|x@9R5${CMviQ|>L5r~0zOP49H&Xu(vT{e_}n3~zrMgWYONqx2`AbDRV4~%cvIDg7S%Lei)ltYp~VwPaO>MR zUvyrwC5KuzJ4_|YhEFqTG{AM7`VR+<1Clks4d>>F_f;*{-kzYxD4UQvSh7-zE` zC)5ErKJhmJH-_=x*d#<$pP0J$3Gjq@TJ`Uq(%w1}!jbzReAI?Ri17|9SkiNgF- zQho+_Ah-st7VK9?JBW4QQ4#Ir#p%J;MR@`$Po8EaN8DyCw< za@eYT3n=$-S-EcUHZ?Hm!wt_gTy1DkO=YWkOg*Q*r2bXyi;PE1<;ln^k+`-)dtCdq zrbq9Oo{YX6)%0!pxAm9wPxLRu9*td!{U>&hQ8vyQe>AQeo8qJKGx68r?TNLC$;6Kn zrt+tRnp~6InY810Qlf3Hpq@{@pX_d&X#9Sosk{p3KjFmVj;60RJ==7xX?62>^V#Ob 
z=1-cDdshpS9_Kw9;q|$e_kh>cx~tVx9)nXH=YjrhYeO3w-%I^y+f!|eZP(gT?bq7& zwV!Uk*e=kfvWp`H=cD$XWr8-9exM6*?&g)WCEo$=WeJIrcU)r|eT%wPfm;>$|5iZl z*xE4|7co5AVJeq8KJMsWKD+$F@(-7*@Q?88*v_SIz?hkPu&pm^g#^mVfHma3$1bY5A6Hz-qS zO?{firt&GsW@GB}sUK0z?opY1duo)+bEyiam~VEyqMiiqr5KJ*r_QDZ^`EEGp1RzZ zdNH+Gs^wzps*f#U_lo*nYIVbgC>wvAx>K^3=<4bcI8%8;-5`x@KBID}{tzi*c6ROe zAo5-FzW4t2aA6$90>s@J#$bWRj(oCZzy_!?91(|-4 z>ML{HbSGF`+L7kfO-WS_w{%7AP4~fZSNa2MPPgj2G_TeJ=~Z<_HI|-CAA?#fNcE`D zI`!JF#`z$Q$Eh@4qltU&UjSjp z)9~eRkbg;>YJD|5(fVfk=~kxkm-L6}x7yxqIoPVM>JGIMo5Jfk&jr-@nv8c*@8(AFuUe^1{>ZrjeW|LoaZC%XPoTYIz8tgto5 zt~iztS>~l>`V0CS`bT<4Y<+A;Y-h~2wKVE*>c% zM6H2E=r^}negX)~dkm-uUYyOi!4|Kk?{ufbk5|y*aW}iRaW!WSIk=)fwVC#!64Y$I zWL%Hf>_AGw+oT(9xEXBB74Ww{Y*;oTy=Z`VnKFl2u&=VLW;$-SnsxYGv-r7IK&*tZ zLc~m2;(@*pQA4v#G9uv`JBN^B(qpr+mKIMMdNL_Wl!VrlOi~8G69#nMf7M&^IOw>W z5X!yI2YMr*k%mI+adNLi7^0(8%Tts#kC6CHD|JnsL{jSU5r6+SANgK~yc4 zg!GaubXamw23UroNwpO${~q*{96p%JhTV7qif6+POqH`wHuJ+!@$JtDzi4=yqC_P%mQoxM7+V0@TWSz-aO+bc{o&NXj_ZP7iMK6XK= zmUmUBW-UQi^$nisc7#_nPgx)*cq(YMJ45jwK|y#m@YM$&L#&~BEEV2gu;JT5osB7q ztMyE&(~1ShxV%poOYBRqI}xOaxzu_y6A?QSuIii0h!uldp1rPoqY_L5(HIzJR#R#? 
zC)n5=p_aF-I9XN-NZ>NThHiOI&;ggl!Gue>dcG_o@wQ0d9^s$WB6;e)U(v*Ne}Ox6 zVBiQAYk~7DGu;Z_ae!xEc8r?KM$8w=Ku%c*&}Gv(4Ax@-Sy2H&n!yTA!~P>V=o_$v zQRx(#=zIo@Ro~T)x;{P3OVWUU`NOlrP9nK|xeO>K1lIvGLW9s`){e_bAy_n zzUl6c=KRW6+YOut!#j2}XIJTE#0f%wp5}lA#p?j^N4xLb<1LgoXBwgJt6(0KYI<%- zk6J-KRp=GB-Tiv|Otr>gV}whYu8tEDJ~A4<}L7pIF*(D^MCe+v{eiKL$%{GrH& zB%!T`d+TLU{eK}i!3VuUzs721R>Hna~STSft*i~_}V54Qx$ zloSvwxu$n_6v*}&oOB4wRXlNG6cY zzmF&YjFSD$23N3N#`#!)@i|3}{SpLm(jO~JE`)G!<>1?_VI-uP!qVd}gAlh=9O6nB zZcfqz4qs064FE$ZkRcW?&7Spav4Ww^ft6tqqYZ3gu6d7zW(I)bmH;YHXwCWlV!wf* zjK7d|xSHV8dzP&sB2gL?*1Efo&6O&-$%#T9{(nHGFgsBy7UlP3M{xN9D?Y5Gns6@ zIJG-dJ`(0if}z$}zMMIlVG47H%M0LYHaj?yE6j0ncVYf0C%;sl&v0^Et~kZX`*L5) zEZhcqlCgSRh?1BC)UgTrA}9H!cZA;u6f5{4L|l8l8xhRMqCSg0Kk z^AusRz^>Yp&q8JUAwQ)0#|OH*v(uS;5&A-@bmWWExyc*ahgj84vO8i!Dp-bu=;#>bsHJz9l$%_e*x8GX$=4X literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/embed.go b/go/mysql/icuregex/internal/icudata/embed.go new file mode 100644 index 00000000000..2b7e3033a21 --- /dev/null +++ b/go/mysql/icuregex/internal/icudata/embed.go @@ -0,0 +1,96 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package icudata + +import _ "embed" + +// PNames is the list of property names. It is used +// for example by usage of Unicode property name aliases +// in regular expressions. +// +//go:embed pnames.icu +var PNames []byte + +// UBidi is the list of bidi properties. These are used +// by Bidi class aliases in regular expressions. +// +//go:embed ubidi.icu +var UBidi []byte + +// UCase is the list of case properties. These are used +// for case folding internally for case-insensitive matching. +// +//go:embed ucase.icu +var UCase []byte + +// ULayout is used for property checks against the InPC, InSC +// and VO properties. +// +//go:embed ulayout.icu +var ULayout []byte + +// UNames is used for named character references in regular +// expressions. +// +//go:embed unames.icu +var UNames []byte + +// UProps is used for all the character properties. These +// are used to retrieve properties of characters for character +// classes, like letters, whitespace, digits etc. +// +//go:embed uprops.icu +var UProps []byte + +// Nfc is the table for character normalization where canonical +// decomposition is done followed by canonical composition. +// This is used for property checks of characters about composition. +// +//go:embed nfc.nrm +var Nfc []byte + +// Nfkc is the table for character normalization where compatibility +// decomposition is done followed by canonical composition. +// This is used for property checks of characters about composition. +// +//go:embed nfkc.nrm +var Nfkc []byte + +// NfkcCf is the table for character normalization where compatibility +// decomposition is done followed by canonical composition with +// case folding. +// This is used for property checks of characters about composition. +// +//go:embed nfkc_cf.nrm +var NfkcCf []byte + +// BrkChar is used for matching against character break +// characters in regular expressions. 
+// +//go:embed char.brk +var BrkChar []byte + +// BrkWord is used for matching against word break +// characters in regular expressions. +// +//go:embed word.brk +var BrkWord []byte diff --git a/go/mysql/icuregex/internal/icudata/nfc.nrm b/go/mysql/icuregex/internal/icudata/nfc.nrm new file mode 100644 index 0000000000000000000000000000000000000000..a1254c0aa7551085e9cd8deba49cea933fe69a75 GIT binary patch literal 35124 zcmeIb34Dy#_dotT&rBvW*=65XNhW4bW+pR}L=ZvjTkKR2#1>m@rA3)iOHq5NrKPke zO6{d~YOB%?jkTJh+N#o)lKkKIOlA_nhc2J*_xFFjzMj{4-*eA7_ndRjbDw2yCK1h@ zPxR;5KY?pHxPP)3oG10|&>YR0MjYoq2{idh+QZC59Cy`?<1Qz0T+ej$A9CCzUyfUp z#&LS|Nd_DCFQs5`YmPfDITN{`OAub+Ij$D>GPi^Kle6%n_>KG>K0#1Yw}d5|@i;e|6DGUXmmTQzgeFGS?*6-mdSvUUL;m zYe|z_M@rw9mPiwXXj!UkxNN%YxGX^sxy8A4aqHu@!tJ=5n|nj|3GPeW6NK}iP!F?5 zp~o5z7RP>^ti(HH744s-)p{~XQ{V{V5`;~jXFUDAT6sJ=WPdA_CK39EWt97fkyxP@jOyldD<(nW3@m=Zrt#7PfH^2FQ2mS0L{vQ5PW%Mp& z>*bG3L60BP{SW&~0_p`kF8#mW|5f1sD)9ei1ttdU3;5P~y+#DGEVBcL2d)V8kZz^k z;XrASJ@;t;20_h(+68qDY8=!$s8i7Y_5RID%tn>Z1bGMlzqwBSXR~S+{ATdUVDFG- zA#aA94Dk+44jml&v*NzNJdNEmaL~^6Q1S&Waqz!O$2VDr z)syYu@KNEDoL-*P+a>>b-VdKxWl#E<|5%Lo9_v+DZwlWZekS}{_}y@EgnvZ!2z^A| zh}IE3B8Emxh?pJmUc{z|{Sjv(u0`C95J&n)R*%$2){Sf(*&}jjxHdCu~0eZyMkU+OVnD_PqbdNNpwndTO=22#dXD<#J$B+ z#mB@vcSd|y?Bk+!8SOH`WvR<5ms2j4$NwAr^}p!x4c{5Q*WBj%K5!Ge-*xeFU*x;o zH`slZ?>gU2zT17(?mV~0_n_}FUz2-N_pazYbUE#7^S$WX$Nicw%lD@5ZQrr(zxmE} zU+T_DK5&2NEBI}4m->16`TK?X9mR-fzXU&}-x>Ft?hnzEN|OCdel`6h9(DbAuAyIZ zzczm19?2e^{kr@0_8Z_g)NhpESU;=ZRKHpHeEJS-c&>p*8;|$=KJZ)b_le&wzkPm( z{Z9Cu@jLH#*{{U!mfsz}`+jA9Vt+S(X6xf0=pXJM>o50L`y2ez{A-bv=j!`6@o(kd z9=$I9J^cIn5Aq-8Ke}8u-hYz+oBnhB7y2(Pm-O%$?6J~+jsHgftt9!>f4~3d{$Kci zOj;)XE$LvS5D>O`I*I=AbT7EoqjD%+##`N^`V~^^j zH%PyXSazLV!+4&n;937znLUeRWv9!|;BcVb>S^ex&y^$xSJ8e~Nwy_uSI~{5eL+Wp zPU3u@1$`5ACFn*_aCz}DmiJi};jTQV1hGS3-nMpK&owGIKDZ+5f0I)L>)<(+ybpEF za7M`r&IxXY-U!NSEO3JB4A=Rti(GSq*OX_vzP#nRP37GF^7b&=W5k~#v;m7AKw92n!n?t%gFg!1OcpzWKMU4NP0~Zb$AiBNJ{No`_&bvS5d2H< 
z@4=6PMIo|~bnG2_hXjO#g~Ws;hNwbPLd+qVA=x2~LRy3{uWds*Q(FbN>9u2H+)&a%T8=U*CF8eAG$8eSS%8eJM&TD>#@hrF~#sj`&k)F5qXGLU(} z`33hsdsNKA&ZU>uEUi@~i*6ykLkdHRLS7E3TfP=wJjdo3(OPOOHI-UQ?Y`@j)-7ds zRCX7vh;HXu4gT{{uS#xJt;dD5lfM3(v)V~I4HjLc710^TJUJvR8%Lk#UNkp0qQP_W ztD5iQeYOf%4i2>6+a2i8Fs*~^c|PvtztwY8$KJ9Go{aSTGgx_y!(&C|9TMi-&O2Jx zt339_c>7iPVz$q>vY(fV93ST@Rtl3lZ0#A?uK>HfU1LA14iC?d{nspdNZ)$wD(Ed; zfPO#elEoXv~!YRXbyDOWc4XB|(QH?y_Ws8ypL`Dk3p z)3dz%cjSL3?|(Ow5z-BG1#Ah~^^_HYpLRP}RLdIe{*^jSz?$4w<*NHP=2Vd>^EOF3 zgVz2L?AV`u%FLd;R{nVovk~zd=gg|EyH%y1IBWUOWBoM`_H{O2dIi}z>a~mempMy@Aqz%-g-(lcT4TZe?PuG0(*b> zd$HI!M(ZCN|I$I5x8uLKKl|s17q@@9<7?j#e%dJe%BetR z^KWPKWZa5)_Rq(5WKtIUzaRg}j4Dg^OBJDqrMl2#($vsX(u~kE(k!}L+oU;F?%I_{ zy(sThT5+#0mNpB$EX@tQCcQ507@8M)OWNzH?|U!m`$;>;r}v7AKWX#NCI9(3_1FDB z>yzS(#Ietm=Y-OVwPjyfj#b8Vl@CXLkFPt&9f^%~Sg_Xa%O1sn_C1|5Woxj0XT$#A zI!<91`7m7*>}NYH%GR=e_&(N%&{skyhE55c89Fa?N$Bd(^`W1H?hM@A^r9*g+@B!f?!pDbC4WApnG<D2EG&WPQ}Ak%q{$$l8(hBO6DyjBFR#C6edzLG~lmOYYSta$w|$$nlX=Bj-jgjr;)LDETz< zaOCO8;>eQ7+mR0<#Zg{S!BMeM$|z%$(W{nM&8WIj4WpVzwTtQ&)h}vT)YzyuqGm_E z6SXR8W7PJjJy8dvjzu-}YVCD8$`*Aw>Som4sM08Dv~P5HbV9T?+7ewix@mN~=x)*d zqK8F~jea9~cJw>ZE2Gy$Z;aj=y(ju;^qJ_3(buEzL_ds{#Q4O7#>B?RV>B^_7)wm8 zn0hgdV;s4&wu{$Luc=-Oy;gaB;0rTk;VGOUi7);6A_yjtBXyGW%)etkygu& zZ5G=;wmbC$tI4bNj~yO6E_O=noY*ChS*r2eYP27NcEvWvx8+Awn^bLXwfCxRthT4x zF?@6WK{cswpl@~GWZzo8jeXxqI}p1vt&Q)xw9jKdN&6!9)3mQ+52khXJ(0%ZU5vdR z`+KZwTu@wMTxwkXxR!C9;`+wD95*HI?YIx)cElZwI~8{+?&r9&IPdCF)iu>E)$3Mo zRlR%lA=O{2{#NyOtFNoRwfdpzUswOW`tQ|U*KpC{y_Za@n6J$mG%|-8*ALgC-V#O-^JgGzlZ)%-?Dhugo|mO2_>_O%A z@ITG7dmFoZI_`1yJ1OfqGI4NrD|Xjnoa1in&^oODS^H`EXSuyK)aftkNh;$x50DSY zar~LUCp!55)VjWam1m+~Vo0Jtya$8AD@FNFp8u*YS{7BFOLbX1IJr#m0=ZR0QOb0Q zT3PB}7^5$rNe0H4WLb&n=;tKXk~J&$QTK&rnw!`_))6Bb%bHiQ&r9r;*gtVd;>g6- zvUajp6DKBil693$Nt~J3L)Kf?Pd1S3=Or#q94Z?jdpB`);%M1e*++?+tIT17Y*H1^ z6|=T;Q!D9b$Y!HAU$!Ihv&2J*$H~_s*;3hivQ?F=s(N^_{+Eg85-%lw_gH>EB>s~4 zd*Y+VWH09Jc~*yNeE!(Z#`Eke>rejek(Isteg4Hiu{MV{EwF3VF2Khw|!f8{~4gEpnyXF1glipIq;DL~e3B 
zDNlDhE3f7Djl8bg6?p@<@8PGh+mG_*ZokS~yWOwkwVhj;+$E{2n|o5Vq~2~pNfAi{ z-Qtpxl18{`k_<_XXwPoPx=nDK#=bczCw&=W8mSaZg)~ z&maG6i^|X0XIuR<>aX^J}#tzrXA2X?3kCtBCS%a`yL_7xVacXZK>>pKrz1A*+i0 zlkMZ@)!wUEsn7TJ&n%u+i>q#vegDq!|Ce!pw_-i51%=$&_m}iHbgR{qT zNdGt5dH8&Ew#K;{7iwIo@m-A@HGZt|3yym=?$>x!gIBmHWC~A3HAR3TL=mBgQN$~f z6e@*IVNjSA8H(D9Y(V6_XXy6f+fb z6blrK70VPW6sr|$6&nYrfFts=4cjZ z7HgJiR%ljh)@n9rHfy$Nc4AptxlV&Wm%Ue&#(`#0|uR-L|d>Qg<=(#_W`)qSAb zsN1UhRJUJuRCh}Em9AKK71q~vKk4r39_YAa{Qd;UBiR=ioE!^>$cAXd5Kb{Bn>dI#_GwJQ5$0IswW^e6292QTDT-7tred@+NT#@uq z68CT$)#&LN+dZFh`^s%!((&?I{wGUECQs&2kwG!d>TBnju~*r?2glIvqbhoodOSm8 zw|v$(yS77ekK_@_Gm=*&?@6{LuP<9q`+1Hr`_X4d)tD74pJ^&;{`8etVNunOyRAAG=ox#QbwkXNok=OmohPBa!Rh|&6KxN=B2!yvMgmq%IcK0DIce7N!gLIJ0(-Q zFXc$e@sux8&ZeA8xsY-t<-3$tTK2qZtG$u(V@iAFFDZGNdnxx*9;NVlcDU%>_1=0v zeULsvAES@gC+Ssso!+1~>ofGV_1XFyeG?onXrd*x)PIfR8~qji zclsRV4gHV$U-iH1|J0Z1MFv;H7>&EZ+u&yiF+>>R42cGXL1RcUq#Du;nT9MweM2Kd zGefSSy`i%q&oD@lZ|G|%#8G4ziDQgmoZ)rDG{a289K!;`V#6}S3d3r{TEhmzIL&6m z4#RH4KEs>ZeTGAZ$(qj%CksZ ze#RhU7=CaQXG}CIj2dH#ai%8Km}abP%r@p2n-~{pS{QSU?Twv{dB%KWfw8}lJyi!8 zi;N?UV~npEUpG!MzG+;ndCR!KxY)SNn5SD|Ty0!y++f^n++o~p+-E#wJZ}8Lc-DB% zI8AxMc*Xd=@kisY#@~&PjC`s~sw~wrb(!Xds#Vnk8smoH| zPhFe3A$4==w$xoXK1)52`la?r>haVsQqQKIOTCo(ZECLaM(WS0zoy<#ElYJV$xJ_J zJxzY55L1*X-jsynXKjH_Wzv}pCbKEiv`g{3Hp^7sbX?QK)XEf~X>aOmx}eE3<(mpj zg*b{#BTZvWubEyqO)5%ET=}XhsrVFMk zrteKZnC@wQHr+G*X(}_hm}O>fv!6N09A=I($C(q&Dszh2Y%VfnnzPLH&5g{>%&p9A z%^l4znY){Nnfsa-D+ig2%p=WX%;U@x&AfK9d762qd5(F3d5L+2`9t$Z=8w%=%sb4V znGcvhH=i_rX+CGZWd7E?TJgR42lLP7U(LUp|1|sQ%FH5*tHs^oZSk`NS;8z)mN-k2 zMP<=h3>LE`!;)pmu{5!?uq5gfx?D?p%S)E-mR^>=mO@LBsOme(w=Tc%kwx|x=F zmc^E3mKB!OmX9nSTeetsSaw_XSq@n~w|rqaYdL4RVENYaz2yhX&z4^;_bp`>QJQO- zds?-$fVBF$`ns^RsI>UBq%>8UF3pf;PRmHEoz_TKKdn((v$R%eZPPlY<)!7P^-UX; zHazX+v{%z!OM5+SO4^%gZ>6!%K|EK6e*>+>bq8AafG*bMBx89&GM*PpCh`)Am6u95 z!9(IAcuAUweI%24UrF5zf5{X+Sh7$QDdEJi60x|Rq;^I_376j7Dutamy`Dsx-p%Tf zE=W8CvBU>5e<4^BEHt&g$%9W1mV^uSB*E!TCDG{}B-LRXp3X^TAnq*0orAa|GNh7t 
z#Fb;LQt*+?Mckno!4ka?DUl+k2VxG&XewzTZZ6RZ9VFWHu2y}zmo*(RuZzSIlOUDU zO81bYV|*>dJ|=2vt&3b5pkFuLN75KMq{Bx8?d@y_C_v)5qlKo z+8?rE$a4hdIu!EJ>0Xkt>GiA=5cjr7kXQwYWI1Xx2|lL7ek$zWfc_XT8{jJkzcsjD5;HH7GyNHo`wHasLdM0U4=30 z5o0~-7!AAXn8#+sT7!IdLcbX`+6tRJu=xym97u00(TbZ|cS3dsvcr(sAk&MRO7ip$nOi(?kw`Np>Erd+cB({Vj)m+8TqY5esaWBBEOfA-{Fh~l56SB zCD-8V7S7@Ibcy5-)a4<3av6d|oFSH6M{JJ_ABi-hp4BG<>nFp<8l1sNu0g*YF>j%E zx3OM&h`l7?u)8hzN^ZgSI`nripJ;dF^5cL$0uPfp- zLp_fm=X%KbbFA@2$X7tV64WURYh1t@cY~b-YupWU_d?CQ5W^R;K*R{e8b68i7>HU$ zB3>wJ8;dob0NY5+=@jCgM%=TAI|?}`B5n6gkv{kA}#hWqKz`8|0wG z+-=C=JaQ;T4lUuM4RYv!_?HlW7UD0(oP7~L5M$?Jjdu~cT05kRC4Gco>t(E^Zt!~= z=dLIG4iLIo`yij8h(8+hEre_YavgwG-UrxkO*gRzrvuCAg!Z@|Y4*iVQ3 zRM^jj{VdpALvAI=?FMrD0q5mykI3&Gtn=lF^&WBzMUIhJM_I__ZLIUJ z;r|2FXC2~xfH4~pVj#nJRpfY)%>y~!MUKBA$1hR8uaV<<)bBp>JArj`5o`S_a$JQRYaq57 zIp!h9qgd-DSnDP5^%KtJO|10?s7)z+3RtTWto56S?}at&20vP?VPCA>K&2TehBPuLl=zx zO!VIZ*BqugO3~NcSgT9?6P1t4R%)8-GaOe{49r`rHDBPHhHk=UUPzV z4fMC6$KN-SjDw%oY9>eyXUeTdA-@axLX6vsaeFXs9&DCj+`AZe4Ep=fFNW+h$o3)L zkBC=-d8~Bxv91DnNfA%xV|`C1SS3>2>(F0=zJPW-+O43SGENdIl}h%@JS53dAM0V* z2BSSDlS+yq3zzyxF3SR~*I;`cbQ@%Yd^y^cuw4niOW|)F`peNyK)W6EDg17S--ECn z41Y&qyA|yT_`L|(PWZhFza_A}3AzJ14_YUUwB7>!1lvvMe*!uK`bxIaqprs)k9r;- zOl|0~YH9;eW6%W8RZ~}j#(IvJ)zWvwtZty*zN7u-`mK9wzTfD;xq)l&*Cf`>ogcVv zo;+xD(Cnaf^X3PQo;NpW&HNd`A1oLfG9hG6$Oj?QL)I*q5i&Y-cIdi=^Fv23oEthD z-yC}hV0mqWh}`VXKl zq}&!Fw+_hd9kOG&wLxx0l-pM1wjQ~yL2jF`AdD3(C+uOs0;>KJYA`MIz ziu(~a7+g!@wt^c>+;(uSzhf{i6!kaJgogPgyKBvbwFmCV^;p~l^lG3thje=MOw?mK zBYGC}wva9x8Rj6vCde=v-nP*A`j9q)v>BwanbF9UtP!z#W(@W0c_%n^EK|8t$9grK zI+m%@sbf7gBF&@txro#rkvb!imZU7lwvcv&^d(4>Gv$a#))=YJOroAWzR{^;5lv1V z>sg#S7BStaW4)S)w2k8TK&0Myr|O4_Y@@mNgrpB710Y#IHbWs90m*2Ru>6KWG76Gc zAlXee6HwJj)MI)pdT*e&gmkmfn@>Han~UB;^bU~jJ$PCLPisiR^eZ9x0Frf(te}iO zfn+-*pF(nkMje3U2)_O}h@G4|*6V^u8)ybSz-=LJ0=Of@T_Y{C8$epdO(!kme##WlQ<|fD zC{KvHJ)S8{l8xsALp*u${AH3HlH}kiCzLd0PXpXhnY$;n5t5TR?Gh$Y(c4#N;U(?rYUI$5H_Reg@nyGcZ@A0Y)NAW z)7X|Yb})@?Nuvf6wxZmIP;RZrW(Z+x!l7i}nk|$tTPvc|KtdNeT#aNK!!9j3kAG%}7#6 
z*peiJ30sn6Fkvo9iU@N_Qp6DNPzk^xgslmOGQ`!z=G`0k6=v0p>iZkGLgLPnOoTe! zBN^k=s5yH_6pJ+A*gGPtVtcClNWxWAftggFIfU~F7Z5HXTtT=RcvO@{?~<&J3-AWX zo?Q$#lf^dR=dfV!mds)|;Q_+a<;bi9#Oxj$AZEOum|04Q4<$aBv~gl~rwS%C5gJHt zAj=f8Y(u<-coS)}$a1KdozEh|K#~{u@^ab)rMK9@yt`Ui3XU_UV_cxwvAe!51=pZNc1UHR zkiY(fO(^gFly?*I+Mglbv)K6>K=P&}A3)fYybU01N4cDPwbPPT374Y)-ihBy34I7}x=J7qOK(K0GCHH#&v@y_Sx7~)EylAY|K2gCO<C>~1K znqmzlY)!QoN|?)4hfNV-E}h#V!d%i80ed4N+u`6v1>o2Y2QTVFGBNJTeM!bRp`;DH znGIuD+=ikKV_4Fbo}$AE+ftQ=6Sk%GHk`0606VQ+>dL(3zwcczi_=JYDhs#cqm0rP-+v6k7M951>Gj_rW) zP8sKvaZZ`cDXSPOa>_)ar8qZiw?Yst1IKnMkR@tX8}D#DO37r5bI4eE?J_*k%I#!M z8L}*wiJY>E7!qd;cJ*h|obm~?>CEHgw@}j%o*sdV&lyOcO z=ak8uvWl@Hr%WU|hUjdUM-Uwc$98!H(Nffk?YZ%yW#HHz8}F2HP8sKv$(%BoQzmlC zP-i|1J4s=pI3~kR661v8Towyg5D?c1n_m&|BSd6-cf4plIJQ^EJ7t_x#yMp&r>tVE z$SD&sJ49j;;SbR$Hr~6~d(TLQ#qBANk(5V!%3~y9dph?cfj;;wO}jp#YQ(Wm)eaf& zl<}fqXlVyX6ha)cbI5q7j2CI4rQHybjyPuLknv7gg>M)Q zDXXxnt~ssp{Nv+qxRDI&35#B33E5W%sSZsj)i-NkH? zyR&bP>EZ0#V|qIK_LyGIzCEV5vu}?XjYw<68<;IpIQ#aPN@w35Q|0X2W2&8f zdrXb9Z;#m(k=UMDwg~%W;8@IV-cv}{p#XWt&PhqG^wneXh|WA=3R?J;{f`}UZ< z5sB@SWs9&!29Cw-Q*LA5Kg$;Nb@uHs`#JmenEjo7d&~jOzCC84vu}^d_R-k0Wdq({ z1|llkRl}-9n=M)+VtuwUKOqfUgKU?Z+0SI}J3w3oSb0poimM|U3OlwN$2@ESu7d~m z6NYTjLDJYg%weOjkITl+qp`cA=xT~$?_Rs3*j2WTqHIGH$LC3Au?_3q(Pvf*$Z7%Q zvVgpnlBZJgR7#!>kj(+uI6ikWPoJUA4E=!z;MJjHURThF736gVd0j?!t8iByjd`$L zMmEDEz!R7u`>9B_=r_{XXSjq$t!9Yx2wQ$ej0rf4a5mv7!ZU=Y2}QBYMoQ>T7)KaS zm`<2Qm`&J#FoR>G@`!Ijm`;+Kggr=-PdJ!x2;nfo;e?|IUnU$+_!^;=@O8rFgsTWw z6MjfIns5x^Ou|`&8wo!q+(fvUp|p{9fmV<*kLAFO435%ERDk05>o@h=nqGV%3@UrhKR;ab9v2saRZOt_hF3*k1x z9fZ3GcN2a_xS#MK;bFp)<;b!+P5fz!ewO&N#M_9s5&sSG-w?l&_@4=H6aGSYhwu?l z>?*b`C>Gnc7c=f{C%3}Rablb9g4mXLK@2Y0$<@Ckw$;4EWQ=PI&c(K^*oAp1=hi#9 zUBqp@;9~m-USLMI$e3j=itvt8= zb)NfU63@LkgXe@kJQvoV=gtkpUxXRLa~Tux*IF?4mjygG9zKiUQ`{FegYny77#sNx z&o%GNb3YXF-0_J#*JCoz9qGn%wsAZc1)oQ|@LW(Y_<+xc7;^$Ktj%zKTEKrxp4;Dp z=e|TtALzbEPW)K>wh{bx0{uJ<^O(nTg>U1p;VkC4sUvvq)sZ~6bPQs=hCJRvAMw9> 
zgXb=_L;odIYBc`33w(5gzoW3}OyNaEQ4O^^e22oIyM{c_?az@pqALA!?BT+nkxHqfb$zXAGL3W@rb<%bK45}6^yaM$NA>S0kzJ=xm$oao;hG8usuhGbB+7i?c zb5mo?WaRcT*4nS2+BnCH2H+||%=V~fUz{5s%&QAx^=|=t%zXsLDBwQ?Io^bQ4a|26 z=29262|~@S&?mvCH|Cqq)(+w{h3{3+eTQ=99(;X{xYu#5e2(=N{W8zZLjCp(##~X~c35+JPy-{@ zpaprjf>xs5Un2LfU^fwUiNyF#$SV!GH%9y>@IM9D;Fqwgg?aAD<+%=6cSB)w5&arC z4@ZabTsmsq1(XkZ6VwLh;ZykBg}L}zaX-L1GoikZKtH2q4?*$;h_x7gAlrgC>u}D# zZ;qJodjz>%1GT`s7bB*m4bSxk)k5r5pdOugZUAUD&TDVfcRJ_}s2R@bRnSc2HxYFm zj`@V5Zn1^1N8J>I!J~FFM__Fs&m2$#P;*clP-oCI&~#8O)NmYV59%m;1#18_81r$# zdi!x4&J(VmwpcelsLxywkNTQHU*aAioyK#YpoZUKUS+678s=NnRp6T2e!7SiijfI8 z6Pq#9-k`CVc{Xc7hcN30pj^y)0q82|W6Zd2d;A|Apl%&dIh?7}I3wpk1z2HgK{iyP zDJmcZbp`2hR`=s73da@M64VA&3Bw9$0~(1d>l;+%b5P_!o_im65d$jo5oi#qBPl}0 zpon3(>w>-k-32v-T`kZtRBr<+^D@qGM_dIpQI(chRX>A9zK%+v8dlJ5ob}H^KZ4#s zWlX5ZV4Up*Gx7go&B7Ho7rs#)cT~ssZGmfOYgo*%F0irf>?NH0G*qh=>RLHe>OU|EJ=!bryvsoz92kJ_$A>P!n1^55!wjP5uPXfhOn6MBH?AiD}>(?UL!0a z{GRYSA%WCuvVJz^AS0q7*?9lgNSwpp+q zTI2fZ0UF+dNy0H^o1W)-K{AabjUbta-g}^9@Vy4o9=1!6-h)JhtL_r))6muf4F(Md z1sHJ6<31z-<$yYXhJeO^UTuZ9G@BB!2M)xY5RywI>5jGg1A4!JB(0Gn$OLKt>J3^B z`VHiRtGgAbJ7@yvL(s>dy`aFhcy54Nw88xr^b_s^C(yqO@&rExZ8BONXiYnT%eGxB z#$6D--$4@GD?&lB?4Hq{X;yK!8|}DMoEfxxK+`(m=V7*M7?;(VaV6jebjH&XG#YdT zbQPpA;Yom2-Ua6wQ~>H`61WDo*Dk;k+Vzlp4Em&t!1cDxLflet4Qx7aJno*ch<*qo z8r!yFR_$!(F|HeAeL>%1{6e(HL0^OJgT~?RXffk%Xco8}+nI|P4M_vsCmVxCfi{BP z1APR#2=c-GGYr%jv;lMm)Vw>pJ9%6}*0{4igrqmx*Fe`u^9wYwxYr(lWHm@_!Q9Y) z1+*A69Mli8e)%kOw_@aqyYe4+R^@;~(olWSHCW8TdYl8|aJOCr-P*CZ+h_4y4EkL_ zt$HyJ`w%Y&BOX995!AA`z%{Y;hu`UV_CG0TB z;F{Xr$C%c5Dl~;;Fxn-c9i*9t`qsC(Utm@bFs^Aofoo>l3H#l6XE_e)1q#G-wkD_m zGy*iSKeLL&S<$1X!ESm1aj(af6b70L>xO8@ zfQEzC4rEt#JbY#kVo%cU@Ue)v4>3|c80TlOz_qYRAs>t09O@b1yQR$^`HmWbYX-EY zCeN)vyA^Hyq4;b7ng}`y3d_V_BPbHMRyHF>)Ff^eo;M9_OAyy*7}G3+$5ytjz}9$D zVI5$Ml%7w&!m_pP4q*r?*4mbWQIW%OEcJ{HgjRS3bp+|N*!3f)tEfJ_x2OI&aHC#jnwl6l=T)pR^xE6b zV{YwjahO%tv5Z>HvwI)aYH!;NY;1dra1rdq z<8f!iJ3(LA&ICUNv>5a;?z$UT3)%%L882`hZ0Q&g@fv0US__H;?E-xdY6Ut9ItD6! 
zP2f7>{4wl{s&%xTMO75pxEoKvoeUH>QQ$h+j>2CLa2;(*aGjyg%Vxa|$gh*_G{*FW zthp5{98>^W4Vnb^lT+ zK(+wwdXm|{`7<3d_MM@LXxaCRrlMuvBzhAq_E|&+xH={5Hzi6+#+7h{JfRv?(XUx_SwzOgIE3Eck&+6w*+ literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/nfkc.nrm b/go/mysql/icuregex/internal/icudata/nfkc.nrm new file mode 100644 index 0000000000000000000000000000000000000000..2e6e3dda074ae9c7c3307dcad2bfb352869fac84 GIT binary patch literal 54136 zcmeFa2XvH0)IR?1ZnD`;C%re)vuWGX+t&2TCfRH%X+(MnMMR1aL_|PAKtw=6LAro| zf{1hkDT082hzf!Vf`WoV_&+mSl0XnF-}gKJb3W$WXYSm4=iWQ>&c0>#-QdT38REo{ ze{3dyNPnRjI2+p5Lm{dSPccmSG0^7^(<0~zy=TcVeZm;#&1N+2!p?zV{A$5A8e|;E z@lVt;q!+^sw%8JRgo$83%VL;xW;F95^Cy$U8qM0o`kfWQ*0Nt_A7eXkia0YlM>+Op zd1fz~MX)2-d(61zQu6`k5$x6G=ghsh5$ss*K<)z62e_6NYKvxzw=FJO1n@F=&+?ir zHt{a<0{Ic_YJMw!BmW@Z$})nTVOeMSy5(}q?=0P{G*+{%4qN?T6~PX&7FiFqe%X4T zwYd$6qc5SIc%!VN?nSjzD82(cYOuDkjkN7=yVCZ1+YmdQ-DEozkM#M_?v|azet`XI z`)l@5JqGkx-Q!x1D2D+Ks~xU6L^}3&T;O=l(c7uYX@=7wCu`?i=LybhosF^1ID5M2 zT#T*9)ze*GhwbN`?5~4&L3UBD5$pjV%%1CKuC{LZZZElg=4R`jJgxa6#a?K#!n`px1&<2e}282hR;Y z8T=#n53YkntVIDWB^Li~CWEa1s{b_HIdm=A^uD6A%rb}hPQ^VGW?F`!=wma-}*oCm4!ycFCU(b8uL!bGF{oU^p z!z%n?3nK9hj|fi+&k5Itw}g)hpAtSVe0lh0Q1{jkOZSDJ3cnm)H5co9VJM$u>) z8WA6n8Bre5Ct_q5xl#Y*_45&PBbG&Mj@TD*D&lg)?FgR0O%N)G7i0>`1$_h~1`q!XWK))XPU1z|IGZFNp8yx;F5k4iW+Vs zcM|1}FjEcn!(N|MvV!|5m&N?QDP|0hv8J)@nL?(L8N@u#9K<+(Vt6bCE0a~p8payO zTEhB{#bSP9S+hgg3ib^4T=rV_X7*Wj=bUe{#hi3bFHS$sTFwT}5zci^s9B7e!mQG) z51z~qnz5MUW;f03&11};F`sC@$b6ak5%bRd{|Vm`*0onZw1=2v!zni=Y<$>6oG*yw zOE`PX1-Bq3IiG1&1ZE4Fo{qsTGdG1XDw znC)2TSn62m*xRv>;{e#O7&q%+>tT*hJC1W~bDZY*isM|zHyoEZzU#Qw@qNdwj-NR0 zcRcKP-0@3l`?cdG$DbU3alGx=?r830X)n^PLtuz3sHpsnEK@ z`aP%3PTQS!J2hAju>REPsM8ltXQ|yt>j~B`TEA-jjniePYfd+v{%~TPEu3wf9h}{q zeVhZG!<}QCrOwIDGUrU^eCJ~{7V-a2%OdL!tRLO?xb?LzTIQioS#YhmR#y8(*x9ge z!Y+qh3%eQC%elh2*15^KpYssski&ne$9D_^J?eK&Yw6RbY?MMIDhT@gY!-2 zd(Ipe5}W5@>*D0%;li_Vu<7X%=^}ASa>;P{Ba8{>hO1n1sZFs}%QBbME*o4vaQWC}kIO-qV=kv$zH<4_UE4n1n+@g2{k8Xu{O0?!P&VJwms$5_b!do8uR%X{yP@1Q 
z_Jn7Y_C~X&gZC_(?3=zuEWJ*zAv}xe;7R-7eSH=O?;pE=9L0!wtM1S^zuHdoQ)%Gq zO|pq$eZnWZ4i2B@`ikp3tgwZkrLHSn*SqozM>`O;7?j!R=r@2LW@wKz)HApry6$i- zx7q93+vbq#3D+eC`7YGQK|dMVBrj_V7UKmXCD2eSEc)3zYje)^Th|#jEQWt4_Rfsk zg|(Hn#W-&Z;!goVp_v*|E}*f$~0!uzT)~bMtTdxVk#~Eq&2m3vv9L< zV==Wft3JSf79%ajSxmHWbemJ60NOz~sjE{h`;r!3A|xV!yC zWw?qM>YIkzjHl0#huWI*zHULtAp*o=?0K=YCVZS*iksZj()ao+bmF=3vfK(D>d~=Y z{;K!?gihn8dnlWZICXB#ZvEYcQj1Y;W8J)Xe!NL;Q{85|&2d}c_7;^dcU$AO(d|RG z9d3Kw0`W9>$nA5t({AV8F1r2bcEjy=w|j0JcfPxwJMrr5?nRxGde0==Aky7a}! zZKa<$qty86?D&l8?Y7?Ehkj1&F70mZ9_?Q3KJ9+(0que9!6>2a;q8KUT%kcR?Lr{& zg4GW0f46juLeC|)r?#g*kcFRnuzRGt$UVVbU|5S!o?|?EB=e;0@$HH2df&2kMLRj8 zlCxk(bUjbz;IE6yl-qN7;+h^l`+*~t=AO<|K5$eY^r+zFz#@~^5uI?v$|xb(7~3r7 zNpmATGEMnC81+foWEGGcjHo}i8_}pCS|jQ6d=lUP-o`jP`kG|$u+PUI!One*9y?~< zC?US}ys;*&&V8SZ*MBOX%=YnC`s31(V^^+Yr4YH%R-b|X6QH-(YxGCe=;86P|DHuI zuc*tXppaLFb}wF47b%Oez58gt4u-XC9E;xaweUsZZ-=i8e-C>dABFFF;PdxrZ~wK% z7TGZmWM6JLKh?XpxEr(Y9Jj;%K==`4{>8t_{-3PJ&e=REf0kzXO{ZMR*dJB88#l4N z9DXhQCiU@0Cr^*^@;@X0Cwc$3nbh(I(oaCE`{-^~2;NgLttcjf8}VPn?uRuw{((>3 zf8n*nie=pX*wRUJHxfrw5dz-SLy#;%h8n2 zPwhW-lh4g|eR*64?h(EwPmiL%L=lPuQ6#hJJi z6Zc?5`2!mEeY5*+O}zVlO_>qBA}aA+hZ!WKke+7uLz~DQ68Qv>`ay5{_TUG666pw& zY>bI}&csbIaSwW&-bM3I=kY{zHWUh~Y=eE9P z;DSq~*Vji*xf6N7h^Z*YTg+pZ@9N$?dzh?C$#1y=C{(C+pw6)uYY{ z-A8&d>ApvtXP&J0KeO6OpBVpMU-De`_j)|ey8Aiqu9tr0bRe<$ud{hLZbv-*Q?(w6 zl*Im@kNPxHXi$U zjvfbjM|kcYz8=SUK^{+j?4BgH@tMD4#-F6u-?jPcYuMlQhTV)uJQqJn9Q{aH47$)?Zkw{;o#eZGT!uuSr<4$E!#FB{R5i29sMr^>_9a|$lj@X5F zJaDfRaWvvM-t{;e@pZ&^5ktbRL|l!y5pff5e%y^<3Csn2fwchN9SEER?t-~t-U9w@ z{%t=&Al?aaza1%v5oFw!2oeM-f((HY?})gCheAdqxm5D#(Gi64@-(QKd#4H?@|Ee6p4*x!S<{{s^ktmz*hkL|$NIjB0 zeUFbl4tN~%IOXw`$9EoA zJ%0DN?_us~>1prj;_2lX;2G)}T_Q>-oOtR?km7_j!KidEE1i=LOG8oF5X_=0p6kBQQl(jMDKL(Oz&dvO7AA`0p7#BM|+R= zZu5T8`(^LB-fws>@qX8Pt@kGHZQeV*_j@1qKH>eP_t)N+ynpik)%%Y3eQ$FgOCNh5 z7auR5bAmp###Qr!&25_ipHQC|pF|&pPrgsNPj8=oKEr*+`MlsW%jXTBWj^ouZ1vgW zbHwLMpYMFG`EYIRZGZE*>%;ct`P%z>;C<#Oywxo8&Gjwyt@VA%cZlyu-)DU%`g+&~ 
z+P>gB!*{Olo4zZ3-}n94_kizl->-Zx`~Kp4*O%*O@8{tc=ojS|?w!3W4+WugB+m>tRWEWr;WtVJcAyZ}uPTKiYqi|8)Ns z>|eK^=l_=fD*sLXJN!TOUv0nD{)GQ||118#Qhx{RPubt|w+QGF;2A*jyJUacJ}4kM zAR#~=kQYz}nPm?aQ-``Qs5QW=hpkMbT(J%;s|&|^lA*L%Fz<3Nv7JudaQ*@Nw1 z@8IPysCG=i$l73sakb9{JYPE{U`FlqfVs6X4vXk9d^KQEz?y*V0f+IX@6Q4E11x(w z_3YVG)KlKGq-TB4p*^4N`FzhgJ>TiMspsCFCwqS1^Ov4?ds+p021W!X1?B|m0-FK{ z1&$4z9=I@YP2l#x!+~E0ei!&_AScK%sArHUNFJ0IR1%~M>K)WKh{Ys2v<8g{dM;>6 z(6ZW>g1*6w>lft*zZ2w7GUyP&?jgbBO#j=qTPb{)Faw zA?QlbjiBFy?gbr0+bc3SGA5D}%n$An{6(#Muy1fsaAdF~I4L+I`0HAAaAKq?vLKSo zVL@=>19AoDLI-~c_HeLBck(u-ie>Cz$ z=NAb$C+L23&_fj>yIS`o4%%n)~qxWz;ELiBHjeh0Lh@s8Yx) zLA8c@YsBr^*CSlex{IrM1L|Fvqn4=tQG=seqsV-Zj2a)6Aea<2C2AV9`m%^#%QFQ| zUH0;4M$L;_9JL~9ebk3hJEK00`aJ4P)VEPTM*SM~XA~#eDtbeNeY8uoPjp~(XmnJx zG&((66`dVj82vHgFT%Y8-Up4Yj&6#M5%h~568&`a#OT8C7oulG=Z2RHZbvwEbCl*q z8#5yHDePLF1$|9-FZG(|f~VlK^7u0S39M!Z&ktT4{2jbs2L042%H#b%xy5zfJBD0t z@>YSn&0GHjxpnxz%i9vn@V9sBvpD)4%+s3aja_D|qlXz^&+kSb9{Z>1r z5PukdWGAZ!Jv>=k7g86}9MZo_enUe>g^UfE)J67W-X3SQEqcczj@;vVcIY2IGwzQ5 zH2P@t$>?*@-$q}F{yF+q^qgLY7}przn2?z0n7Ej<7q#mZxI zVoPFmvGuWiV~56$j(sk6YV0eq^JCwNT@kxJ_QTknv7g3%9(yMC+t?pte~tY!mLs$h zItpju>4KaQ$oc5Or;CGlvLNR)4Jtc)5Gb~A#UCGJKv;OW9ToSd7C z=lHo{euADtH@uNYo-twsUg3p!#}9Yy4_tvdX6s=tMEJ1me?}4`j2EW<<7n!C-1Fa# z#wk4TU$4@?&SADU!LGLA;56;=uBgpI-$VSnLZ zVXJVIaEx%gaFTF}aGG$YaJF!saDi~KaH(*)aFuYKaHDXGaGP+4aJO*3@R0DR@PzQR z@SO01@S^aF@T%~J@Rsn7@Scz*G8gehHliLPXOX+eTjVba5`~E(MX@4@C_$7W$`C0< zS)x2qkw_!bimF6)qDE1RsK02is8uvdG)6RDG)Xi?G)**9G+Q)Jv_Q02v{bZQv`Vy2 zv{AH0v`w@_v|F@abVzhmbV77mbWU_ZbWwCgbX9aibW3zcbWg+*n~V8k8*vY@v)Eni zE%p}&iNnN^;#jdnoFGmSXNZ;JEODN=NURZS#Z}@uaih3J++RFc+$tU=9wQzvo+O?k zo+h3to-LjyULam9UMgNLUL{^9-YDK8-X`85-YwoQJ|sRWJ|R9WJ}15)z9_yTzAC;U zz9qgRz9(i$%q4t@jiiUfS>i77miSA8Bw>vzaV4=|E-W&{N*8Y z`D;R6=Wh&I#Q!j434ce(GXCC>mHb1v?ylv39cYCQJLf_IEc(K9GFJ4+-7TCHKpw+<)RnhF<53 zLT~aDLT~faLhtgGp^RltsF`I^D9^Gi)Y`Ht)ZVf_)XB0X)Xj1re0y28hWc5K4h^&% zZ;HfXLMVXuRdZ(50d2mMcQng=SiA3fsDjxT&KDM{?ftHR_Q3|80mQFB4~nOV3F!NE?M0rB|d^r8lIlk35q;>73n?-jkZg*~B@=dB+9CMaD_u 
zQsR_xd2yP!s<`{L{5r=v@4Aq>*t(=TWnDp?wywUef8B_>@pV({X4fsP!xMj8|G3t; zF>#aPrp3*UTM)N2ZdKgIxNUK};||50h&vZ|G45*It+;z}=J7W1&hg&yLGh9ClK7N( zWqe+|CcY}ZF}{C%Yy6n_N%2$St3=b{AIy73{Ji*u@k`@Z#IK9r6u&KgXZ-&7!|^BL z&%|Gd|33a|{6Wca$tfIDaujdVb|HuTlY=f<+^KiH|zeW zV|sIYJM{MI&0>sm`)mB~@qfmT7v7IwCme;d`%;|uuZoWf2a7lf7711fb_tFNt_hwA zz6m`OLK3bBBNCz$LyTYl&|pzM1%T;=74!64xhgO8hW!d*aT-y@{VD9!~r`@nqte#Pf;YBwkAV zA@N$`uZe=*cM|2jIZ5TcEt4FR2KDw#3QUSjk|ZT3sguU{p4R*I-phLzC2i}iOFG%R zE@?p0_q~7ZZBsuwX=2i}q=5R_Neh#t_3tLVm!zmKtk>0ln6x8lPts>e4fVt7$JbA< zUs(Tc{l@xz^Xyx)6 zZ%^Kxd?@)u^10-T$ybwaCErUnPq9gHPVr6&N{LL7q$H;(QnFJDQ_54SQyNl+raYJO zLdwjPIVlTL-bz`X@?Og3l#fz&r5s2(lJZ5$*_3Zmen`2Q@<$4j%1yOSbx3tf^+^p* zjY^fKDpCtmD^vTV4oDrA`gH2J)V9vFUQByAZEo5dX-m@HO!>=nyyICPA^U`Pp?jIN*|Ekn*MBhTl$OXFQ?B< zem%h9n~;Lz$76 zp~ zS&%GD7AcFBNn{DK6j_E$Da(@O$% znrx8x~DUJ-jM{gpw=FlD4NRw+>?C{vUfN~JPO znWqdA7b!JLt+Gm4r)*TVDEli1D_fPLlw*|Rm6McHl+%Z8QfX9LRh7yrp-$DP zYEku9c_s{2wW>y`#;C@tuE-~;rl_W=W~yeZ=AkT5Emkd6Emy5lty67OZBd<2Zd2`0 z?N;qq9a4R+BKeMrCj@m+PrM6Q$s$JEdYF~9v zb%;7b9j%U3iPW*GICYXbO)Xce)k~FmvK)1RdWxt@zFbkFE>q{Jbm}5ijk;dltX`$; zs~)Hxsve;ptzM@btA0*BSv^%fUHyuBylRemzWR!Mp?aq3E%iI<73wwW_3EXnP3oh{ z57o<6J>=Wft5iGH8&!MNpQ;b5KUberZ&U47o>A{momcNxeXCw7`Cffh{j2(p`ktDV zX`X47*(1|6(=+p*e%`z5=aU(j8J-!F8K0@j%zdQil1y!8O=e?e-%b{TGR5t~Gsk4M z;CsjMnUgZ7WKPSRnK?W2O61(kH!|POT${N$^P|jNnFlhDWPXu(HuIaz%jkVA^JeBB znM@Wp3%^eWIb^v3eX;_9;aM?R(yZhxSypCNepaunimbY zouw~hv-IV;EPa`rr7u&n^ksUMzPys9FLScyWX(rei1HT7J18qq)}X96lucRs@?n;~ zY|qk{omo4x_8Q6$4L>v-$kLa?S^9D;>sZz=4Zk#WH|Gz_PG;%LnXEHe=TW{zxsvrW z%C9KDqx@+o_p|P2{n7A8gIS|lqb-U%N>7wXlsH=Oz=Fcb)|Z?{O=At(eHtyYZL%G+ z-LrkO1G6KtW3%J3Q?ljRnb`%|nrvNmZFX~Z|LmdJqq4_lPs*N}Ju`bw_JZuUvX^JC z$=;a#VfK#fz1fGdKhHj$eLnkQ_K(>&vVYIMm(9uH=h)>q<#^`!=Y-@4a)dbvIcYh{ zoSdAZoU)wCocf%WoPjy5IiqvNy$ykI5yYAzA4&GUlSsVnXh`oGH1}bGPNp&Lz=Za_8qR&b^cK zPA-W{W@{$m%s0)}s$An-t%vW9IqEz^TXH+k(Ze%DW~XCbjB~O**El1_+1Q;+=HfuE zaVC!Dl1$vQS7C%C`pMk4vd`syn|mdfjHwm#w;P!s&ApcUTW))*d(7cg(WAi5EP0jl@cV^z4yajo0<^9usMnC6} 
z&*z5qqL-0XvMg^|-oxv>t7J{yCY0@Yd-D$Goyp{Vi;P|8qh@|#fxqKwFYHh*&dbd)*y3-jO3Uz5KHWqbbK{KNTQ!luFj zg~JPhf})b5vLankeV0C6pCyg&yXb48>t8gqXhhLy6!Ip|*rMl(CKpXDnqKru z(VU|BMdbVMg+*@_y;HP;dR+ENH(axg1MOPv} zE&8nJSkZOZoh&+2biU|YQ;wI4ek%F}t=pjXBC}%4V*6s3Vz1(W;?Ux#VsUX&@sh?R zjVn<$pln6iWhkk|sl{?bQ5UO=a}1@RSYJwt^`)#>Uv$O#Qd3-0T#wR>(idf*p$sk7 zml4JKGP;^qs6_c4Ivzb=(Ddv1fad&TBk(KZn zX8IV8#~!U?OCwyT?)ng=uwZ+^D9jkaHEezo}x!}H{pl5HhBO7@f-Dmhwm z0^UxSoGrOf@?FW5lB*>*sQ17AY?Hl?hj%r|dutudxeT^NfQZuyMmqu;^r$lmC+ z>zn+D(an4Pw%z;!Q{Uf~+IP)^#Y`}a#67HMSl5WgIn&pAA4ZGrB0uOEe>2UJxf>Z4 z7slMpz-Y;D^SbM&_q@pRZOfc64Ym7gOJgPv`|8MGB^_0b!JF|{f%^9z3F>`3h}P}a zkI?8XAJtEQd$BZ=qRcfsjg7`$(;Duiao2cj{53(EFpWSn zB`QXfpcxgOqDj{%HCdWGO>I<>rk6(lJCkxvm8Mp68gFnnYFac?!}@CmYldmYN977e zX~t;AYbI*QFHxpwUewIg%+)N`EZ3~lY}V}1{8h~E?00MS(;d6+u9$x%>sYOX>sXr| zax(9oTMyb1y}opO4!U3Y9bOLJ@BGOpcR3}vXTf{kxLYa1{m5$EW6Z~W#ap;@nT-37 z5xDng#@$N}?tFB(vl)$hm8r#tG&{47YEEF>q(6&!hu%|^0lVwZVvPM+%xKt5rPd^K z(pO*feOb(WNc6GsduzldeTjt*XzaT)i^V(~Tc11ro?MsQyT;b%j=d;kZXEkNxLY!g zoy70j|KYJ4Gk$pHI`=-UIj^~>`B8I2^SkDrhEvKfwJUWlHTGpOo~1oYBT7Z3Nu~1A zoYIm~U1@!3-_oI_qf4JFom%=z>HN~SN>`MwFa5A|XX&SB~{kHVSQWv}<`Ag|- z`04I#$@bEB)6B{&%k0bC$~q!-a6V;$W#I-*Ot%&tw$ifXvg*W+_JdrJNM5Ec`>nCR zFsH1b?5eb+tgK8|HeOLvR$ta!*0*e6+0e2Jq7h}I%f^;HS2nq9YMBOaTTCy@NjNPn zNO+}ehj>oe{IZ2*Z_v%Dyf8vFz8fKTR5a zCY*AM@&|M3-0tp^*I92>ZddMD?pp3$eoN_J9#kGy9$9`z8CxzXPbj~qOe>d{tIKoB zi^?_S&1Ks1n)3Sc=JLMfXH)~rXA4`)N0pB$A78$vY*P7@@}E^?-||=0wDOtdVe;AK zeAT@21?7v&m(sGld`#p_I`fG!+eIokQ!d%}gceE?CYqT4+TeRD? 
zJGFbYpK1?lKi8hrp3$Dyeyjan`=j=T_LlaJ_MSFN#;S-ESyWh5*i`gDajtN$@T~}{ z2&;&!h^>%RBvhnSWK<|CvMPd9c@-rUWfi)L2(_-FrXo^RU(sCAw_;#Ltm<6G(25Zi zqbtT%JXbNfVrs?AirE$ODi%~Mu2@>JqGCTt#96{joC zRa~gJSdpT-QgOB7M#Zg)I~DgTSUPhZ*udb&qM3#F3fx>35Zx=FgJy6L*vx_P<(JU77k*-C^Ac-D%x9-M6|cx~n>!`iAb7?vC!Bj#X)1$*;7l zbgXo*^sek#S+5SMjHryQlvE~Erd28{vnul{iz+oJ+RCcRzUsQl#>$q;{*{9(M^uik z43dwpoLo7za%Sb6$_15+E1y#@tz1#Lu5wf5w#pqSld*TZyK;Z!p~|C`Co5;lrmN3X zp0Dh$x>)&RYwYch?sufjhs@7L+ zs`{{Md)4l$Ppghr*;JgYI#YGN>f5UCtA4Edx$4)d->d$tx?jbqZjtk=ZK``zJ6F3` zdsi<|`Bw*3hgC;b$5zKxr&PZ#S!t6!;Jp`KGczj|TyTh;GWuc%&Ay}o+3cvJPZ>K)a)tM^wQsy%>>8f3E(u`uFNTtM6Bv*I3oq)i~C;)_B+KRr}Y3)I`?A)<|j+YSL=tHR_ri zY|WI^l-1~J>S`KmT59^&46PYaGrDGM&2u$VYNpq`QZuJ!e$C>Vr8Vc(=hZ7}*3@jQ z*;2EuW=GBLn*BA0YL3=?t3FwCrsjOjw>96_{8)3N=2p$0HLTjf5{ugL602IfTE|+~ z+SwBFeHbbC@o!MuS;e5Xk3nB4816V0n>&GJ#+}6Ca@$xuF2lCwnzL=Wd1m(9DJ%!B zBE^Y2mF32LjpM^*%mTP(W-4w*N;a2CE^On$&MaBQ!3x)U( z=sPQ=kXy;FZ7WYUYwN>ib8F%A2uHL1J5@Q_(`7_D3 z+;PdOwuy*)ox|p~vANtgF*nb{$4jt(5%w>@eirO!z-9?@dmFheM{X-{Oy(iC*O6Nl zj>#(I_Iip7cM)Q}iTvD??k?o_HS#-){N6);8<5}o z$nPlT?ga8Xg?Zb8+zw*BoMpRkzeav#dd4^8f&RH{2s%xYl7ba?A~op zA)jH0{|v@E5VDcT^=ZT!4*9cK=M!)|+c5TD(D!*9t8XwrFTlqP*uMn(7h(S@>|ch> zcgXD$a=U`ue#CK^kKEqCcxrH5t|GrTu+HB^tR=|N13CI&9m$c)e5~`6@c%C6XEox! 
zi$3om#(S8{0N4d%ESnH(HS+xk`c0Up4`A~NY<41#y;##itn-f``vP+pg?Tst`InGO zvDWt>$8V71S7aQ>@g{Qo4LN>}`8$amzr_6AL4KcM-F$_${ta?mh8)8YI|@10A;$w) z>zAR3Aqn!kHYuo@O>M$7gE_=Eo^(kwidaupyQ-YKFgIyc!euBIje%^$iMTj{YHg&M6Po3Dd68h`V&xLF}{5+Q$%-xq3+I9f) zn~=YTexIP<$LKc)HjB~kE%ZAG{T=8RK(+(2orrf8@h+v=x7qR#hi~7ugwJl{@^DT= zdnMXz)Duu|25sdtTn`?PyNhql74q!c_QBQ-^+7(5dloV;o;~+#zDwJ8u)Per4mt(- zo2cK0?c4CX2>w>1{U+*(s6POG1iv4^?;h9=fxiQ=-HiG(`27m9t?>H|{9c0X51?N_ zUxHTieA<2jU4!igwBH9E2YtbR+gf40%vxpr?u*&h%U;X`<$@;KEPL^7&^VisFBdzE ze7QHMkHa&LuR5-NWv=5h_zV9l@pt}LzdF}t^_)=GXIy8wuAVd3^_e-Zx~`l%!|mO9 z2@&vt*;{U!I6^Jcg|<1x!)^=orIo_Xz6k6E4@Jm2u#;`!11t)7*Oa{Zoq-&*?A z`}QCwkQ>Mg6bKp)dKxqiGy&8m9n>b44{DQwDnYM--T=K0;^98W0pteC1m%JzfSw1v z1bP|tC1@AuYtT2K%b*`X*FnF480DZgHt1u}CmxmgDOF_paxJMP(RQh&@j+Q&@-TMpoyRtKrew_ zt`pWRsuN#$6ZPAum!V#XdNu0zP;WrJ8TALSy#~4t`UP|obhmEGh6zz(e}eH8OM#zc zRR$2O4wxdIaQM|=@F(U5PZhHTBg3YOCqxYoo4P?3{S5GvzQZC24v(0&ft@=@h`*e6 zDT86-jFpSCT4oBD+bhMPtuTd1c{s0a_;CwkZna3?Rvu}Hy+L0)|%-EMS=*w{5hkiAO z$9)&)(f1I$j{3=mpKSOkg`Z;hsiV1lMD--M-;i59&CLNm8)=^j=<_W4ynsG!=+i=T ztA>6c)xQn>o6xU@{$1z?(%fD{ZWYMw4QfYnD@AUtG`G#jZ7p(JiQG2OdEp}CTKK7i zpFZ%@KyxF~-jG&9`V^#1kSZB7NJEp^Z33DhX@#Q|g;oL8EkbJvjY@QHqV+afB@ByW z+qMtfP|6*kGHb{VQW@bs1E(R`w_OLXpTYi6*ON$ z^Gyo-KmikqxjTj9GLUj#()qH0>|=V=2=@uJN*p!ow)kZBfkw(DBHIFnXlpU=11Z-V z+&T`6^FmvnH1?%UlzSfB77kf4OVZd}KZ}WNt0CD4YX^(WTv z&U@9J&78J5Y%^>A2cGP+xlB()8kEdF+mCWXz!g(&Gq@p?`v6=CINP=(;D%D}7`O~@ zX58`=>$ZL=?6Y;WRe{z3w3MXP_9D1O%Dn`xfO2cW4W!(ASQYCyju?SCu6KM~=WK9& zW{~qbjuWI7kXk`%2Pr#^@s;bXF$-pCEZTw|?h%}SN zcSa<4MDj)?dnzUIT_N>^)E82RG#(;SYxHzV z(ncCTFqPdFno3$EZZKNmXw9ZNFElaHIubG7q$90Fla548Ht9$!6_K{k_zj5E2lq_Sm!em$r!R zRkU71Yd6&`fv08gw314Q{%uI!g=94(OKC>$L-GM6A3?I8_Sy}}e$3Ay%uhPTvY*=R zh4fQM4?`+TtHnG~Ym8TsR!3X#kCw`$BlDDL(veoSNk`@>*Q6t@d_-DK_QYH~Mmpqb#MOlsMO zVip~53&kutmKKUx)I$rw16ed~3yqsaeP+|XPZ2zjLp}GUo^$DV`w={lOXd9xax%t# zG~0Y?Gl1IUQ=0+QCZB2tP%NO)2U09xtk8EL#bVlb2<=-;`wpRfi)pVR6iaAsLuqa$ z)MhBfUKEE>`(D&?7)1?@(n?W7?hJ9qK~O^@wh}y`p)=4*XP}JwA3;$|ag>24E4G4W 
z_%y`|I#*9qte~-;rkFvoj$#(Y7K#NF2U65fYz3ZWC%OtZl z$vXNnCKPf@&0 zF@v##ypCc9(}VH^l~mG5QAy>E6tk$Lg<=+!v`{Rdl7SQpsAM3;Vk#Lzv6xDRP}ERK zD@6^Jv=YQUR4{NT#am2ognx2TM8QJ8abk7&k; z21o7@$t-H=ypN(dlg`X+igPH=qqvabQi{ug2RL$i7fEJt9_}5D^GtrT6TL`u<~VvK<&(PO9wXDX3UJVhm^DY{UUQgo#lN72oU%vn4|H!6vz=tD7; z;$@0gC`MAuqu7)7l2hzSC31tDoO$GC1VgAqO)-LE4#f~|2^U&!FJLCYvmsm!_-raU zL2(DgV+1Wi@mHhR7BLh>1ev}3k0_o5s;x?`2&Pgj1HNo^61d0eBj8?&dx%7*vM#k| zGuy3C0QVU13+n^ca{63Aq$-GZ`L6J|}lOEPSp8d$&_M@0fN7s+wr9A4dKgB$ncYm69 z9`)LvAnvNj@ftwo`BXlDVm|dYfMP!NIRI!php|lOFqTQiGs$?ITpS6y7r@B_H;^6^ zql{&ekuew2>;_RRq`3^DSWIyUumU3`PqJnl9XRqNOU7JGGaEt>I}2nCLy0Y-k$naW zjt(5zXRzSZQkgZlIw~VvZ+Ie4!WNu*aO6ptc<4p_3?q297i8?S!zlKmv4&CXMdxA| zMGX@On^uY%dTd)MYN)mq*as2Geg=!v0*>rwusBaqnHkQ+eW{Fa>`SHeK5RI_v!yiZ zaDtc0=rwZ$#WFfeBPf>9^)`ZH8O?hH&AW{D9YOn+Q~5}W<~yhAGR7ohOftSn z*3p+^l5seTa8$^y1e>!M9NCdTeweKc+^exLi9|*?ql`?fUWRL#!H#c|AxnddW0G~m z;F@BP&%Kh4sga_R9+O7k0gQ_56S6ozfFrwdxZ)aQj7i3rWPFp1Z<29LG7e`mMn!g3 z*qklk$j%CRy2zqYTL_-bBC@kB1o25C^E?oE95Km$B#Uzs9NGP2nPiMf#+YP$ldPjJ z$0Xx$4k9|)&0%v6fg`&)Y|bLg71>*3aTbFkduc3_j4{a=lZ~XC*fAm;`>ck18pcRe_xEsj$%CD1sMkNQkhTxpQv3)^|g31}RIi@zr zN69fWwH>jRlpJ$Y+ljWhrnVzT2EB!;?L^x=L?XL|N)C1k!I7Aj1{?h@qLO1}YU^WK zo7(!AHm0^drmd;1k7;LW>tp&M64~ifaj>HTw*ywivm7JcYwmxQ{sjZJ0WNPbU z2AkUYm?5UNK4vH)k=;Ed2RnPti;W+WMGHrnWw2v#G6**$0uxewdPj zy)bYj=2He6{XUtJ)7RA2$Lwcn>tpsewe>LvnA-Z715IsxOtN1_t}W|u=Q0RU$?h3e zEovoaC5N=h%6y+{$QmTO*~ES(xzhpSQ-GDH59#Hvw0pFZm8Z$=O3S`o6P? zJ$7nIuQ|jPpCjUJ3%nVh5{bnYtZ!qReCFm+t9dl$JnCaNmF$MZ`0Pmf?ZE62bOP>% z52KFwSW0^=r9PHYAB$o446-A;gCy_$z|W93`8Gqz`HgDyc`u~)%L(GRz?L<`KOXop z#aR@OP&`iY7)4G1vEfm)qS%vS5XEGQa*9fdnG{nP(yNa0c@&eWB$Z+Vl{8WuLUAa? 
z;S@(ue465DiW4Y4N3o6KWQuQ6Tt;y@#T68vp*V)(Oo}g4e2?OKiW?|yB*@E&nHR(6 zuhvGm3z6Sq8C_f~I@U4_@rTj?BkEHx) z%8#bJit-C6uAsPz;u?zUD6Xftk>VzbTPS`=aT~?$6n9YEMR5lANL{DtB@pqYi)sd;D3PJM8eaPv*vQa#6*or*nY zb}HnY891SdQ=d0Gm3p4Y2v-Kq{M44S=ERGETWjLBQEu}&^HXc!1$G9GdrESSdn)D} zexrgNaX;K!-*?{je|0j0DXFzlF`d?#JL7&LlC* za2HF*j@BmV=c0ZK*Yig63Fd4jggcDOW-=-6rI=07!kO-&xKMPZ=ta?=VgSVmiqRAo z^S5H>b$SJ=*@xELt2 z+hm8?q*z3;l%kGey4^-Q_JsHB#@W4RHy(C!@PpA}PgoCJZ;-9m%b1sGpVuiarua6+ z6%=Q{7mLZj9PI(gKnf7PF=1X}u^8;KGMONJUxK5_Vz85dW6WZ(1HfR{ox%4dOaSH{ zPumQ3(wSZ$d~d?wiIORVya>ehW--0dZU8lcw2yb zK4Oz~i|aYFg|1W?$G8?{ob9+fB|65$u}qwd(@wSI&cMjAOk9T-hBFDiJ63lsvw@Mp zE7|k#V{q;CBS+JZ;TUT6SwBXE9#QzrMWJvgl9td<=*OWb^kXv;KM7g{C20Ah5P#+v zQyL0ENI+?fnW2vm5n?b0VvqzR1;v5lK?$G;kV62D59&}*7-#@|6ytos!AwQti0NzW zr!e?Ni&+cgKpIWYMWiNc0Pkr-qXHKKE+0CqD_ECc6>_lRlvt5$)MQQKyAlSw*3|ll zIexJOItKdOoC8?~Qx5&d(7ys&1nLo>Kj+246FK)KV05I8#mMv$5o#eMq$V1Pp-pOX zb|kgfP!qirZHb{K?RbM7sfnD_akOSrf8^YXZ_Fs-`$*KK$gvb$DU}n%w`jCQIATFY zQ4X#gJclBYx1ew+;(JHb!-2!8j3B;)qb^#IK$sp7k8@t>=h$JxBcOITAt78Do%~ z!{`X;Cx?ETL=?poB^0F;<0!^cOaPK~6Jw}j4Yklv6CXrRypWpsAvN(tYT}F3#2cyg z{zx{Y9m>=pD^mA1)b)nC!B96E>Lx?oY^eJf>J~%&l%eixsQVe}{)T#hp&n?c2N~+Y zhI)vh9%`tE8R}L;J={=_Fw`Rr^(aIAG-~{=5rl8rnZcNS{rV#-F%cyNbDx63p@LSOvG?nnHpj;+Q2-~@68g@A^F z3Uw@I9OyR4t`gTuP&{Y=XfWtZ75+{g=mw}AG^85W70_7FO;9_?s)oflf(k)(prN3h zAYN~b7nB2vZp3*QG#NApv;?#T^fl-r=z0_E@mCN+L4KIC-k?#S^`K8d$3bU6cKuk4 z6G#l|I|zR@Y6xP0l0a!7HAoB6fmVVh4#l+*baoj2KP!-1E9L_f3W@>!6LGnKN*&G!yqIpo5^Jpb@jN20&h~ zV(oyofG&fI=CGJ*P;XER=u^;D(61oYTo$tobOv-D^y)nP|L&lbpk=S&fA;~M2L;W? 
zu?Go3$)F0*#n=D8-p(|O zSqPg5vMI- zr>d)}tGcV|*1cg4vw$`M?|>a(5BLlm13!U(gUSo(S0EC!1X-XHml zh%{uS7-_ItGA8?9L2mT#<6U4_logo31Hyq9JOX^+J`e$FfeJwW!dXR73EU4VgDRjZ zus}7?8&n51z+SKq90G^I3GfB@5 zARV*;--91OF6aunftSEUFd0k-Gr*f*0ayeo@LW05g?hn_K}8Tle}5Pb15b2g9OTLq z+OV7_wdu0QWv|OVm+y1Ag3A?MuHJZ~8&1r2;v@PyTv+OGaFD|dT>G)YE4Y5GMmpj9a~vL7 z=J28^4)=89H+6EtSKM-pcKr*wyY}G@hqZTjubV!{EpL&VkLAYS9Pjwug;}?B4l_pg z3Nv0CNJ$&OCQ{M{ zG;M&i^cZ?VQAb7y+!I6%py%);ZZ(ooz%Mq8sPN;b!!rfjmv3NLR`z z{Av&*izk8Ao+kmeJf7Cqd1Krqt|ge=omCIGGMt`=&tCG&!TxABDT|anpk)te*#lbk zpq4$TWe;lEgIe~WmOZHT4r9t z@w+VPCG6yo!Ftfc8nFy!tcW*^t^YUqxeW-ame{GXBE zdRX$`h|dA?*i8Ow)qk^=$+_wdw|v6;sK-9oq~#8i^04OVq0N^1KPHD${v~=%58BuG zd1=A5{$1FlrZ)0p(w-s4gLJIIQt$J6_1naVCjRHi{fP1{a`6(r>)*m())MavSmZBk znP}1`<#qLNUz~ie`xmR{O)cXsJa1AallvoW>9mGPAKX?dYZZ|(Dg!EmDq~f~scfdQ zxypE?(}NwB^iB_USn`mtysyI|haGl&bQ~Bkfy_lspshD^|JFBYKhQg(_RKYs5~L%5WPEe6vS>#TS$Rf4msVvk`k>RwH#KchM@~~(z8j@>@F8+?MXcAXsIBhCgK+`&5 zNgL2uCv8B}I&qRVKwXQB8nuVGas%y^;_C7ik^3yI;c#1*(_JnjHsA1a--RWY|7aWd z+F#m#g(bIA-J>m5xzjU5TR~*0_L*XpJ3Z2Wic~LD*;ZvKyLoc=Meg*7EK25|5BSG49Aa>yrSlTgz>z2A@lDB;a&8-9b zQ1pErIoY~blB?;vP{Jb8Co0SH;aWd=K8Osc462M(8K<(D%H}HLkut9RE=zg|OAf-4 zhp^-#EcpmaPQsFxu;eD}m-vk0mxuR@gwWVeekhj=DEydt}(;t$Q<`&=2IE) z-}^9YVFogBEbB$qSKD7^{l?nrwb#Q8^I6tnMaRIxSdXy&9K+hOFZmq6_ao~6k6Dz1_`W?@LvfAMl%WdcY(gH*#;_(Q{y1fL zm2_t*&mr(A>)8*8|DJ0MBYxbg%s8meFI+30`rlyoR;@Q}N!ye%`+JD?_!rmPMV(eq zj{EUBG>Ek*>-pN`vxoRzlp$;cYfjqoujI3x_M42)FXVe|0(Ht`&x5?ykai$t*bl}t z`@BJ&N{ByBnj_QM|Ddc%%tXJTO+O+3U#LqIZL)eM?aMlIHuvH%__ToaEZ6A!8tZg? 
zU*~=sTx%|EU57N8gIR~O4{(6AKT^gL>R%7vZz*3L?oSqN`w-V!0^a9(`{)ZFaWD6i z*Wak)D9XEr{994JVYI_l%KcLp>eZ9>qir@|8%X|h>0fE&9h^a*qb)PIpTB>L_HIvl zuDyvmZN+|xG?U2VbO*{n`##0}nnV2#Q_iH}^c%`GnsQC1UeWmfnR1PyPu&HH+|zl* z%+$GFH`=g}dr_4-_2N3w)cHC58X1U4JVYB$BK`^Tt3o~dNk8El z?Z|gIao4y!K>`K4dO`YTDXDzt?ec)Z%_XK7B9KVUQ!{emgM0r|L=ChL^{|)ZYHupe- zIpjZ|Jg{vc|25o~8y&b7dG4n?S3yVWIiKtKy72i7NZ`85L4FTbOx(8twBt1J18C1Z zy9}mNu8FkY2+IEmZB)C2eI44UVHrB@H2pdHAmz&d9i*IKDwqZm7=PozZrZED81649 zqwT%)qub-T4~&Og`a@NoGqXS#?HCVEFy14kvUfvUU7;?&QumhBaaeEOF7~(Q>3O8A zM~iQ!b_2jTYMUwy_EOWIL1$_@2V4dlsbxwx)@Pva)3g?MMp7Ks#z4 z0eXWVckg3H$D@q2PM`~|QIlREzj}IxmN*C=FXcDocpAjg8f(E|TEsVu(t^i^^V9@q zz>gpczXWiI7F$oNjN)!~r+2raMLN-Q?tqam(@M0&B(RISd=T6Quh1&ZX^k@O?wslD z?#*Bn%_47FrXnpA<`#D3Q15U))g9{Xr?L<^#S$%J+iM#Hibn`s>-2eaBy z?^dr_<0v+>#xa%0Ri03JQspU?pR2T0eyQ@b$}=j@sywgqqRLAuuc*AL^18|!D!)=G zWie~q6#2j@?>4Xb&@t~W8oKco|Fu zQ@}JZ8N3CigB4&c*a+5x?O+G^5PSfRgVkU!_!R8(jqoKA8j5}pd;<1^qaYI;;?wsA zzD6n=tBh8esxnh0zkOpwsZW$oe4>2f6Xg@1D4+O5`NXH8`ZQE~L$x^Gs2n6!l3}pH%fpRi9M#NmZXz^+{8oH1$bSpEUJJQy+fmhwG-RPrBOE)t;{Q zEVXB8+AK|*r9N5elO;ZN{Jv6TQNTZ{qq8Bw) z8KW`{8KssepID-NVri(BhH7c3mQ-!eG_|LxJx%RdYROVdmRcJ4(lsBEv2II>o+kD> z=~{*?^=YUwpfad64WfH$CHR~gB>0>fB>1ETrqN8>p_#T-yz23)w@|%>>H*aQss~jM zs@_=j#;P|`y^-qCsz<9Ht9q>JajM6u9@P61)cX?D8U?l10hO^T<3!f1?`zG^G{_iC zRhCgCVHrolGJ1riCpp~MAen%UB;;7C!aCunL$vBm;jMQ?LbTV2cEG&M)VwW*1b{UIye-cc6FWYLm+^1KNrFKP28XyPp;R z;L9KsPD$knD=9Lt$&=T~HPcs3{WE=>xx+5GMuxm2L*5aTJ%t#N;wL#Z^&L{3PqGYV zrhZn_paps@@DzBE@I(H3k#dqo&iOX~r79bHMfn<;ZQb8I>2N^)#adID6Ur@99 zpb6*&MuRb6;uFlEz`tm%MPLbd2P^|Cz!h)}+yHyPJ#g2b-;`_fS!Yva!-`Q3vx3T| z)lAuAHCGO|;+4Htg7PC)qO#9QQohehR*tYzlxtb3$`!0M<=_=7ITO0pQ!%#v;#;$V5HEx)kjl@Ge*emV^6hGb@Mr zMz--D;N-rs9qa(_gAV}T%Qij&d%(xw6R;0(THY{g{|%W)`eVvlJ>MyB_ncDx)bqXa zhn~-s<^0@Pa1InRE*60jMv0tB64?mZRAr3HfXblCSe0=qo2hKBG9D?Pk|(<7fs$NK zb~(l6RF~6SPIo!O@;rM15%`Q_KOw-rK^f`aW6z>3Ul*zFZ`8kr zU${a$ho`vw=k88;TdBj@eI5Q{xWnfTC(7L%B(^h+1iLolq&GV! 
zJAm1Nl7Knj2v`tgjbyiApCE^N<%1C&#qua=wgovytfAI~W)rs*93tOUSo7@*SUuF@ z0)8#wG%y0xh-L4GQush7cp3}=FMzSx>@nDlxL#=~J78EYs3nhmj9Y}{uf_Oir3Iil z$N&StV(@oRl@`kedEh0m5^Mw?fd_MWQ-h9O*k=RZa#s!${t?`d{wf>;HwCM@nntR9 z^(?z!g#H11-1FM9Z^Y;NsrGVqQBv)jl=B;GHQ7t54{Y=T_}e(%HXwrj(yF`3lXf3% zv4@Zi?yBYY)aX}2K6c%*xi90JaTm~cgQ-1O-`Q75liX9Z>uAM2*$D-s!6k4RG-=N6 zAKai9_Y@2SeVg-1j{V{}Jc(V0Wg~d67oYOjGcHJ6Q?48~f!c7@y?D2=w^Ey~_G!}e z#a0NekajNo8TbPH1jhH~%^lB9SUjI#+9%JG8cRkWKDhwTgAHIQSPRaBs`TxepeI-l zPJ#}3vO87j0wt#R-@`Hhei2;NnD2?H%|6>FSXO}O7SxOI7%(4<0E4g%>L(@t9e3yo zqwSxJ!c6dROIjUV#bXA&aV9X>shdaK>T&G!B{NzH_X63RooHk4;d+^*_!-MY&}jhU z-!3BGY3z6Y0aPEzNCr#5O|bk~@mNCr&pgY{+LPqSXSeO_Wu(a|WcLtD8N3ke(3q*T zZ@OLaoOu0Ax^{z1qrJTo|6S}ye+K%42iaR~1qOoWz{Dc)dYrowB-DiGLNN&ZiFPlR zm}BJh6EVdl66(dhZEsJbHlOm8iD&fG1heqYf?oh5z-m5oPqgcjXX;?tMa?6Rd8(}> zWrH&APnl_Sv?H*OBQ#S(vE<#!u1>pGpQ2tvZX|+V^|2+HH&vUNZ9!-8->}AA#i_J`u zkktFrQM{2seJr{5IC4EcnmJSwdzA1MV7wqPlkm-Zfzd-R+6$XwXh#rCmhsa-M^QSt zchm5fXwQ$8m{z2lIhH;~sGEJ7x^=VbP^;eKL|cwliq^({2ES9}y@8(U8PB>Ph4zCt zKP`UCXsvGcW@H=t4VClo51+u(@HIv)zSGfP1@pm1p0w+Q!TaF)1k-rhev%ZAy+|#< zYETEf4{m^LZ~znUFk`#5O|v2~b44+jIm3NRT|ewk+&+yE{FlfW1FUV_8;MCK>--jhwEr`?}+ z`~u5)5SvCT!aH~`^|IGczFu|#?e_upz2NOvWM6wZ*Imhb_d`%~Dm{OySesC$Bs-kA zgV=V0@31G?=dqle#&<~UpDI zMDudR0d{5j<^X#-*X`1l92W2fUdTEVJh+h0{B46j2=GD}GbVnwMSkBZ#Tes;ls zobDAVr7}6)D^f~ha=KTfl)~hMuSmJNS7WzW23HNozu2m14JEE3+G77P&T+{Zw^UBb z9pM|75|R^Lr;(2Ju3x^DxtP;p2eGdcYlNfAw?vn7wrwq`_H%M9EuuUm-}_wUPvH#R z96di)uHW^a_pc#_b$*0=(^TTu;3xV)a#_QPwB0Z(RkhvXRNEZFa@uYk=gKb1RrH*l zTwB8HNc|r@Eu_o&IXPvwlXBnIR1%W!tV(zzd7fi-of09MeA9Fb=iAC_=jf8_ZgO`@ zu+6{M&#yCK+d^HFIiEL&{Eqq4IF%=@=A7F*hQAa3*niZYiY}%92G5gt%C}w@(_V5~ z@FboRmQtN0KhZb(PpBq2%jrcqf9SMpc}TAMIXTHUXZKK&Wq6(@johL4(Na`9=szCP zAL(@*9lmDr>mYXnZ2YOc9P1qQz0!Qr0or9 zpN6!3A?;{L%M59aLR#aHmKxHsLR!<178BC=t$w#u(IG9}SDO;cZ;;hd&hh;bmfs<( z>r2vKBCAJOYLbGdKB=iwHlaG9ki^xM&{^3z>+IX>Pl}LUdj0zN>xM^Un92u`6Ryj% z)EGZu!r&@pRp6HfR~afycw7}uwOGvZz-q7#tOpywMz9HN27N$ZkO%TXKTrVrg8^V5 
zcor0bL7)f}gAz~*27@v%1PlekEF)rk#2gMOkB^w^4>XnCk-u88%B!q6<<(XT<#kp| z<@MH+${VadDQ~n|DQ~i#Qr>K}SMFnVQ0{AWRL-+HDd$_+%KfYy%)Lx2Dw6anc$1f(d^M2aFH zT|_`YM0#%`AWgb}hzJM?ko@17yPE<*5&eDN^M9U?dERr*oH=vm%-p$i=ic1CNSU7k zT^aI^%QWlXTg!)JPxCCv1AEY)VSe8O?0pf*C?{AYWSEZb4D+-QnWqfX&4yvBvKb}? z`NzOYsL4NFaew40t@1TT8DF8b6~iPkLz&IY6DEf9HfICp5hsG%g*%UXp6ktP&zr|P z&-3EH$)CgjfiDmw2;LE_5?mMf3Ofkj7akJ&3NH(#R?Vz@g+r}2TRpb&ux@Srk@X4d zKdi%TJgj@zd}MRd##iXjB%n#RCc~R-Y2qvVqlvR^uI-z)Gi>+S3hhjGBkkte`3iru zbGL70KgfQi{VjW6cx97)Q(vJ9XgrmmWI@v(o7y_~3gti!K+?MoOC5f32z2b^xXAHG zN3PQ&YhPiC(`ctHPEVZ-&gIVQogX?IT*_V6yF7F;xR$%FcYWZhckANzncG!2Klj$| zAG)7#cM)ZZ28k@~E)rc9$vrH&zvHbuI>UBOt^H3PHsVZiSFx{f7H}Hy6?#abB!eXz zC66Uho`XHtdEWEXc=hvI>vhk|SE%vs=e^eZp0~!QpU+yKdp>{H-v2BAe%$EBK&&WL#jUW9tUZQW@`v33RS&GvN55G73X8E1=^YDMef0qAgfA@fl zfOiA-23QAX1il-%J5Z=JDJzt_mBJun(72#oL4x44;9{%6R#UB3TOFe5n3=P=|B~Lr zF5%y#AF)q(c@n%(_D=Bg*6#-u%-H?TY$a=1a>S9l?$u77oP74bMTHmK?r8>KpOV3CAqUF)rxmnl7 zi&jMQn>pqqj*XZ53SaEk%Pi~btDhSSQpSY3zrw7vcyS2z4ONB4g!&4P1)D-!fwM{Z z(jBPS4;It@#H3#|@qs`62VsrV{am7l6Aw24Zh3RW?yKSQN}Q01;lP&HE(s`6A_ zRJ~Mhs~V53PW$rXKy1FmozCB-_h=h)RC82IA$|3d7I&(CP+e6$P;tYY!{lMwu#~VC zVMV~-dAF4J3#%553iI%oh8hcLt+fs6ZVx*ab}8&$7$@8*+*0TDxGY=~o*bSZUKHLh zd{p?f@P*-P!?%YY3%?Y8FPx)xQp?mDb+S5NU8L@(9;Ke9UZ`HH-mX5TzNEgV=4hNW zGL1%)ta-h**;=k{|1KH$E&1jAqs&oe27e`g2md1fDc?h25VR2V6TB}V`d3R!VY~Am z*uUuY>I=2+3hacT!W^N+wwG`ul|P_#op6Wnin;7&t&PR@pK=?kAS+TQ)2dK{o#V(2 z<>qi_aTjp6aCdTl<<_tNI&zVF!&+UC?@4u4g&d|2mCD_RxDRt5L*)wh$?j7j&2XRR zz8L&zRb6^qPqt_)8|XMpeVfXv74AK3`q-=y4zSrRJSyb#eiFWIGt%a&u)=1N&2*bt zHdR6n^NG!8HecBAtt3{VRvgBTx5nmcn=LjGR^Qujn7uaqs>48z6~F2v_)oykz;)nP z;13&46^G$hbwc@LB#3c zA>LX*Y6HC0WScu%rwei=K;I@i&6>VwaUe9q%w9)Bo&dZLyo0h2!RG?+LS6{|x!Gu z?r*?3*jxu5L;js`L|d1lY5(Iz$>zF7JWtE59UY4|%InpttN-6gj=b0Rl_zQ`dPCGs zBzqk6IR9}+Q5W`z|AUBoi28^Ih~5^B6jji=cikt6_Pb9N&G^Tj*j^lbR-0;SC018+ zkZ3G;cC==rIUTK8^UNY|toaaW<1;pW`0T^F5~5>EP>)+}PERd0;Tb8dFstz3v%JDq zg>ys)&HmXEVpmmICzJfa8a9f9HOFgyK(bKQsxfMr8S~BahV*)od90R+R@RQ%F!wd0 
z&?lQjeosik+6p{)^56-_?r~KmO@v#(YAksUL-KWYWc{_VbFg!>^R!dgxtiyT8axRo ztT%rKKpx9uXYC}jeg%(f?P?ujr?HE)i>|V%)KpHbJXrZv<*3TcNkuc!6Lq2U-<#^XEMtG%EHd8c_l^RDvZ?H-tW`q<2$nzu2fF7 z%e3P#%>fSM!EZ@9k=xo8*>xwmR~!wd%8Sph>TUOeJySPlXxp#o?*%`vQLkSWzpCuT zh%BD4G7quiSB;<+{Hk$wOqCRUmS^MA1V{k{F2+B3S&maH3WvPfI#T4K>!)*W_bB3jmNOU+)$uGQ|< z9@Ua+uWFxaX|=rCuQ~upSsh#*TFqg?0Clw%l+@xdevtmprn+9RQe1UHbrPZ=dq_(< zZFj-$n%!-?dv;3mIc6#Od#*=~D1JnBRCP=>>pP`7t(sgb$ogK_IxCYg`0He<9ijW8 zN>PnS;PKb?|NGKMc5(b?wQEb`?IrPf_8caSU)NtE5nGFf^xu-lzLk#5-;V~Vkx@G$ zjko>RTFY#;w8&m3EtEt@O6r4+^RK=BSEZKGVL6H7MbED`N|t&Sk98w&(GcIPY~dtl zsrTxfT?by>{;w_B<5Cx6L#b}25WU5gjetGpSbMgFJ*pNDf8FjsFU{cFV2;g;1Yk`TG+Uq$~q^Ra$3uPW!zC|lHvmGu2(sj+?&+c!Mk^5{Z+^sMLUWnTU_ z`qzp3zm23FKaidQDtlccGX!tO)Xu2B9s~cAaq59N8U5T@_h0m>E>hyH7k>bq`-%2O z`|L(~_Tstn?_(H@7V~R+_WZnCTK#qHSpNIA{+S1Mo(J;>k`oC zqpg4B|5;oA*4nw6#A1n(^)`wBAw8O9_CxGP{2w}+&b3FYVJvJ!v-smMYHI7r_A~4o zEC08n&{+N9wdddA>ce!D8e08W&8CLh|6OiOZ9KnL|Jq1A531SpcM(kRm{RNM?`;37 z{2qCR&`2UXi^lYSuOsyc)-U}d8^R6rd#1g zk8eG8dmQxOLqig&$*<`{pBj-fgzS`__r~T=KIfBY&(`WL)k-&Nr8~9K^B(UvD0$s+ zeBSFn$$!S5i!-f$dhvWC>l?dn)VHfIzgpy#9?v{DVu9F3Y%g{ayNShOZ?R15FAfr` z#2T?d94(F)CyR}ELorvJFK#JrBW^D)!rO|);@;wZ;&Sm2@o@1d@i_4W@nkWFnFd(W zjC!&~_uui#$E+_d-irPE^xw7nyr-(dszRpvRnzme|84$N+qKyJ+xA|+K6{t*`t@J0 z=BCQJ^r{}7-xs`UJ&WDHt#9%3_ngeVzqfjEr^D8H*;@K49_wsK46hnTU9H!V_P%88 ze;r@1T4xKri+@#X){fjO|Far@XZ`9k%JMA1!ineqI+_=wwX{ZhUYCfLwEe$t|HX*v zYnJjC+OOa*v0uYqY5x^}o&84sCVF+xI=9QU3WxckZw1`$ap;bC|jjf6?aOYyNs)@z4Fg?4JLx+LOMm#hkYp_UEWj^Uh~_BOyu3Fj|8|rcuX{#vPI5_dO|n+>Uo{7O5wH|k z1$+f;2EGG!0|$ZQz)!$c;5Xngz;WPLSpjxH8vM9|djWEw8S=s4dLRKX0XaYm;0@p{ zpbO9w=no77#sU+8slZ3T$G{?B8L%2y4}1fB2kZe3IUIL5?Qp^2szbDADBb(7zJB^-D?q8|#DCyPS?VopHM2bjRt@3-vEp|8C83 zwsm%M_H_<&);ZU&Ph_r*qfNYXx^upBTj$37``D;$x;mFS4{;vjJlT0BY_?R^M~Y3M z%|hqp_|4ol=Y7s6Uhsd`IVm=&wV~G zaItl{kelH7OD>0Ta`_{d^!aISR!)Lvnr9Z!%(Kvw->jWy(`Fn-J?CIIx)7wSi65^sqDTmRy#JZ6B3C%ho_P0Gpd%owH>XPY_?~>81jY|iYt}eZ4 zd5dQKb9Pov_go{I>q+Xp+3XX~F7Qz9(x=(rW<7I8HhVkg6VGAppP{tKy|HB3Qyr zirnj3YFx7N8cSDrTQ3JM7cY^Qr7_K){ 
zMOWPOYOhwaB>UYze#vzs!mEC~e=n1jom`_WD?EF>COJ!8i^N)yHJeD5E54=FV*Nai z^GfqNC&@unExg|J!W)E4CogjKWm7RmXDqn};~j^ol3q0SS&}lZc6g(#9ewX%pw}?3 zvCu0q^6#1XyOMXC4Z!t}!wkdK`4`EVDqO?er+9tfHPdS@9q&)PmUwN))pv>4a_W;r zHJDz@$yL98jH|sicy06A=XK2MjMpWvoA`BUr5DHB#@oqT>@D*S@*XP*^VWOEd2f`Y zcxQO$dB5pBRn*bDoA)HqIry7|=>NLh+*Vaxo2A}$GKcvAPqYub1eHQyH z$8W|r_-ysr;j_=@n9muXOFlR88}dpYj<1ccldsrU<{RXz@s0LP_RaLo_if`_9`ilpd&&2vZ%=o!B9Xg=UvQ_;@a|%&JG-Wl zRh!%wbQDd+d$LN;*QsMwyPnmJ)61lLzAr2Juk}^FT&cD6b-g;hcFl$@{*(9g{<&?# zwq7HLYk)}VDfMN0ORflJER)!HE|#2;m^G{(S1dNYA=)TWNCTx7>;G%KHX<6AId*p6 zKx1lH{-5S1V8{Je>D8YOzRZhd|MR;1Wqw~xA0mA;iu42NOzB+deCemsCDP^6)zWp+ z4bsihZPFdmJ<_t9)*zk zrpP{!&6LfR&6j;DTOwO7TP<5B+aTL4+a}u~+ao(5J0d$FJ0&|SyC}ONyCJ(RyDPgd zdn9`ztC8{KR&rapgWN?fl6%U1RaxU8Y_EU z-B)sY}Q*LMXoTqg8pn_NE=Y;#>I*oEE83c&%_HG*TVUkOgt+skYeoOiwA`kmmW z>s{Bqf`_h8T#pJkZdPum1@>+(ZuKMjD_LA?>G;&~>2$>`#m@p?xA_fXzgipnF9LtJ zUj-pO4+N2Jj|K5=PX(!Nj4;!UFKq5+Eo|v#Cv5BHC@gYw6?TVj4`FXNFJYOR zR5+xzCA@Vl9N{+3EmSzsZJL{2IMZ#OTb%Gyw`Fc=!qsl;>)E|5@*H8Fuu%90e(|=M zT5osT<95jH1eMzf&$xZoApVzm`FHv+6sr^i3cpkgE*w_)wc;Daw~CR4V+-FcoK!fq z@Wa9#irtF+io=Q%iXRm}DK023E3PYUDSlJ@q4-l#rI=Otap8i(&kC0ot|(kn_*LP? 
z!Yzg0749tDTX?YWXyFfqrwh*&{#G5A+ZBkMy6}Y+19-%?>rY*z8_2UURqRfz6|vXE$%(ytMhK z<{va)(tLY!4wLAg?%&+MwSNcy?*66zgZxMMkN2PAKhuA{{}TVz{u}(a`S0;R;(yBj zqW=y5yZ(>-Yy7PO90EiEz5#&&;Q^5Wi2>;W%>!BobO`7i5G(5*@O<1o1Ihx11dIq6 z7cenkTENVJc>$jWEDKm2us&dO!1jPW0beM-RD6wNN{-@>^!U{sy`#-fH~+c$FU@~% z{ zHkX|bxD;?L;AX&`fO`QC11bZa1#klQNCkm5f%bt;fo_4~K<_|VAbV{51A_upfto-= zV02)7U~-@_Ff%YWupqEaV28l2fhB?c0tW>S4;&LXA#h6IjKH~p3j>z~t_)lkxG8X3 z;I6;}fyV+*1)dMQ7I-J{L10xNS81(uQo1QUl-^32(od;WhAP!cy)sG}r%Y0&DKnHg z$~Ws$P0vRK($*-tq@IaoPNIZ`=RIYBvDIZZi3Ia@hTxlp-SxlFlIxmLMe zxkh=R$2u)1i1&vg7)N{$h(|(H!mcJ zmv0F2&W{bs2vX%o=NII+40axOwoy7B&SfgJlJ826qlF4lWHY4;~sk zDtLVGls489S3H~3L-O|VsnLx_8bPl#VgaELY} zDkLGq7?K-O7}6Hs%IF@_JESaRSjgy*cSGI}`7mTo$byj1LssDH7vF~L4A~!YG~~yS zb0L>QehK+KVCeDC)A(+})zDj^zlZ)A`V`=*tW{0%RRjqjRRyS2sz_Cw zDn*r{Dp0+t>Zt05?;-S84OS(DzM~qedQUY~HA}T1s&7@hRR>kaRi{-K zRM%B^RS#89RGcuYF#9l;>YZ=x)tZP{Bu=23sVdKIkhs_9^ z7q&QTW!U<#tzo;u4u$;?_CMXn<2^CU`P|r!mX_7K1NkShtQ4OQBR(zu|5yA!YvZr_ zN$x8Cs{B7FtN--cJXU6}mtLF$Bpw>kLy1MA@%@81TZ-1=?}69CD&gT~*qyLv5`6LE zW&NOS4SDSC`LNE0jp4tgqRS0PCH4vWt9X3f zzP*a?SFs`IIvEr8ul5bkOE&kvUkxmAzW!J|3)}ZtB%RjSlZ0@KcftkXHsSX8LX2Cu zINUp27VaM&6t2SeUJT*U;qmzDi!nSiJQv@7X&K%oygj}E(>1&}yf?lBQyxAfd^o-a zGcJ5W_+)$&W=8nz@Oj}2!xx7y3tx%v!>kYA6uuQ-iP;stFZ>X`6>~EDO!#?xG3HwM z&G0+;Zp_2*%J66SdW=AAqqfI4WZcwZwKu*bYt{$Quj&C82Q%_J& z#ut%hsAsF^;k!tS)yvc?@pYv2>P_mc_(sw$^*;3>d@1Rq`i%NKzL#`OeN%l0Url5vtDc1DX^wX4UhG>RsMrp=rCTJ#WrfFtqW^2Z&=V=yd7HgJiR%+I2)@wFtwraL( zc4_u$4rz{QPHN6*&TB4du4!&+*2(W^?r9!sDmBkE9IZfWqqWyMY2CD9t+!UD_1Er^ z25D7Vjn<%z*2Zg-wMOkwX{I(;o3CxDZKG|kEz)+?7HfNJ`)SLyL$t%SqqO6+6SR}H z)3h_Rv$gZI*E9>Yi?z$NE46F2>$RJ-TeXWd+qJv2`?QC&$FwK4XSCoAQI~I_y zYpH9aYp*NPb=4K?dh7b>R%*(1Lv+J+qjckR6Lgbx({wX*vvu=y3w4Wid*sV>D|Ksi z>vfxSTXoxYyL9_>hjhnuCv|6ZJo$OuCEYdMP2C;cJ>5fHrS6%IqZjCH^!9ouy_;UF zFO_@ihstGoe|?ai%rh0JhDru~v_4*+tT*a2^||_deM`NsoWrz1S$kSur0+^O_6z#n z`hNOy{Sf_d{V4r7{RI7F{WSdy{cQa_{X+d>{WASZ{T1n2{d)Z-{Z{>U{Vx4J{UQA^ z{Ym{9{dxT*{Wbkf{k4EQ`g{6^`bz!7fMKchZnIBV!GyQwkg 
zE*h>F&ggF#&g*X*?i%hJqV6yX`+8)0uyLxeB8J0lJvz0C88h%*rv zBd$fmVluMKd$rH&JNr4oI zgxlVzaC6c{u_-c&O|emIN{mX3N;M~Sfx5sH#ir~iHZ_ZC78O+xRq(PAZK#JKO|7F^ zN3})jfK(&u9Mw6hJ5mW!sX3KJl|>~LBo$;K6(V&&Dn=?p8cx&r0+I$rv1wYt+=9i( zuPPWCH7aU+)a0lSqGmTJ}ds2fptqV7jkM%6?K zqHUv{qD9f((TeDx=n#UBzw2di>=^oQNrYvS~%sVk-W8RCI8Z#?qLCn&aH8C4wHWlnZKMv8J zY>(L!Gdb!|4C%>Fxwx>(DI zSVki{mW)M0tYsvOu_ThoQL@;zQKa?kSW$F-Z0p$ev7}Eoqct(nh};<4DYj>9|Jb3i zBV)(MPKx~?c2?~C*w12@$F7Op5W6LIN9^9%Be6fko{jxE_D1ZlvG-#uV{70=5N8|b z6eo&%-Fg;3`G_Yynw>AKj?5CTIIp;dbNt1eA}$0;7Z)3s8kZec5Z4x|Q(Q@0S=`XL zF>w=-K8Tx(^eNKvxOGV1#O**jfOI16EYcOE+i`!yJ&EJR^WtseUE(G2iue#DU3_eO zYJ66FL3~@J&PXLlW${DfN5@Z${~&&D{HO6GqUG`H;y1_dh(8d20_kl0<@nq2_mQgN zxe2xj&Iz6g@`R8CZ9;59YC?8Gi-b25IwkZ>=%4U*!svwe5~e52Nm!JiLrY&Ie3`I0 z;roOG2`7Nf2|p!VPPmouM?w|yPY^vfu}R{KBk=Du&WVylMWQM(GBG(ZJF#VAhs5H< zeu*;^mzi6#>@8o$*ClR9+?=>AahYgG;-168dB2NS(8Zr2Vyc zKWv$38`|HIc%p$HQIcnpZ<4}Gkra>=oD__mdU%p9DKaURdLtealG2j0kjn#FCACZH zl++`sPtt&-x06ODRU}PH`XK36!L5QnkgAZlh1|j>=JZk0M@e(dX@1iDq)*LhNfMiu zC$VXD5}Vc~v1vonhNR6%+mLo3?J=hVNo+cj#HJHTB%Ly+vq@~an8cnVU?m1HxoBxfi+@`XOnNnj|+#CSyb1zub}Nlq{lS z=9%P{OpXy5&vl84q_v*nD~CowkZb>4GvND9Ys_D(*VIQO;Qlew5E zOTLompB$8|O5U7E=Ff&iP4Zr)AvroZJ~=trn4FoMo1CBAGPzCi|LRG9`{dsWAK$PVSdnjw3oGd06tOdGg7ou2nvm$yV) zr(q?s*05#lZw^TQc^eMXQFzg$JxA@kJVJ=~pn8jaqP-&!_SkM;bqaFH<8J(t@3wWTGJ7kyd2(f2Z57vWOc z>(=JhEWZ|K-*G0CZR2@rS8ex_64vr%_1Mx6lRrsbn*4S0&gA3CKPRuLSwr8Kw$#~) zJW0==cXd}q;-&7GG(J;_&2Pz%lesC5DLyHo^m!E9dSkTiy+nMQvEHwqUjAMp-bco} zjKmlD)f3)P#k=Q(PsMxIq-B=y=7g^+;NlQEIFR~hM zhrACz0PkE|ez`?{w?*nb9}jC+*YAe$Sl+*kO-ZC9my*K%cFUBKozg7DfL}!vq_j>M zTfVK1`>`=A@xDc!~{>Hv0LOR&4zjQ!GC?2E>v zBu0%-nTRnX^*Kxpz+qN{Hddd*SnA`;$FR9Vtw{}1mrZP4eEAp}wrzYb8Euog#9|$& zrEX#rhk3DWHg*ovF!qLRv$1oS@rc~g_nM@-zLWMF)_<|@mWW>*x%#!Iq|8W}o3b!v zNy^HUbt#)twx#S!IY8$aTfecloR6iP!*3f?RHtD_ex>pJ8GRw;&(NzWE8K6T{87)7 zB_4dQJEb~>pUUQ&n0Z}kL&-7KJvB<%P**<}F7r;6rN$LzOZ`)WQup|&QZ=cDRC`@? zYJ6&PsxdV)H8*veEI+knYMa#dsYR(>Q+0Tkr#RI=aGhUJVDHrV@_wo1sY6nSr;bX! 
z6wq2WE_Fic0En%ZLpL*#zP5tQb1?)zC*;C)* zTI$WzJE`|lAEl1cKS`}g<)vArjnmtvIi$IyP0)+eywhZ9{%Ij;;c3aKy0qxD__X9S zV_J7ZW?GrFd0IhQ>$J9M6H+^*bxs>#y~m$o5obK17F9cg>g4j_#;97#Kob}H>`+C2Tmv@2;h z(w^yVr`=7vpY|y2Nm@-B&uC?|H98sHjAEmIs<%;Q^f!hWHAaIm+8A$4HX4nY#$01Q zQcGi7V+Z46O=n|wV{cWkI6rhHRNQyWuzQ<15wsl-%jDl-i- z4Kcz(vG8A~#jXROWe)_6v&&)Aevso$2dD~vj=9vYVZ8FK;>_ilH;ijD z*Jp0Z+?u&Pb64iR%tM*SGEZin$vmI=P;({oM&|9zyP5YhA7us^o@CZ!^0KV5Y_puQ zL|NWhvMm3spe$9EE-NxCJ}WuPn3bKCpVczUM$;y%eO6Ic*R0~K-dT|m{j$olhGY%T z8kIFJYeLrKtTOquteIJJv*u@gnzbZrdDhyj4OyGBwq<2UG>@1f-;uR1>rmFQtdm)1 zvMy#_%et9$C+l9;qpX66Cs~|qt8CkBhisQ@akh81EZaXjC|i}S$u?xiW+!H+W~XQ8 zX6I+O%x;t2KD%>vadz+Qe%a;OL$gO@mqe6AjLV*oJtg~t?3vkfv*%}jn!O}@d3I^U z+U)h&o3giNZ_nP9eIWZt_Q~wC+07M~vO6fQW#7!clYK9{OhNwMhbtQX^{qIU*0ZZ7eVEs*!h~3|kT=CS3Y#H@1o+58 z3@zf?30onC(0E388ZrEY7|tPv7VyyuF}#WPFQWbFXnz6v?2Pu^QTLEoneH(2Jg#Mklroz#2riu#m z{SVZAmn#%r#`wGkA5&rfKI|vK{v+5=hs{;Qb{(cmi}DQ^ zr*B~MJ#2O$jy-WXLM^{p#kbJ?h_MU9IP8c1C+H*i&4jxV<7LEnf%F40K0u6rBE}OK zzf*|uCyZYu;ya9abAjtFyo?x^BF12}9flZ-5aa&%4B_>-JmGct`VGhOR-8~+g|VrI zPi{O0CY~?6h4!1qI|`e?k2>B|;T(^-6z^Ce!Ew6|n<}*X8%+Pe-08+|D)fPEHQN0R z_FCBAg3SZ;O^$kx5yMmTAt1g}MKGz$v#!|9<5pDjJSr~m7~#(eT;Us5*1}GScEXN{ z0YXlKQpigfThS{K#{p$;S_KGY3CapZ0weqd`F6-B!zL9rlVI~JbZ?>lBGg}y=qQ|l z@*}y@|?-{m?&veh%t=k9yys-b~mmM7>W@?;y$_qx@s& zwnMiAx?f?p$i}haa~p2O=hlpHfwi4*74nO~$AW(Yd~3r9zp=I!?zXWL_P2Jd*bm!{ z;D>Fjg%_aPV(lorY~x;W9k#cCKY*WrRn|TgzrpTn$UoYAZkJ}a6#t9t7n8E=mQKn5 za)7b+ODBB}jI79Tc&cj?ja#{7kESF*KAGxo<|ID)Lqgn2&W-3L)L?4P) z&736~HuEFVidj=VzL-5yJXSnI{Dt^^@rv0~#ls{YN>z}5 z0S_Pp$N@$J?*i`w(}ACWoxsn)W#AX!7Vtap2f!HnR&asufbRiopb6jzI0HL@-M|~b zn?O6@E#L>>N8mK@lTmv~S){!*tE2YP$G`&MGhivO0$2ll1#ASi0N(*Sf&IW?;CM%E z(a#Uj5D?Xuiq?J~D?)7ri zWrQ11zJ_NlT!D4tE84aY+D`EE7X0*tpKkEemEl7h9LKE)qdrJ(5ptg+SH^I7O)K_O z$qv$CNadv6ia)5t=Q&pV8OJ?c%6OvIH}L#5JbxeWEc_O0+U|Jn>2g}~BT9~=!eHYd2 zgr*}jhZDKXZoBX8zL@=yn9H28KVtvk+;2R&=Xx;;c<3L;J=dQ~-#{vZ)U@I_q#{Us z;hXVx6@B8l=em+y#ru#-sq`hrWi8JI?eejvSz|4;OCTCoXmHsUHi4!oH0!C2Ujny6 
zNpqw;0J&h~Hqf#(SZBaGhiXW<37RZunnAOP+H`@Y2Q6!wEb(Zy9d)d4XIaSwZ4nzfMP>JM~rqdYhAqX{n|$SfgXSbSq+HAHcEFRa|b|*o1o9T~s z`|VQc8cMV#`viOVqSR#H+Ma83#(uy3Ir~)bBap}%dlA&%G0c(62s{m*T*iu08%k{{ zwWrjH(ilqPC{3U=kK%9OeBzsLviu z7-|+XYTjK$K(4_gf)a8roGYOblpumgpVaH*`hw$+>kJ|SMT^&a({rXb_1&BL?nC81 zv}PY_*$0-EInSw;ta-Wg9L3eQRw6C;r5^jz$oqq~Ma$%RMq2Jq{NWl6E$WhMwH2?3 zN_LPs!;)O@NS_DLCsKm#)#l1d|tg7b* zlG@h?(e-^0jcO1XH)zO_8$?S76Z!gJsvkn-x5=t^?rqxg+qC5#H0lB7x@4^!LRZWp z#)fwpV@`HZoLY&gmFh})yoKl+xq@>MDv^Y!VUbAqtj?xZs;fcfL07ayD35mw?UCIV zr&eNWC7W7_$J>PV$R2@Q&x%QH%n&jfrH~)M2ic)qb>fq>*_R7)T8hT_%ZHvUzN_C|?qC+iO8zYQ0@Z1pUV+ig2 z5cHn$O=WjEJPwtxE2!0RYIU4iT}rJkh3AMGboaw^q7tcV(Q#^Z9G(=VbSK1oZOy$8;9!lk*R5nq09;M4ET}kO`O4m}lj?(p%ZlH7%rJE_; zO6fLAw^O>4(%qEqrSyoIlBkYT`8aL;B$ZE6`81VJQ~4~F&r1eu)3eX1iQgW4NaNU8#hor@mXC2L96=b$xZy+T* z5k><&?z+L#qjSQVOfiFd4yG5g0dxx&cNi$+Fa=!vPm(|@pfzxw zgI@)Jw*@)^oq=MY2T%(12g-qwz!>0N;5}d>Fr7>MlUDjr+J(Wr1v3L4KLNUi;!Sbz zZr~%qC!oe8pa*Qaf%gQ*oehI4IfFYB^qgE7rSzIDCD&v0SxGfKN^Q}9+&M9Wff?N2 znNbY(-^?b&Af-|)^!Ir@b}Uh{0r{ce!+_yH3w)K0{GUxNfmXm9KpWsqpe@jW&ttz&b{bZYl8r3)$joYLi#PDOqEFEJ|o{R7|vJb(`f z03l!nSOYdd6TlX*1MGpOfCJzNI04Rp3*ZX40q%eZ@BqYs1n>mB0B^tt@CBrR43Gl~ zzz^^T0s$4E#WB+Z2|zNC0%QT5f$YyY%uz>ti4r&toB)0RP69syr-0MI8Q>@2EN~7u z4_p8)0zU(nfXl!Y;3{wpxDMO^egSR*w}9KgufQGPH{dStJ8%#91Go=703HH=0*`>l zKqXKGJOiqM8UXjcOedfVfM*{J?vQaVa~RwKFxcJWT*G+a{K3AQ835o(2a|_wvF--M}7TI}ndCY5_Yu0byE#{|?>_=m5D0=nC`%<^jEd`9MFQ zFHi;y00sl2f#qn+3+<9~e=73RfQ?vxikNS4Zm#8RqGzuQT4kja^ErwuJhK6%^v|-SOA@L|BIjvMH)e#_?L?l9?0TLO$xsFzf|BVQ}(*p(|0*C~nfM_5DaF%0K!IeM|>i5TL z%flFiV~kmjm6=&XbDg0jt2p81R7=J_kVDo{Ji}u8L05)j&BO8SiZzc5PL4dDDKPl9 z4s#4R4xAA1aMX(Eib(#yTqikuWr##ga)pRS4+z&FdRC(Y*Fr-$DKVJygp+Fs;d(PC zij*bE!Xa)xi@mO7VOSd?4sRsv{K7#b}O5q;xRoU~nF#gsb5V zS3V{L-;3f=8Y0K>l!qZlXc)%XT8))QOmgTzecmWIx8Eu}h2^^_VYji5A=(kM!!LCNt}n>ndZ^27t-#0TNT3*p2M z;lvZ+#24Yj8{w=!5)H{KF$3AL>tyDg&Af}5cQx~FX5QV*dzg8#nfEmF5;N~*=Dp3l zkC~U6d0#W{XXgFQyv)o8n0dLG4>a>ZW;C%C(BeO3CDIVh;kHn)CInMdu zD4c60SmnsKQ-^~m0!hH360DT-aplA}n3X^TkO;H^#sQOn1;95zVm?+`pad8Vd;}~9 
zeg+-_ObfhI2UG%X_+6R=Aiqyb2HF9|z-ZuWU@ve5IMoVQ&epgZy@4x18(f(IHy{uw z1M=EpCk8wQ9NOW^3Pb^YfPTQ)_PD|U_ke1k|65qqff2w1pc=64fU7x>2NVHizz)E= z6Z#8e1L0k zxY7gnftVqXJ z6tEkOoetm&^a1(wx9&;Wwh2fPVf`xrZvPcSCHVqi0{ z12_a611f=MK-2k{SAZ0V1+st^z+~WKU;&VZIc_ZAT1y#nKP+W%PsqHheh+k=dM~aC zj1yu7xBw5}0|Gz-2mvd=8jt}tKoh_gumkLYrho(B2si=GfGgkz>;d)y2Z6)D3E&iP z2JiyT0~Y~rzz2{5azFtD0zp6s5DKV(Fd!Vz0uewGkPhSk_kcfu)&RcZ&$I!?0^@*p zfoZ^W;6vaOU?E_GHRWt8^a)%A*aBM2bul;x2x^0Kpfy>o87FqN=9)Rr%mrpHG;=F6 zw=r{DGq*GIre^M7=8k6WWaiFh&NO4&n`GuTW`4F6tN+lP@14))dFFhHIp5x#PcC4~ zt-G_lq%X_$y;!a^>yJ%i^S-TFeyuCZgL|;t#%z}`gw5L*vV2H?mM@&d^7iKPuUfGA zE9N+cneDr^HR}hmoYS1;d(8C<&GGg&`*AXt-x|f*-3R9|&0FFQ^(}m*JDCST^)3UlDaxtSBJWoF_;Ox z^Ko{7w+DiIVCG;IP8fnS0KY*Sf>m@OR+je2f7JqKHh3gvQ88AMVASi2Gw_#QIIFM< zI5o#A09|LS)YoyfxR0_}U}js~O8{2}V&=hS5BwEE@70FHLSom^*mX2^9gSU2W7pHz z^)z-pja^S;*VBIMXRkMuJcsrFu#Yltxe*Nof?N(V%26qc(F= zpX6B&J#g2>!?EZOj;8|Hv+}SHA3*QP7Gb6N4EP*a3VZ=92UY^BfwjOoU_Gz_*aU0_ zwgTIL?Z8f8H$Yac{lEd>AaDpc3>*QD11Evgz**n|a1r#2GzFhs8`W7sfgon|3O8u{c&3^b;5C5yF{dyV`+v;X>e1z|X|GnT`jND;(I81%=FlL{rcf!L- z^{2E=9;BPF6JP{atG7W%dU_gkC+eO>2@jOj5>EPkfwq1JB}5|j7vTK}<+tHQfc$;+ z2K5bUeG!^FkhnN5WK1l5<>9!ney^(+QOjF2#@n#Gg_yWFK4dJ}(coeZZt6>R^@M6E z)lsUa)IezjrID0IQ5p@(&R~|4`s@s5Iq^aA#0%lX58=cU;lvl=#2ewPKeDSQd9ntv zoJ2(OtUt06k$exVRz)~Bx=>n#`&b^HS`^`o;o=^a-1YUv{Vgdc_pyYM=aht#HK~~1 z_m;io*on)ix=kv?Gq*OKOSnwaZkv1Yu_`%qkTg#iLVS=m`cS%sN95jAE-}+iR6}TY zv`+4*i*TOu@vMZEh=*>}1EE`ZEG6f3Z+d2zP}&2}E_kTJ^4@0N-OLA2PHK}o@!s@) zybGlzly-xT#7O8C9-+Od+?mo6P#)Eg{u3Qpnc2}GWkgHt$sIWBo)05dLbvb;?M>yr^oaGLbPJCh zz1~zVF&|01KZ06>Zs8Hyo603-x)0AzdgJK?AMJ)gBF|hp(We-l(I+m@1@U)AL}a{4 zKTD}i7g&)aho`?7V{&xKtRl0c4`>Mzp#wmB!7HJCP`ejulOxy*&kV>I6hk7s6rA+1 z2ioduj)PoN`_j67>C7xMQ_{mhbjA!sROAR1QBKanYhB4%NT`ldJ*5UpBPfleG>Xz_ zP;&OD&79OHdE$X^;)8JFg>d4BaN>z@;)`(Njd0cZL?&9nZ<&Vb~}-hkz-zwYQSd6u4o=YT`J@m!sYog^?GC|Hi~yg*mL!OnFFcHq!0 zaKXL+SOk0udw39uAc27CdK|C3||uo74WtOnKqYk@C;b--7^df;n-{C^}H zfla_Sz-C|zK>p8>Z-H&VcffYwdte8!6W9gp2KE4ZfqlS!-~ezCI0PI9$p2Mx6gUPP 
z2TlM#04ISTfm6U~;0*8+K>nwabHI7v0&o%d8Mp*o2Ce{Cfos5Z;0Evua1*!%+y;IH z?f}05cYzG-fga$lcQ5#{KG>OK?{%#JcU{<(kH-!$1N_e-?6a_E8aD!Wx40YpW<2gT zaVI!sDu?0r#GPYr#Ml=xmElR_SUjJdhU@xlJOzNSa`@t7ALfbun+kP(a7UGgJ=yJ2 zjKetGDdH}2Unlf)6z)FZb3b;RBJBHm!sk=eIgBcux8cfb=`+%3N|5_gsG zvmdtKqyLXSLL4aX)DCxiXyYfu;D9*8;Un@LJV8MDal|kY_0A!lgMb(AzQ0BJJ+x7d z@{tp9T+yFuv=xp1-@ty;xg*9BW78M=RT0MH3EJC+J}pBW_OLnF3wO?V`XGm&9VmYb zF>nUq4jE(l1^j%2@p~6G)$n_5H2TyTPiWw473vlvhJC;&?3Zt#Po*e7jygxC;b{$G zO~KyzSB&W{_^(D^f-xp5XJCABS3DEP;xMqg8}7c*MyDxwssP)mINl7}nuD=cpiX8# zJT<^mi~Xqk05O)L{{gW54e=>(e6ldMBDD1>uodm?#a#Fv$8sNhJwzXeBHmT-pMdzv zF%DM|cV#Q|t3AdKWAi0+#qd7|^UDO^`stW+7|RSCPmfs`@8+nFw!cE3HbQ?1bt>TF zr#!@f@lC|>nvMP)Mw}@FF>espFvRsP`V|KI35aVb=G1*48OQYFKDf6)du=d=B{&w2 z=+j$hCk%Za4EqrH6(hD=unk5(C!!B&7!wi3xB}%t@aKSjb|v!!Z8U@5r6{|GVK#R#vtU<+`Tr@(T7QhEb2tt&;pYI_`~~NNbSUnm zF<#&GN8d1>Z(v@1hdxJOp2eVjE8ugC?+L{GBkaau9DGpsE5s9vm~-Lx1j>{0QLiPQ zcwkNqK-mT4gE8Lw2jWQx#`GKj_(710L^jCE(6mL z*Eo#dAjB`h7|BcV6bfS$(jPL$Y1&}SLBy8<oNOoyS;RL0@Xn_gM6?yd#&%JN?@Q%skW# zz=*F$?|J|u(OV(I-zHn^h%I=zX}!Vx)+BX9;N#td2soW=+= zL(i>&j({FVZzs+UFPv#DfL0g{Pt1Z=z*&sM0l=p(u4GsZ3>b~oKtGI#upH3>-UG2} z0%w5-Ko;zhfP)yZwHTG5IGSxSyAv=XEiiNL0z<}QlrRz%z&0G^1Hc{NJ&Z~eMx#HD z?(AuJ7CHU@>+Rg5tE%oiemEi5v{2w8&|0bl5Qs?#7xI9hrQzn@8xjzKprAZ75CVvV zS1J|}0*E@JIDkdLQK@JPECU9pKm-*<2nu3&=@EHZXl$j`w#@2OhgIkE{hfR7EdQCc z=AU7$@9(vL`+fG#+57H&U=%$~+O$k7TBiNKyH1%`c1AP;9p;@nPha0mOBKh4sxV-q&F<1wShjh--i(&#Cp9~t$Geq!{r(NB$@F?!DE1*4xCy=e53 z(PpEc8~uY(mBsD&h3K`XQX5j;A0A763;LmHMcopmb+raB!4|p5A366s;pb;DdhrnTQ0K5m9Kq)xF z351M5w$a{3bBsofmKx=dOn}4zSN6a%~ zo)PnmSeg;@iI`8+e4^$PHJ_;YM9n8^J~8u&nNQ4oV&)SwA8t^RZ;APon7zd8C1x)( zdzr;8v$$pEQ)WJ8^64H5%!c+gS_)0+n(hn>j5L00oDYxl+v0qLc=vQ?q+;&yqu0;XA21NnC21Wd@DkDjuzqLbuYpX(&3r!wq@<5YwP0lqr&*VIl zdz;+bz6$1mprRcp4B?nXui<`(ayaBL%GnTF&H(j zQ6ydCNL-^wTu+kEd&hYWT(P9fyDZL2;TmW1*QistxNrCUHTL{)J&*hnkC*&34yCK) zK3AQk>*@8m>>8s=Poq@2#-`$nCv?TrIF+uEn&673(JEbBe&Vug%*w9Osr1C9Ym_Cp z{H4oZV^HbRc`5O zH}QPt)Z_55@s>_6c!ZHGsW_ndrX^{q&HEt9TP5QxN+35d1g#|{)eNf`L$GP)^;0bk 
zv^W3K0OuXCE7kO*cY4x0jj~4w5tW}(>Kiy>GQY7oyn%*THG_f3UBS;lC-fhJ4(Y5Y zz}?s$1P_4@*gJxDAPBw>T7lN!2cQjT3&!C)0dxmFK_=)8ZUKY9cyJ@=0g4>AbGkE> zd7_AU!u^S6Zuk0h=hlFGL%K5@s+rI2lJ49F)$He{r#mB{ngLyAdS*dpM`*WnXB4z3 zFp77BuAmpl05^e5kOi_qZ;%7}fDq^l`hi@K2l7Dy=nn>fLNE{v24PSPA|MK4pacv7 z`a7n!X01TYdVxM*B3K9(f#o;xb_D*5R(cV<1bz)Rg3aI}_&xX>><3rD6|jN0FM&Gn z8}MteIi23?!5XtGLf z1H1{iH|FdI?*eX*IfuYC-Dm-D7P*>Me-X^04|L{6G#lF2Xg{O5M)Qp38!a%}-{=6N zh0qk9^m#n#+W+Kc47;V;3!TRft-n|5?~H2Kb|(9@qLoIquU~1Wd>;%ACH9Vxe&ZU$ z>gxz=(I2xKyU0rCWmafzhgl_UW=;4PR&j-_(L1mP zUBl|^_Q^b9E2%ZS9DNCE&IPQj((tKeb(YI2Y%cM4vI4wymFkc^wPQ&9d&7PHM4bQ2 zIA3p=?ep?UKL7ZBpPwD&^D$F2+Ml6(Us3*_Qtkm@@Nm~D^19J?_plD#1Kbgm1grxe zf*1056ZHnN>KaYGD#84bviyWN4SAf_G;Ju_Glabij*xC0)=KX@))Z=S9>1I6F)$x= z%x5J{DFUDr+y&-=hrr@;PW$p7EU~XOoBaeV=gl&iRpDi{zk&d*^a23q|3fKYO0iDJ$AA*s0uxkOnq*soj{|ER1^5gJ+@V=mKtm{O*OJ~?qK>JS+ zpwGK$70&OvQEwA#(x~?Z<@^)2&aBLP0S~zf{$&B<9i;KR3>xP;G4BvXyj5A*huUnw!Z!Ll5Tn#MVFZzkzJ$?j1X z&|x|w8N39(0GsZW$4k`z(|g&cxP=t?EpNEDkvOAguor`6E_^-MV<9VO-x9CYS$Ta! zyy5q`&TZaa{NG}C?*lLubYiD!5SR`g0Ly2}>jruy53LXLLN&0}!P9t`!5;$i!4`gV9qjcW&FCEMNlhk==S+Hqm>F~FpSi9x z(o4f$i?+tJe9|7}b)dWp<}pq{-5~yQ!FR(;YIqL-%fb7g^UruE__^zpdj-T8WYTJO zZ-#j5$+zA83VD$f%e~#u(d_&191th1NZJ)VM|*!WdL1n`+RG)@4fE*>@HmJSQ7?RN z_pT@A@CSGTOzTN2-|jtY^Z@0*{0mxbF!f)+uHXXKxx-sXdUtpm3HuV885FjNc~VoB@J1G){ot?NCBKcd z)}7vK&|%(FMxVpK^G{;t`&3Fdb|LkAXIiGKax4;4{D@;A4D0gSTLR`)lNT9&?=u-c;K0V=U)D zevDRx@8R##L~kqQo9I>1es5xL1i$)M?GkPx-&gp%`!?vjf+v54tbHg`*lS7HVQhQB zU$BS0b68G3$q9b%I5n;C{!DtmTuDtqaFr}M)UER>{^G2rB)|g;!Pigm*X)=4`s;mz z$Kz}0vKpV8mMC^=MmKc+_2W$cE63)rXFJhr_|($kdY zrnQoGlEUxSGUA6~S;w0tcnACw93AR9RbGf(i=N|e0602~9ak{*dCl}kN#!5IB&Cw$ zG_MWM<}_~=`QC9WDZId6;Pt%WgHG%D1=@4Yed~m|wVIYv=`3o&-Vt}e^|gc%EyH%7 zf28rWlybQ4^pAn-R{wao?)6WA>t_E%crmy4>r%q@1%WMaeM4X?Twf7*6|V0H9D(cm z0UyBi1%ab*eM7+MQq23~wcK*xhAdJC-FN5$oQT;S-JnS7x-Y?P%%sk(3-r{au6q>R z*-Yx!a?i3M<(j14!Sr-;(v8^$-GoI_p55X;{!QA3l>rC&hjI8?Fs0lasoiXJu0N^PG~@{XDCZx}WFNr0(ZAo1An9 zwt?P}q_){{O 
zX7j%iu>jivYzs+2cWWEyX-P`;?`Bh~58$O+xC`O(`aREL1$W{&y;-NzgAqH&S-}a* z3}6Fk8t%B2ot%Fm7uf#*%{YW9Pcu0GT;namW|5_1TTOa0>a7FOa_An=fD}oDmg*ho) z5j@lo=A>{%u*2Rkr-dtmb^F4c6s`zH4una+BDk_K%xU3@V8)R!r-dtmHAlmo7_JEJ zYYKB}xFXp4M3|Gq6~VEm!kiwi2o5-Mp^o=r^W7D6DB;yZV;ePgRfAT+nqd7wrCc4{ z!2iajlXC2wa4PRZ?ypMI>9NyL-+CpaJFXiznRXa^y{u`ztUI@xI7jy?vEJj9S}ZN0 z=@xBWD8ebdb#_)QAzuld3vDHYvZU$WuEMwCC;2d`Y~}RZKDb&?w#%HLTZgWbef6AS zyP#a`OrP?WUQg`*kz+|&XZ3WVZ!hKkv&E988^F?ckmgxda*;Giy0^QFvvi5t`La^o zN9ukF8bXaBz7>LP7j-S>ETBqvEEMAepjyp8BX|sdUw=2$6pA9N^nb$hB%ZoMyn*)8 z$;6X*N>`~)lAh!pp%W%4Wu27NSx3KJ6Pj}Uh?H~>c|RrDi05hIsE6J_ikNgbbUZ2R z9OY@su$$UO)8u!RcOe-q??NH_GYr5}!CA2RPZq}D$PQdlmn4W=Rc8*4~Z&6);*~ytcSES3cEef7z(H9DQ#|E((I&&wlI1vbZK)MGrqHA>C!oE z=eC7EJg04qIC^bc&Z7ibQGhL=9&7{K!4B|S@EWK9lfYz938sK5FcnM#)4{!92DlH* z1l3>`m<{HDxnLfs0Y48qX-m@9aUgn0+Ou4Dto`=FSH+9Z&;Iby#^$P6Wvpsk`_@dT zjx)KdJALar`@a3^@Z7gh^{>dOeavzGndV&M>;Ip0t+T)CLhb)0#}vov@xMjsb;ey^ F|KE5!;~oG2 literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/pnames.icu b/go/mysql/icuregex/internal/icudata/pnames.icu new file mode 100644 index 0000000000000000000000000000000000000000..58af6c0157ab926ca08b4a68709e8b13ad07eebb GIT binary patch literal 42682 zcma&P37i~Pd9PhvBg@9v&^?Prk{4N)WXrZJYu_x(ni**{l18JEW+YjX?RIr{O?ORq zSGB6TXEd?_W4yB%8w_5+K$T>+1OkMxC14|RAtZ3QNgxoG5FkL7n@dRY0XG4@=RH-` zT|JVMtDpXLw(6==XL--NoikTye|>GYCjWG;W*b$V*rh5RUalheWRIo|jcVF6&$n<> zbkE!pJ z(I|X23f~`vpNYb+N8!(+@PZ4%=U*L#Yol;m6y6bqUKHLRg>Q|*4@cpbqVQW$_^(ko zmqz&^-#;%3S4H9OD7-5Q>rwb<6h0S)zaNEPjlv&Cp?+cb+>4{IGYS_*;nh)iLlka` z!o5*gioyq?(2v3wqVRiBs9zMm=ao@-T@-GM!lO}Gi^7MZ@cmJ!r8R8<_!8-CO4ELN zv8J6O%rI#Fo_1M9_%Q~Wy-L&mgz!s*e?hoxt)~4u;Z8gA`v}Jf<$Wik@HxUq$R8Tkw09A{Ncd4gZ=a@p zh42G}e@Xc0e(Fqk@wld?=~L}0O-lib^n6KE-xTmVMBy)^@RGyf^RJFVtwue-%~ARr^j~Lqi$-&+_*I1eMm|lX z{z*Q=Nh3?6d$gBE5Iv38f1VxvkM5F=mGOUFPIM&fQ8^Pa5=aFTnQF| ztH5G#HCO_c0%@}2pa{xf3Y-G31^0pb!2{q+;LG3};G5vD!FRy-!1uuqz>mO>!B2to z^FM%}ft6r8pnJ7DK?%$NdG_PrIq*U7N$|(u&%xh-e*(V(b7`b5a22=_YyrE#I52<> 
z?gkHoCxH(>4E_-O3HS#1A@~{iHMm5leZci#GZ+F#zyxT3$3Or+0sb8PE%*oUe}T?; ze>qqPmVmWj2RHHjpMs4HkoaBeiTo>hGHANM&EP1Q29JSHgCBwa1g~UJT@6-)?O;E+ z6Ewhs;A!wX;8Wm#fUkoegZ}`RFc^EmRbVyf2ZulroC0qIe+Iq2Z17s}R`6c%Rq$Q# zYj6V-WjB}xZvh_yUj*L=DJIZrFbZ7oJopPRpGmbDfp8Dt>CNR=U^U#ViY_KJ_i02`~oat;;sU>g4@BX!S8@S1OEyZ zGO0I%VQ>eq!CSy5!QX%`Cj54ACpZBf1!uvZgP((Lh`|Q14-~-T-~-?+_&0DRgkm!o z24zqO4}mk_Ecgca-#~|$Yy`(a4ZH?C0sa7d4*U@OUvMb|=W=i@7y@?z2fP}*5u65} z0$&9eLYNkU4WJK?tzZ)P;5*>r ztC{OyIT!}V!8CXrd8^I_z0iFk+06ztn zENA?K4WJL$;LYH(;9r1t9qR;G32NZ&;57IY_%7(U9y$c}ffL{!@CbMc{1Ny^aN!Nm zE3g{$gS)`%z`Mckg0Fyo054ymY3smFPynw2Zv*cIKLe>7>0huLOo3;?cfqf~MJs7H zun$at$H4o+kAQv?>ki{1#plRBrmVJ$ydz)|oC1%6r@{NdXTVp%_rcGA@C2^}SAvz` zX0Qj`2?iKX@pI>L>+8sS6L=Q94}2WF03J%*M^&`%lJ`#_wVJsMZUDD}QBVNW;9>9{ z@GJnf!Bj)fe+pfeh+*Cd=~sA_;(;O#f7)>EU*}S2>KJ( zeGYZMnY=-85Zn&_GEx2_7(9a=-?61L0*JCVU(CJ@6^;Meud- zx8P^sH{gb~R7|5q=4LAN&mb7F@cEH5yz4R)d}3c2EcRgSUdy;19ry;LpG}zz@LB zK*tbt5?l_h1p{Cd+zFz6%e+KBeO?(o_c&!HaKI@b^yz-WH-o3ZyTJQF{BC*g z$4Ebycvf%ptgn$T&-o7cd+-Y&&(rsXzwaWH-(O9*3P?SLhHWLj4+tHbUFSQ={|swc zTyJT|$H{vdya)VcqAX3>m&yBI;QJso!t=mQU<8=J1$TpIz*oReLGOOr2pk1f*1Y&# zOL^Y=$on|>68JjMM;XIlD>x4B0q+Mt02dr!jDSPnQScG)*C2C{xeE;NAb1jd68ul_ z9q^CfqA~gi>;M+H54;^b4?Y3D0R9f#!de>FZw~caG7kR)2EmYdoLs{PmZ6uo%p>VaZ`B1E6Iy)`sJ$G zJlfl-bIhl`k$93Pl#(h)w3tP&v#Cp znwp&U51vdFMy+%qm0#uMy6T2o&)+rAvFn!IFpVy&Xw_faVXQQVowM^j>q^%$>Xu`B zX2s0c9oI^`i|SV1s1P;nx+*(6zaw|HyR^pX_qu9E-puw|b<E7OzH$3z9c~zrUv+N>8odwf5p0-l?Z4;MSX2Bv^bd6f+ zc&}41SEG|QzqS447hCt&7SvYNH#E{b?q!|w&J?kW%#2$tT3aiA*6E$77o2+CEF9^o z)+%O4eza1mKiasm#%txJ%~!r`0jUY6Qcx9`d!;iy>xK)jwCa3(@;4XBdoRqL?aHq! 
z95L+)NzVQJ#rgz+yZq$0YFejNtAGCD1?I`RX?xOs8XD47SP0=^pNR#9r+2=Jty-`TW@o4RPC*^6Xl}&et2NpN?KJ`#UbK*6gX8|1T#_54gIq++1V5 zv%4U@d+5p{l{PAtElK+Qot;L#w12L%*;!LR`?ALP)VEe?op!#o$Z(B3)yva8HOI2+ z2fC|NGoPxjnd&ks4O8x$shMYAX86;2|Go)5|5U*rE2r{@3n{DrL@Iyw(nhb9FXu~U zzI^u5F57hL)u`l^iIP{Uk`qzMD-tEIP$f=Oa!I1(5>-hWUuD&Z(-P%~_gmXfEJk}|fi5Guqy|~r#oypTLtIWxry(s?%^C`wtjf1a9;R(?P)tgJogrSoTdscxz^=v_HesTescFHKl<+?lhzX=jngn3j>$%$&zB z=IO<=bDeAIXD`fHb}GMzPAH~|E9+{cT-fD0joR6{Lp=~B*QxBDc(Ca2DRmed%-g)u zOjoUB+P5sSi;aq>URW{fGQeJ*KDmX#3;DV^T|fKsd5wxpj@lw8tO4yoI%_A!p~pORW-aa`G)`Iod4PU=~ac(zcw$Z=BswiKYB7r zmYyH4)@zp;mTN-djeOaxe{=h#U8*a3mZmDJoNm>7%XqDKR=cL#a$ScBx5=7k7mJmd zTIpNcQ|ZFBy0NBwHsxfP{HcsxzB|n-keXY%r!;Oo;P}q;ocyZthRQ?r*Eb$XO}w%C zSSCO5_?pi$VvDuM>lIS2D0EM}T(j={p1eyf@;7x%th2Y(`)97snBJ1BAhw>l_^PzC`07r#VJ>-j z`M9&h+3F5YrL3jy+Ai0q8f(*2S9hCk#h6**|A3XgR1bbozO7QXJIXgXtK7wn&T`$j zX+hmJL()@NoHCY9t)6ciQ;wBqky-73ztfzk2j4I3ER2*Mwo=YDUhuPp46{bgqgu6k z@H5>A{vj9qz0=j8RZ@({F0^VY&yF6_^REj_paHucK^BqbJt$9HowkXU)*4= zx?^>Ajgej#jQcDgYR|n`TloWcYIIX@z*c6 zU-+_{bq76vXgXC|>aA^ZO>3`CS*t3#d5g8zxph9hRClBrSL;lkA!pu{>ALVmxAsnCJGNam(>c))_h2hQxj{f=y&_N z9k*)K`xd!|qV3I}-;|o@v-YK&;h|H^i4apdDSm{ss7?WPftz{&D2g^Vc15_ zIAyrriH6~tXXdAkl)b^72QkVsA%p+y%$J-qD^ljemOi=FEqT4s^5g|WtT9EuekBxF znz-uCRC7+n3+nTj2e4aSu)FJ&Rj|&itNTYAr`J#EEOw_nztZV7YQd{gR`4$O%63Ma z!UYt<+DeaePhqROKyl_1^rja)-fiXKKK&2Rix|-0!*h(1>HluY|2@n9q!g6 zunVxl{{Ng8*Xe)GDZUy~RrV*Ve#P!AHtMI-sr+^?)2N+ZJ1=_QnYHudWym{|(T(eK zYrNntMtU*Gb+O_Yf!R?xSU)^<=gD~u!=9ASzo+?%d(vi5gAc0)AL_Ksdhm&iYX#rQ zSd}69-qSZU-*Q7&rBQ6`rJ;?b)>y_iZqak=Cbqg=rAFNd{-evJ`bWB9V2Y*S&3gX6 z;uH2FSrUaoqGhGSoJEX4VT40w!ODBNUd5HOCIr8mF}z$S$>4W;3`JFgleM#ZYtI|* z`>yaVbYA?d<6AZVfrfv2Mz4%kb8e?)OGH&OX@O2 zgP4Exg2_~6dn0Aoc5f_q1aIn5gVab>M%^oCYOXZ}wQ6`KvxuGzs=2`KFmA7&toeN} zS&wemhiQ++rI-s!`QZO{@ltaUWJ?L75%j`JYnd>^4Eh#S;pPI)2Frb~mc&fUgG0!!;5|k}?j~W<;9sEwpIOue_^^!p^ z${aK5`H(^Nf?uTEl~W82JDBM-3P$iPBy78@WK@jND-1VZGOWC<2$&cAs;eLa|CblI z>)jn*ftCtJT1|DLH*On$q)VD77`e=-(783ER57Nk;QuqWm+q@9FlOl4yx|tS;Lr5Z 
zrplg148NAzi*@L;rwZ}0W{eY2qnuejUv(o5CsS{XohZw68?)N6b<}dd{Geex8{zcEMd#p*Ax{a4J(Y&%844pD6k#Dyh7mD_l_9 zRNLSku6H-wvXu`$hnyvHwd$ldQ0w4dV6ux_8uRH;7&!UZ&xCXC>QcWoZZCv3HcNii z+pvpje4LrnO*Q!WZCxd+Yz6O3yAKE@Gv_)FIu+M98h&o3+pt+AZxh18fc=A9@XuzK z%yIf!5~;#LyQk<>c;}=dC6~ri^30{Q|)M&-3V;o|d{;ZK-@ z)dkF>sMDz4L@uy;k>YIAX?S{di&shfFk{pgl~A0hzH0wX^vpNG-SL%MYYUpvj2C<* zoVtNW&A)jeLck7hq@J5u0Evjk%$cjY;JfTIm&~UtsznE-!hUIvZ_SN*pMjCRBDUV4WfXE2jU&2(ekS+58J82I;)2P26G`yC_VT;;PWOs znJE4Ty6aZO^nwq_ptWFC4bO1TEb1|4n8DTk9fgYdYTMuCc00m&r}A4%3+N$7NikmV zS3M2aLvIs&qt~1;tb%LVXBPL?O{;3mm}UQyG7S8$H<#hB&o7u|t8A1TR`8E=%Qu$~ zJI5M@nT%yDDnsXM4S3Losb=t*YiY74x^opLUtXN5GGj}PGd;a9LTZh_+t`s?Y^JAK zL|y^wYN2fjUZ2WsvU}kQ4H>UzuIVxA>XZLZ@e6AvU;Nah|Mjx}&y_jGxak{~f704yJ%Y67f|e{yDZCf5j@PL>8n z<4_Vpbj&&~hc-pyL8l5NnelGYF zGhv`K$JlO-R&D3O2CLyTyg~5ed9DMAFImAOS1@zDDoaEt!<<kSL>2>?D}|A z-H^&ZZT80cgH++L7tP4iFNbX{B$R+>UfyN$nTwFsAg)4j6~XnMzMOdmb0ITOX2+*f z1y*(?(<%K;%}ABjxZQj-6X=h+)ME5mQT*83yoE_xe&qtS(uG{;N9UOJihrNuKjZq( zHS|d&R=2y_D3`2(Zr)V4cFv#Vo#B+fJZ1ZXPO56VT}9q3V>E{RMx5S;ZnaQ%hV^Op?GQweaSjU1L#-8F-C6!w>gAUHhpIIiP+zx)w$SLRP0DO9K2eXAX)!gFZP1p zpbD${C)`f>+*frXrEcml_f#ip{(x|5{=Llxx|e3Ak^ej*D#4$NZr=Zk`SH?hERuy% zsqo@e@Gnvm{qB`5EutDkJmWwvIK)7wChHL?tR;o#OuxV6?}LZR)q10c)KU`xjQgx# z2cz>~285&Vzt*IcU+ZMSJagf~BFk*V+y&DaryBhFLg$22aQ(i9f7{Gur(m=#+pMaV zIi6r*j;CttPR=X1#x&#bk#48zOgQgWQ+ptq+5?dK(srw}QZb+CWn8no*xp+*RV(X*-km@5vT&BbB1Y-UUCXJ5a;n^XcSrS@?LS%b z-`(hy_A0tg!GESV=9+?M^xT%JpRKXRhtrym;V!)}SbDCC+)?)E=4b>=qdo}rhDJpc zINnO#+&=MqA5^QULwMahrQt>{{(i4fnNYU{2Qv;bwU~jz{tU^_M=ul2JXE0GKj(TpPColT6pO0aJl1U-M(d{)44m7H5B7lQv~p3x@GbQaI7WZW<^{GF+Zw^^y& zjzVu_MD+aq(#1iRUV;|N4dWKZ3E=O1<;UtdD!P+jU@sg8Puu-9qCERM1R3qmqN^grI!y3Z=4W>M8Py z9ebZ}!NZQoqgC!Yb7QJ}O*J^3sT#pwrd+rqFKXR03%Yo^d6yc*zm2OC{Ibi$D+ygu`Ucw_23xqwC|gmXL@j{ZsW{N^W&i-!)DdQ z1{vGI>u8za`}12vth*?p+2e?eRrFns|G|i@{$R#6&MdsBnyqfJhrCl${@&?M`f(s* z7S`~Mm9@IDJ-5+G<(AuNbFs38ZcMxDIuZES>(xys1{MP|XH=$BPdzywvWo#@<+gD>= zlAkT%m&c4x-Or!d6!go@VH^P39H&H1tNq3VGvVrxaX_+6W_x7UPs6N5&W 
zB}yYv0GWlw_M?u!t9H>u_r!7QsI%W)>g{f77zT_L`F(|5=H7GkzPIRwwUzbGhBlS& zjj7r_^-fg29hFUXe{I<7R&wN#P=R_ws7~FOP`~WI+_V!c*zSqvCLXr@y%qoV>Kx;) z{A%+i`$nfHG-08Tjh@GHBsBfH-dHR>kg`^ycR}LoyMoD*c;fLil!@g#nRj$^zd z_MkK3?QC4;CP>-XRh@OWa%+FuPT4Eq-<-;=id&X}dQq)J&Rhzq}mLSLsYr?*+@>R77c|HPhWg_1Zf z1XJDtZ(IHL>37fg{inJ_uN925#1~dpJI!(_80O`{-VS*3t))HFcuv%3f&=O4U|-4( zMyPzSzZ>OCp%jc#BVUYC6O$%FTD>HUGB+cts8{Sj*4tAUo#;f^e^;t)boNTs?!*Iz=Pds z6UhQ2;~7CNw{^0=lCDs2yh7&f*kgmEM2=C<1G=-KG0Kl0Pfad&()OW+xLnZUa0ZRq z;&7<5-vZHxn^hSL>w8dM&|i-| zm@ciA-)^iz^ETrdU~UV?$Ik6)Tny-?o2$3m3t5I^F(FmHzDCW=9?Q1Tl>~)_rwqp~ znI$!jSVT%YY;@z0VeRpVoqo9T&bF~Tpy#)k4< zpIBvYIFT_Xg4;4?34OQY*ujysy(DGsY2bp?K9jDUMx1}Zw7obrd56>M7UapMdq?`@V*4(~@2mNvCw!Z!wA6BDyWlu; z%x#t9&LG5SFm$rn)pN3ri;j7`*N(h|cBX4Xv&e4B+2#%}G$*W+&=%2*?c0N6Kr6X7 z-55bcgS&O?{iVB0qt=6yK1{t?#Y9vPE8>iDfVgqan(SJ`Tvw^iY*9WKH!lzwF{G_k zy|5gUUEkLE5i@1wZ`qcb*yd%-TFSWE>{d4NTjw_`(Wh5%8=5MnCI(pQ%n8#h48R4{ z^8O*y|8_B5*_3IRn{{gm%=)}|rCr(^%Qeg9thv^>E_XxuM&`ItW^K=Hi)FAQT@$i^ z;M?Ochp+8|V>NHMuzH}n)>&S&-1VvI6xzV8^@a2zy{1Wc|w$!j=ixa{t74D*qg&k75eRzv!-!P*vW7v=3+>$^9iwV z27j25OR6RYgp&a}6X#h3b<53ImX|q&EdcM3;D!30hX3g)7A-5|)q(Xm{4w-D;)=#f;%T+9kT9*PnQ?>xBP3 z*Z-k6*LoB(=npsi#!RX*=+4QTW{|a3Oa^z^5OSt`+Ij0dgf;`S;FEe`YjK!C7d&3d zVTv;H>4N{fdhheSv7Iic>BTMX!I`v!QWW_i_`Ou&8YlRq{|VSvJNN`0@zlK782D7i zF8C)Redh_|_980IlySYAE(CAa-Q`oOrXNy7bS|j8y(6YYB40`(?7$Q5Zl(7mThZqy6V5VJ2Y>-8E41f=-l##@?pG(&e^O1BexF1b=@U~7*u10_EUZq73J`icq->&CY%FxeYxs3GZkI`eF znHQN%jr%*XMLmEiD)=Cr#s}$;5)1F~boDqUsk<;q zZNwxs9-E|C*)U11P$sEo#Ugd|9E;Qg$|4nf@?4A5o0=A>U^ui$-H|F?Qx1^4P*zuU zYejh#d_op8Sr#tow$j;4%b#CbKHXb6{n6@0&Y&~o*6a5+9-Lw6t)|_hU1G61ic6op z2EHP2nKwbYco^R)1`w_0KS|3ynK7-v%T((AX$(D9a2hWKzi9b?RLRU#56!hV*=yX} zy$bIQ4y6j4EPp1pxXk!0gM8vRz4?BG2s?Ox%hnToxU``>T#4AFLk!Ky;N77q=G~0? 
z-|xg46a0Qh;iUOi+wXT%`QVL?e^|}P*|wO&u`T9}Df60%;N$ZmgG})8ra=ZfRKb5k zZ_@_zhK%6^x6ic~+UuN6wQcnbYW;J76Vj~WE+H;VP)AUf-@~jwGqYvZWDH3&1Jr?t8Q_J8bT35@RqmK&tElURiPGdtS}R@^S}RVaF%wlE){V2}5Ygsiau8i6ZU80uXn4ZxK z!Ol`JSo3#X*{np2Ks&|P_b2UnMr~aB$byY20U&#mF)QQr4F;#dCPRB~09+<|OlHW&{*MotM@~}PX zPMxFXdRMv{z!xm_PFxXr=tK@WCm7ApOFLkRSYz&R4!U&MA1QTLzl_YOw7Wv7xNeM< zVZk5GE$t}{(PdLpuzMc-Km%t59?=k;^LxZ#eU6&_9hiNqI*#8#-M+RJa>K2^YL!HyDFSGYR^LKc6 zHQsuXYNi|4haC3}sR~LRd0VK9y0Ig_!5pluuCGDaSY5j{_J+S@Wx94#y0%VsvaB}- z^9Rksgp1$ijT1LJ%RA9MU6-z3Ek9Y3sw{OkF6~54Se8DyNmXle+POw)wXW6Dm51H! z?gsD9=~E{kl}6huZFTePwz?TU?8L3}?PA3|(QxWGfO=+8d@{{_)V{Kzy49X?!d6O| zOD5KL$bj0f{i6y9p;xgFIzPp(v7x~2!glNaCA zFCDRyj_8+;*f|(?#IBxDh+L9(SL3*OqHn&q4y$kM)AMWWtxHQ+EDsS~c`S>Y>@D+gki{jmkuTqZkqllpe%%_k{zcxUIOyDooy2y+WQYPGwWV4N-GwU* zdW+qL|LL6nLLL_j-1JP-QQ4TP zK;cciPbRvolV;(T1!QC6LZ4>b3`sCc;?pm4Sy+ospg`xN<+^D;(eRsAUx5(~-zE{dMSj0--W0-_Q8JdKEJRUbQK@Q0 zik?sldo8;&8q0KdQ+=>;O%HC1=#8$usz-DssMb^N(#Eo@*@_@mHuQrjcMT>MbHafq)myM<4-mlm?%!aSJD(wk3l>JujY}ymP%O=pF z{I9oj9>w2NrC(pgVG6(HqMc)3;PXGLbrjyKRX(UWs~UW?>3&Z8sMcn0cu%VKgrdSf zzBkoT*>3Nt9ga){s=2~}Z-i!tk7)_JgH*U(2S(i|UiiX=LRi-*F+v zi9U=Is~gMiYBx??m%jz`M1SSFE-_EMLr5l#@NCE53EFoSBuX=CE0;QZ@^6&aqlZ_bmu_6v`Yq^MY|(p8F>F(%H5Q z>0m}>G`nuWd%+x4mW}sn?WPSzL(;hMUQO>42L(-Cm79V#XG9xuv_Wl9^wQd#quNo^ z<9M)fcCu*t$UzG0*@N+}6iwfMSlbVkXCsTI5A{(#WZ0A|Mu+?NY9kGHvS|9=!`faX zkL=-a`--L?KByf=D63L7Ffg!sl@?xAVzo-F4olXk#F{X%RwdSkiFGQmE=;UfiS=P( zgGy`&6B|`xW0=^a5}U%rW|i2i5^GcqxJnJys2Xrp2~~qNVGY)(8gNx5ss?Mq8mv(@ z;HpYg4c3G;SfgsdRh6h3tO;wdM%93;l&n=X;Hna;25Z9_tW`DOs!CK1)`m4$t7^bi zm8cr54QsGg)qty%Y*M#zmBeQCXs(i2y((-{f-o7j)~cwvR)_6Hph{^3`AyUotHbI` zpi09JSRH-A>af-lgr!m6@uI%*{$Z`J&H~0BpKQPEO3DticB_(E1(Mg-%IlN8ODgZ+ ztjz2|Srs&$M<<`yAEwAZ80AOz4Qt^m4{A|HA9a$a$5jbayo)!f{OG?1Dd|` zWL*Qo^tkj5E*jaLC$&Lr=RBpk|Bbw8pQfhoaz0{=MYUUFbXx?1XX!I zH;iTbVK3a7?0%SkF_Nf~aaA&o2XK4Q9$JVN4}-YHhxhN)hMl7LAY}K;R$uw{aIdeX z56f;viX#g)$4p?=($qyC8Nh23lGyUzZ0m;u`ykxf0Yn7F>SR??KL~N;O`4l zXQH0np$v4IK8~QG$wk}YEng{B`;&2T96z{xh7g96!fsi~{DA7~v%#%geG+7a0v 
zoekqkLeccwq@}qenwr#@%8D9$Oq_XSf2B6S!?LZUKBg*12E&KF{&6j#=VRJo@yi#U zjvs_)6ll6V0k4Q-h&D2~BfDG0$SXH9_a<`LHxb@Bkhrs%xwn}s!j>8oW8o&daEQoQ z9yRLOu^C$gmhPSXrcGWmp^RcF%D!mGY>6eqGUk(DU@ta=opvCk<)1;o^D zXEGI4P`}N_xe;yHDWoL&fubt1QCiav?$!<>R>?keIypMhfDYm-c%Kr)3NkQdJ8mfT zM6(9kGK!O1Ot=bJNwtVUE+0`*IedXhtB8Ac+=*9(h{{V&NcMM*XgeF>7RT&}V)6K# zu~yz#?2SV9cq=;^@QTdzjc9$cVI|*uz`&Tsx1x+=21Kks^qg@Q?dMEpk7>=}+q#!Q z+BP`%k7)Zv+mam#cX#q-2ccE0mCbLNi?Sv1PJIJ;KO!d#`S^~>%J zYS>Q&vr$4V$r{6$UL0nq+Z=HuL3TB(cTG#gqy3g~7PVCOF)p)xQD(UQ!P4|4OZ#V+ z_On8b_Nk>a3ZoqctS!;hVvq_2^E9E{svTz+$fDl#1(Z=2iVE~eGg~!2lVO)QLbDii z&Gy+cy~K+i+A-rQA6cy#(fg2}MG?zTi6X4p;#|POtpe_l5g!dDO&{YKMqY~oeY|AA ztr-n!zDrCteIJgS$YWX@>bnqzWRrx}jOcsN@QNVB!5CQ;*LULTB{l)A8PNxr*5W1#^^PJP zhX(q_w0LnJpydiYHw#ngh8t&D&%$#(JQkm*w3VgrFS7kj3$OaY21Z>iyy|;P_?u|q zl`&?ATi&!*EIF8)n}}!unF*~?-6{#kRK)$sxQcIrlqdL=UF>;e`$szu!@Po^$v}iZ z)>JCo?c>FnQIZkLz726(=EIdAuXavGM`FBtLWEkkzVwuMiTi{-Yk!6wPD9x>)qL)G zvh;8PQ8ds);uVEKlwiKdZdvAw3Ka9$Y*Nt*Fyz(hXG zyE@uKEr#O%4I!A7q3<6!sO^^tPd^B|sx}!RqBpm7vcM@%OQBoQzCP`o9C25Il=M5u zF5#X4D0(ZdD{`9M8F}`Hq_&lmNm5mNgtB95laNf7_Kfh8<9=@c;5ZkWXDiQUq?st9 z_lw!2Vro&CjaWbGI5QQV_a(v0gNv5jeN>*ut1u|(%bF5Mmt z-<}R{7(H~(J)_~?`0OE(e_Fp6aWI*lmFyVhgwfN&mWb-R9NB%SMFFgfCys*fEn%ql&qRBrvioGSlr}33b&W}Y z9^Rm?ZP_ReS|nC92u;!-6J8;+t>;LS%AD$diMf)PQ?yR20}^A0v0D2t2bbK?B!uHe z(SxVr(h>87;R7O0F?nPJG*YC z;ThvdxDrV7#4W*KZC?=D+OA1$86b^XHe6}>N$bPtgl2B?p8oURLsP2dx2*>K|1UL= zAxaI@4`XcE8S(WIM7)t2pP+`n?eS&nbMQB*OG~rxFc*%#2U5taI;gH z+A;5R)RNO{Ap8=D8r48mQ*FHuEhD?tv;E}A@*$(~JXw5CbX&AIL8H!-rT3Y#butp| zVc^=6`XP~OM2u=ibXc1y5d`8;-|um}hKMXth{T0b2?I*2g6HO_CccfMda`@uYo(=| zsn)k19#a$ZT$P(*b(#~p&(6!f*lZ~Lg>>mWc??YKI&c#XTtj<#$j$QoZBL4GW7^Hw zi%EBDxWj-nIPeDo%4Clsib{#R&-Ob-m*RMvJR1_7|AK)}z9bT7W2rWlu+&USCPd$j z#;lnobxtOljTQ@Xsusya>kio{uiVVEU9Bjy!H7;WU@A1DEuns#*&MT02w$NV4Mj#G zS06E_vubu>Z%f8?S*u1-3CIXb#@nWus`7cJn&dZYPrOYcYn3q$+W1w*DO<*~BTO_4 z89$LgFrGj?T`VCPM-lxn+FK=NgbVGgB*s8T6X^ZO{^6*IBMI?yV7AmbkkqB@m|eO9 zfwRPd1>X;ei6YV@kx#SGoj8g(8M((HN5-*MW^<}H3*($~8TkRUS;)r4>_N1m$%Onw 
zoxUS9mQ5S#J6kaX%32=<9fhF%X4r&y zoo?PNOOnxuj!@DPQC6H8qm&&Mu_ub?2TeID2nDSQ&`_XGP?si6h*_MN8pLVo1bk&2 zPdY(GP(*p?n1*NM^dc<^(4nI)ve90sfKt(LP^toi)x(;x=DIbE zqZ5k;7Y1{+QAL3$S6ykeqfryH#i2gR#HVjj8s)^Ka%e}%)Sft!78HfGfj@lfkWY}9IY*(6k6(G{pYU*qii>4qtU6t zY0)37i|1r03O436gb5aopRN-+)}EvHO{qg2v^b+W6yFDjWcD$sWp!(%J5L(Ow5FrS<+^IRp1 zqSZ|iyj=oFQ0wL$O<7t*1X;Uct<9{0CY_JPhk^fIe1ORA!_iLqEe;vkC?pu!a&4`e z;aF_0n@vK)Ktu(DUuQ zt>U<;cl`HF&59KXykS$e5>4_RWr?g1b5(6}L(5$!QG=3E#+e3kEkWC4mYLf&p+icb z4nvnaU3HjN7Tu~Gp2eNBGFrDJM|tzMWF|d?)?8hL9d<XEGvh`XE+o7%xI7PUBBLa>n<&QW(#Wfm9FVfZqa0Xi&dEZVChE}a&Kj!F zc!_5bKTl>mQ!35aQidcH&!M8ENncs98sP~KS~H^Wq(=DnYjJpvN>ugIY_(|OnRr>q zu}9)mp+dM#hvTr|G*v*d5uT?-P8>_{Ragz%wfQ?mvQv@}=Bqd&OR&M}Z$Sp4h!BQ& zlXxP^pn#vw?hVgRW6&g1ise-tF5}B(K!CPI32{2KqclGdC5JwO(54Pa(^@e_qetWj zz_wMRJxARHbIc()F>fqUEJpHo3goy#*pWzMe7-y!2TB~)m>t4&Tf|(ONKnK+TdWW! z`EiWQMs@(($dJ(>yA{_*J+NrEuSSgFC^1|TQ0`vqgS%RsmAZJx# zstUu{c14rGO){2ac`UMJos+Kw{zwNh(5{ha=EBmua+)g~eHiLTn7~4ck#n_VT<SDb$7XDlBXULSfw@%JQ58fsJ^?7I5{& zp%uLCbC2;-wXr)prp~I?&dHET;i0vOEu3vBWFK@~h(2;{nwsOKLfM_06qNFYEob;e zU(udo;EUH70&93M2s5BP9SJXEa)Mx!o`hmcdmalySsI9D3PUm=<}t4qINA=GvH3RKi~<3WRYQIgG^N`S|Q}=;wLZ?sM*88J9tMlhZCXsj)YN1smPKHt@BP|>e;k;`)J1IjN zV&8vJ+8ou&J3@`jW~@2uj~F;DI*@f0FbLtt)VWr$kj-dwhVAlmw6!qKRC#5%@KxLQQFJfa<;!$wZytZ_-ygpOL*j`Ds&<>NsNI zd8RH{;2`qYLU(fTW42GUm{|%CaP$$D19dtVf|1r2ALga$Ly)M*kf-TvNQmut!Y9Vx zpdSezuNSf0Mb0JzxDLu6y+J!><4qHvzy-V8XgSD6YFIKWE#KZ&CJRyQg4xQ%dnne~ zhW2rO?L_{0amGGdhC#dc+dear)kUslu|%BShszY=M>C^iq#)x;&J&4fucrPW4Btxq zlrLDEud_WP@(GOG7LSE)V&bRUbUHl7&tU4?vTEmbMC=$~9EM&aS{~<-3hg$*$dhxv zh&r>Z*J96#tm&v;sWpD-?MmZO4}TNfU)wavqxd<=p21nU>eaS7uxnOH{E=->850Mt zrd<#c$?w};gvdjA6?aoeG-T0^nk>#3wc?pG>$QCOxn9?kMddIqy|qiuccNFEthkW3 za?Z)n`c6F`)^FFs_Ko6+WQE#NxN(W=Te)oHqu?K+%vVh7-gc6%)9W%ZIjNF&lWqn2wwwvl31 zo7A|O{-7b%1;J~Ym6@L(SMT*I&qDopR{b+;s zR2LPc3sg!%{g}b$s0)LMu|%KYm_-+(B821RNL3oJ46|6x1c0!yF=2+8`OMCJR+Q5i410yH2AqW z$*1f<>gLR}w!5ONN+)mFC5PH2pAu#yZ&6byQJZi|B{Q%maUNdsDTt;j 
zEBPEX?Gir=r(q(4i8kw#qMYPY_Q@n{eUK!R#WDjE#cDz(();0oW+juqW;P~DWfmur z2RIclS*+5@8xC@mVDbi)PTmmC=ENhT>D`u7*CP zQMe~rMj}x(j3;;Ncamx=oK@OhJW@>lR#J)Ij^NMUda$J03TKthDu(7Iek&BPl{}a% zBatW?^O9wfN)(QxqH8@xQnO`yw6Zw9XO9}Qh5S~*IX}UEK1(AYCUt$X-ei50q@y1a z{x)WQ;18p%bm38q@GYpk;b9}@h{at~n^l_WcppwPL$($-&wXT5elFx&`|+EEPrmIo zICypV8+_F6q;w$LS(=Bd!$YmX?!-^B{kZXw?2D3pa&iM{wtWG6KAri>s-cFHeEGE!;q!N%^I5(CeWgZceR`_h<@@m_ABKp2CTGl zt=TnFux%<%9uL?sjz@vSb=g>Ib6{ksYY`3i@L4m$=j6_3&uv3bVG&5jz;YsiSIBTN3>(n-b8(57|{>?ok%L0eq@h!gtJ4`*pb=~ zt zXKBiIJNvbr@y4}%L}Lj;>br*u?_nn+dlZM}mK|mEZMsN>>PMvP zXuFGYm~Fbt;$RkSF%mW8q?_2b!=9c-Vq=d`86h=_Y855MHW?D^FN%7F1b*^KA9*G! z;zYEWNAzT*FiT4WzO~xi+A?h5XUau{in>jV$qea4=hvc`q?M0Ja~BMTDW1^W&$GXX zP!esT5jSCF1CU5)P=ba>JCN901VML`xE7;o*vB#(5n;m^n-lxya({$;jt1TyN!SH7*~&c*PbkU4;G%(W=dB2hfhSd^6;nshVHXbJ1Dt>g>k2m;HY*cZjMKvJ~x zTl`1L%sDO$5*G$q8awu*5sPfx`pO>vECkq(moG6xn=6R$RDn`d=>MBo}$f9k#q0r;TvYZ}S=_liq*FPM$oesf&K> z)TvV~ZiBVlk_BCSjr2XNAey?+lC)UMOH3Twn%-^V$JTUa+h1X`Y0Ite;3Rd5J?q2| zk1uNa<>_Pmkx1xA%P6QkB3M&n7ea^@%Z0k=`*72uGZNlfvPhuOn(cyxGnH3wc~TAF z8q#zS*(+?2y6Ah89!(-O2x}CZ{PFB#P|fzXiu|W2L#a5+qj-d#^W(^a zi++XfCcO8?jVUnzMviIvu_~OLy6DF|D7d;PuOjBJa<>~P7g1l!MaD=@4YgVBWa1sd z?M)XEambkEm634C5X*4OzoLf7zE>`&c_Dx5qTC*6{B~~#{^mn#cEo-(sHvkm&2m;4 z$)Qv7xcOLqet4XW@kEZCH*#P=9XoQM zVdcxXSDR}0Kd*+3qL!?m9EK6yC1Klbg9G8u36lBy=sOPGR6mXG;5?1CTPk@>i1Ib~ zFxoF=N36?DNWK<+-!FaFE<{r3QF!LgK&2t45ypZkqOmdq!IX@Myd!@Gia1i}!AS5F ze{t2aTo4lnrI~1@W@|CsIoV66siB|yVuMsD&5n+W6e#C$@UZ?}k#v;Z)pv}!!9~Q_ ztP-xX$EIZoW+xF+GnPya2MT5PzHzN@TsQ!IAd;c6SL|3>cEMVvYi9|RR;9N~x$ zvGYMjn=w6yP^M0N8Z|gSgY&Pj>XuCov=B?OZZkeb8fJpi`kIb~OVkhITRzHeWTtJN zW1i%69(9IF%h)->r0Ron#oU5BD&~ESi87Y5R?dg62y73r_as73>+OPDwHNX zT$E`YN7)#UEp1jZQ5?E8w93PD;)WrimeJg7-ImNv+#I<)$SjaQo`Tk-^^6$rwzx> z>@>rADm<@N)Nkrh=jQ054dx|LpuC6G5xDV~Q0h3fEg&Y~5Xo3kn22H>meVGqGbf?h z9K<0yI0@oII--|^x$!eNwZs)?W;qpCOU5xFhlc=jaD_Sr7#VI>hA1XN?xv|VJU0h= z(A?z*`D?|)0=4L>>~Q&wj>e2n$b`PgvBV<7s!&(VQOF{(fGbXk1OG%SL6I;!9o+zJ znRQDtN1t+<-aC*s*)@vxO!kAKvJVg1g=8Zrh_)UbI(Dl6zl}4O7gvM1j 
zc40Yy8f}`0A)#q{{Jud@)3P(b2aH69Vg8^cq>UEk9aig9Zpd;CUW|r4J3c-}`=`)h z$#{-1O#(6kfwDaU5;Pneba5XGD&a1KGry#Jo4#%ZUO5@hI6bpN!wSea1{zne0x%zp zyViFiX2~E3d_1Imaghvlu&}FgW!#(>gTQ0KsF00#4pTx>xCo~frSjXZ-Ax)XY0SC1 z0QM!DKwizc&Nw#5wUli@~ literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/ubidi.icu b/go/mysql/icuregex/internal/icudata/ubidi.icu new file mode 100644 index 0000000000000000000000000000000000000000..bc85f3d35020cc5084ad875edb6b833200db043d GIT binary patch literal 26636 zcmeHQd5l%n89#6Cxy!q69t;daQK2(DP+^!+q3XDnL0r&^U@IR3ND~AM6E?@ZENaM+h}84h+5O2B~)>VrP}kI?|$zr?{4osm&D9H zx%b-w4 z1vb50Kv2T*ftD3)&cH<{F}9=pSTIspHKwcOa_1`NTIYIagLA+0u=BVx?A#i=Kj}Q@ zyy(2F$~Cx$a8$V)t^PzskK<=-c^_@B{y0XPwxGotxa-B)!w!B!d)vL8-Yedl-Y$>(f&YTr=1=mw{9$K?KOnLXcnA6Y{sMoozr|`NdJK3sN&YtfZt11hMP85iPxw#!5BRV6 zZ~EK)_xxQx4}#$NC`MZ_F#zP`K=&PXrblv5M7KHn2ZsgyLRuj4D~mqls5(|rERl2;4PE)j@coOYRnjW&e%J~ZXWyL zSl$NOBjs_`n9JK9Zrjy%*tm9vd<%f%p zRrR{ZbHyiP^e+nCYmxrV;=haU3jTxQC*l~memExV2;2Fj5Ng`aI|bcS$lc)_sp*qP zu=NCUg?`9A&po=T6Yr736Qp!%-wl z^Go4(d$>NlPsk5iDC*v@^Qg2u6aG0uyb!)BxYuLxKQ~r$NBCAOO2y8@eb`itPlUHy z%9AGzvzLfcl2O{5jzxJY?j8-rs+77*`$_>|I z_|jr=Ods2vQ%l>NGfHQdF37~DM~b)pp!8!aZrV!z#KOkMtVgR~_1BbU^Xp32m3|>; zeFNXife*Z&gZlyv^ZnhgY9bGKca~sHS9+n2u}%EmqQ(J{maLyYz083zp4lT{J_}e;9Bqp(`@)X6kcEuP z0bDpcd?#cM^W^S{Uny&c2e`#ze-=C%*}@Zr=X2oQod5Vnl7Ff`s;BAwRydP`USjvT z>;`ru917VcDT57G0Q!Xe5RJ&pn{PER@&hct?& zdm?Tl`(g*x57pS4u0gx5vC*HMoSfC{-Dg)pd}OQ(I>TAvGh9cAjq%m7AT*!n5RXVy z2h6xE?=od^UoXq+!(0iH%IvP7ED&Y3Q0#-zSSTp&3k8LtETpk%Z-gZMpp+?3HdWOt zPCPauMmdUvHI%D0QYNHSVH!0(nv(`)Rkhg`YP|z%S?uQQU9ldz2D%Tj(t=;@t8h!y zVOlMqN#ib=)$R<<7;7hqPz!|*?r_vAOy(hfTr`L|CMI2Fx5La88j%B@qV0BRVNl>rada#SZupwWk_Sm*RiaeVyi zPhRat0fZelJHFD#*~zL?h|u`fjBngL>`L;-&zc9kS2j14F+-NO=~*l3{=@^N`ia(h z>Y8jk#zdS#>z&l&#qqjVt=;YsL{_5~pHD+QR?%!fyFbZQ+DJQ6nw;#LY^jY$cU++G zxlC;og{ZRB&tcbD$^u+kzw%e3&rF|}nJZhuj3Wk6my)YBToQzIZ+o`bue?_X zdZP~=sSNwtAUjdQrO~+f;2*zHfBwL_`UEQOb&9+jTFj&_>uYqV$f`B0!@@M%l)ha#}*r2Z&o$3d#yOr7aL=OxU)+0E=>V^V!J(0t7qX2k$Bc7cv~)?pOV 
z5!IuSPN!hc&aZpK@p_gt-mE<8*B*f z2|W3&t@i62IWp`z$@5URy9ksf;aAnvEQwe zC+0*cY@;Y-)vHlFp=_$Wh(m-a*?gWk~2?hiH*c6{E1q21uxxY)}#Ix`yz0MjB_Okq7hIJpgZ87bj!Q zWKlU`KK1|<=&)L|W{N;m_lH^8u~c=4q}YI?7&^86;dgY{ZGDnlTSKBZoqP4u+|`IG z)2@yDneB{pkIJHy!S*LPtvbR;O6w3u?afUpr#f8;E3&j}HWI8YM z=A;-%p#q4nDlzSZ8j={Q0;?rtT;>bK1Bd?!H_3VFcJRSikaI|8$Ju~Pb0JC@hyq?8 zfFnQD<2-;@(eN4q9aW-GfE$uS*FjvBi!4+X$fVP&p$ed)tU+DJI4UA=lt;w|jgk<7 zp1L-Rhnh@`9+xV{$^fV1siseZVik5IHE-LJB&)eV#35^L%38z0^`0M%$of6345B`@ z={C~hyBm^_>NwrsMCFsyEJ>LO z5w{Zb-V`vp9b%~sBNwZEK}+?%L_Gxx7+ofv5l=-<=b$>Xk~BP4nS4z1O|xOdGz8g_ zbAu&mBd;WTygrW?V&eU6Eu9JKnF`V*twQpQ?AS`CPufJ`b94R;Dp6NXlBfL}XP$4? zmT7VAdh|Xd@wShW%_?nwh0Ee>Pn5H~lpX7!D4+*Eej^kiY#QfP8>?0m=Z9wiW&}Is zLaZs7{&BXXoN3pe85JaqqGBno7OOP1&uKVyJ!hgnJrT#NO!ac`)Jj2nvLr5gx(@o6 zjwq)-OASx-Ci`P-JO`4GbjQ=F-ic&SwCH|uJUnMC{*;tFMt1F$44*xg-y6yI6@Y^~ zjk*ga&o7Twexd(&KzH2zk;AdP%FT zz^8YqKZ=s4-i>75%vvkW6ZOuE)_9HNakXK+K@V2ckT2+$=bUBw-x*X_x8l@1bxzrJ z&OC!_B^{fOr<{NW-rmR#7R4iHW7Fi?$m!@jxd;Q1^2S${(~R^wR1}I$R7Yj=o(M8$1@QS zL62%$8UErb-`&&oS4(Z5blLWY&lyR-aH~jgW|~9U(m2(jG?cO_yy}dy+8m*6y(z<| zOX{*|$(HDujBI2Y0_&_rP;{a+AEMwvd{T_ssH7oJlG7-h6^K`##nbTdwh;cTbk40F zEQO5sQJ*3k=crftDAkjOCJfQ|GcaCDJ~mbP?DIldkh48)*~Di>12k-LUb@!Kk}OlC zWCU2?-_NEGii5Nk3!hP%8D{*OGpbSflRt5x1+KR=jD43TnfgU?It6qVBHbQ;MJXSu zCygp}o_k9kOxCRQjKrH2l9s4xjgPNm$d9OQ1D^D0lyJ7ME~oj&+p=?0Ui8OoI&a#+ zQoF_H*0rXOZi$OW=q#LRPm+>*KzWjA-1(k_&8vs+W$6{ZQ3??tPP4aq2pyF!-dWkoyDD3qsg)6@QrY56uWaG-D_t!|Rr(9ZSGo%8DvaM&>F0M;R`cIf zmhttK5x%*y+IgzN3hz|5w!B+m&dy4g^M0k@`Jl4Q`KZ$0^54p`mR*(AE&r>Gw0vCI z((=;YtmTH8;vMMBE$o(=tJ&t6%L?z!>}RiZuP(gay`}I*_ekNN-CGOWySK8BXDwsB z%yD|0UWa#cUM`nCud{R7w5d~P%qSE}rM|vCR-Dp4ankN>J-xj&r4-7ih;ZU(i=Lid z9&I8hY#wc$v`v$F3gYi;ifzi2(q5(Z_V$w8CW>u$u^rS=?jAR8Oi#4+?b|2H*k>Qs zom7I?sVvi`if_$2l#E;pb6Z<|f9&3K#?P9y-|Pur*=>*MVKBL?@>PG&nQp6Z;|=+M z{l8Y7mFU#8LP3--DxNe+6gyFLu3VnhS$3ShzTTdm0}m`uo7Qgle+h-9-(L2;)6Y0< z`FFm1W<#?RXWfkAl!=pidpmm$X5Q4&lFmXq6J74?J^9pA1TB{er|0x{{F++ zH`z(zzf+mF=v(vW&p-0(i;p_`_(Km}u;3fV9((L@One6{w`1m>aN-dS6*KDeCGNlq 
ToLfYy6FrNu73?fA*jfJvVw-2P literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/ucase.icu b/go/mysql/icuregex/internal/icudata/ucase.icu new file mode 100644 index 0000000000000000000000000000000000000000..011e6053f79610d269ecf36ce21c97b189d6e235 GIT binary patch literal 28898 zcmeHQ33wDmzOPC5%=Bain3+Z;FwhBt8qi%p-6h}^HnK)t2||PbYS4g=!s=Vu^^41U z-bZxzY2qpdPZZFI2dJn)T0!GIbB(!ZcsO>o7FAqc6FP&Tm4S`QT2Pfd!ycJg?f><#9Qhe>^3w-XxSm52EJ`A4ZPXI6Xu7Dr$)yf(k_hs)!j^Flv z0KCP!4RDvY)60CCkFs|2?dR+38|XXCcZBaKE)mtFhZWpvyl=ekN|J%I1^2?EG{SSYE=F_@svvKFnR`%EW zX#=%tWe^7SVZaw-ebV=1yd$`6Ifs=Azx4L|H2YRy-wNzofqg5mZw3B8tiafx^o{pn z%DZ*~;-9J2X)}19S=wyvD!i*3v_@@?cDHt~_Mo=Jtl(*F73g)^D}bfmH?()P%MtHm zZJYKbB!AQXsrdrcN+=Kx^a>mjC^8C6h20i++u7~V?lZbS)O~CB0X^z^ z%GTlb{hgEhMD-;Jy|3cU^&7L+PJQ(p;SMgwxNEs9$Qz zuMPY;(8#%40?mQDO?l?HmiGR@BAd_SHrlcw>Y2dGz}mn|(7&F><{eZ1fk{;>$9lI0 zwg;ZYPxihF9EBb4yTCu8RSTDU1BH=7io8FEL=9K^LB7I!P~l;wtXkO`IFfTm7anhu zRx6d*dC04#a53Kvr*iI`!V4T$vl86w!mBv-r^3G^By$Rv`tB~guW%9g#}YD%=j^Rq zb6j9`!eVXVE1)*I+PAw(o$<19AM90(PvO0zaCQ!F7JD6XZKRBT=C+&Hv1hNUk?{!s zn0Hme+aK%^+%G62^))FevGBm)VZmX+;lbmACj_g4rNN28+Tgjt3xb1!q&Wnd!NG%q z7ejMt@Y>*w!CQk@1g{S^1)GCc2j^voD>BU1T7t8+#lfe7E3?{KHRVpN4Ze~TcViN7 zkM*kU3A4+)PH#rGEy3-<-5IiWJ-WMRNpKH3%d!Uw-_E_B@v+|7`20HC{JbhS3ALQ6 zaGytR(j%24=_{O{u+g_mR&#RcPjrWvLcx1jri zBpw!t$@}7Z^%=EO9ps(tZT7DBe(x>wnLdf=$$aBG$r3j{?%U?;4AmGw*vk_$=KU~=4iql{?7e`@AvBavO;jdxTNY9xuoWwrlJ_933(@p^d0C{KI(E$w z@_8eDREJHJIyA+2CXFVev@japgAomn2ok&`xIWlBgnbx;`rOc_5M#Z&5j}~+xg18m zXym=UTV!WTciL?`I4tc^-=nQZWM?2OcHqizL%1#6$+@x!$?7?beAtK{Ra+w+kplcq zM*%B~#-ouR`KP6g~6y=iI&* zkX&emm0g!iGg#6fSdpNexqmv&zFPLEwC~7nmrL3{Pcb)9Z@PHWFIVgyCwZl0W~-Vy zeg$adGPmp&ss2_LqNqYCp{%r8#M6}{oKFUdm5;q!Z8EFKO)Et;2uL#GoP%op-)!=J za^inw1!CS3()=m6Y=X*v-DNzLud3iAEip;E*65n$T2q~^0?O#@D!=05noCI7>cYN` z`;pZ07uTq3Gmu`izSc~fSDkdCNeils@~6*Cn)EG`F1?r3$STBVUbjq^h4e{>-MrJ; z+T|6T>8iJkWcQaeVOhT_4Krft>3pBsUC2Uo`EvrHm3i#8?zEM|U7swK`SSI%#}U=h z8pvpNEz!yKr_YvNlN{5Xrz*PI()%XY@18;WOYClyUTXIfD=AkrIX~?y?#&q^ot4vf 
zZ`WKzOJ5V|lPj_2=d8t@6SERN^wg#1BAt-enNN^=EW6Y#E=5XhB(uj)wa6|>tvZ`e z9nYG&dg=TqgUScZf;vg4i~7~XTS=U5+_KL*Xh(gAzlDT0k#2R9)v5r_#-^#xAeOvmr z_YL$b>lg3W+^?-)XTMUT#-OSjj8>xqR9Blg8}%;E+(#5!oif!FHx#!PcN7EkoSzhsWv2W+7MTVA)3U*RU~9?NXBr}xykHt-SnAcl&dSP zE-xoBBkAIv(^&_dcJs?2e;Od4L76QZro3{+Wu25+)Z^3$nT$v}ao<~zbmVPsPA}3} z_OzqRPSQHWB!2H2`CFCCxrmrlO|2li2&qbI21I3QvJSE#x|Yw8s>rR$xd?0W6A^1O z%tGdrn0#qn^KH_fFtz%UmXh|8K>xD-4gK5u2M#Pd&>8W(1G~~GhVXMnOXb9>(;a4Y zl1Z)tE|L~l#&FJ^?5HJQJLk6``E%Cy8eOLb+P-xWQPS7pWeo=kZ41XTtBfJBlxM@~ zGkI8BB9pW!F~3`~qKfO28|1sGDv~Er_AYp7ZP%pI>3{C*{C+N--L?Zu2c*;G zDvuB77!V)WF))5m$3e7Ky$??JU1M%FQQipt4XV9Ie{goYj_zx_mUz3KXuF>1eDrSd z;|E974vgkQB5Gx61e-HylItbskfH zYzMN%kLx&&ug~V_=G)6v78oUV!J1L+qbh&JSlh2EE23|T{(Xhek$?R7qR~`q$LRR4 zx&Maf2Dw+0zUYLu6Pha6aWYm6w*}Y0(zN@GWur=1s3kXsn$hnab+Q4QL0Tok8vd zVwWIJ)+i}qMV7)wT4v8vV44WZ!@t)o;Jcsrj+4Iqkyb)N(B6ofbc(Q=d&;I$gkD67 zcBfdf35xa&XAF_Agz=9F+&HpZ`lQEEEpw0jJ#!b4X$MN5Bbz4LEqjlAoxNwvW7p^Z z-JqKX_RYQec2InC-FwW4F_XsBk7*ube}gO=vuVtpF}j!FR#|Inr!A>FCH5P49ZBhF+`H};TRz|7d@T0?do6a_ErOej zqRLoq`rKq9LT6|9(ce`rzhDhg|O-{Q%#mv^S z*~lxjH#k$R^n7O0OP!a$9puw&_uAvS=AvHm?v3Bdv!3usuk`swjGZ+0lCg8gj)2Fq zv75&38Qc3*nLioRx{@sV`G`{|nO50W+pZ^X`W+;inX}1zu+9`qi7CiH(9Er7#js>< zRGq`i&B9&gcHbt;+&xKZuhf1X5<5TC1GP>M_{$wr)_wwW_!0A=AyO#gx@w(vxmgJx=l$)BlM7dui;=^|x^|X0 z+H>!EWJiA8%Ir?%T8;G1FS}(e{pC|(y$^&&<)`8b%PdEVc5Lg)Sd{o#V2(j-X3^rG zqT2!{k&zF7ik=kI#>#unCdz}%bw?2)Y2`&e!o*SUT65bQyBbU0V=6C2)+9JlZ`W$H zysT=iTqKjZEX&MPelxVPO3Lk|M^%uXnt+*W5O$V__*Tj8V?~j93FkLgD~3HX(Y48f zw7j@>5(+cPRiyQzpALxG)n%}8Q)P7dporBU_xu1J-mv(KPXkPYJ&Uam`ta63q zdZ%in1>t;~+@qwgjFlY6t$u0i^iB38-fronN#_#aNL;MSQXP(r5mn*E3V2PLm@!hHPbz7}${xnzVpF4K? 
zWY3Xo;q=d(*J+y^DVZ0sl6{hYvQI1J-ja8^%)E?|$x7NtOz*kW<}OXO6A~+*d4P@)fB0% z&`#o-3T+(M__XE99ZI?O3An)}tMI=?6!*V~UBt%mei6H?UxoG}*M(2;u&NDfk)@Ii z^;Bq;pyJ4QnO59?dBJbl@`6+N@FC|o7SPssUi5SWd;vJ55e#T;o^_D5dDa4lG=c%` zWzS2Hz3f>J9MT9<#Pe84IWaK;+Si`lu=(1v3pk_^3~Pf2g|$O4j>RxAVi^4}#w-YH zCt)0dQGxMYX;>=<{s-`97{3BNXhFEwe!$TM?A+1ATa{+T!&VqH`yq$7Dw^Vf#DW`* za_uo#yoz;N3j8hb(@Ft*P5A~?E2!g@6|h+b-A+iB124rjSOxqR_|?F>lqy^o4|@*$ z8t`udw*eo-*L=v~FY@C1Kv9|$li@gKb9BT)h8`}hUdu%h{9kF)- z?~J_%cvtLwz`J7~0NxY(5O7}XBfxuO9|PVO+X6U0wiR$e>=VF+u}=Z-k9`LCKjmSE{p8}d^YwEz!kB70mH)OYGW zZI`|aaJRl2@N4~Rz;E<#0Ke701stYERUfWiPrzf;et_lpizK6Q-TGs-uZ~ITs+bQ~ za1iuot7ie8wD81*;n1pJSW~YLYoB%xYs0TE*QWOKu-may)N)ME@40NKazbJdKZ|L~ z_{1RoVpgb3NDSiZLDwV(@l)A~g7U=hA%1PYn?fu57Gr0+h>`w9MOPLi#$_5kKf1!W zL_#U`aH)h+I*Lmryh+noaRD>%>>JIB;5^_0CRYHi(Bu?wL6cL#g-p&1uA9mEz;!n{ zKe!&mg&g@fXXfLanU8a3KF*o>IA`YLoSBbvW!Tr|c&H;A;aXh=3k8@@|&YAf*XXfLa znU8a3KF*o>I4ASb9e*KmFGKES#__mA1Grz8;I9ic;!Q=`hd^Hg`WoeU&M#3$K_>>? z^Td<&DA*80jF3IXXuQYhEhxNaA=i53T5slh3f^-PxgG)C0JnX&WlNgkXc(SHk z1Z6HOhIq>|=rugG0;YTW-o)qA%A--lQQ(C?aT=Es6i+tAqqnGpB5LZ}buo{+8oS24 z#1Oj*?FeGXc*IeB0m*7MR%Z`ZXOFo$ZJ2Khsv+$L&^Lmc|*Xq0{?%Sc_5>T3PI2C$Xx~Xy6xFIE0f-2s>^FrGuK7*XIK+&=&wM)E5HYuip>&0RBg6pVp$c06wTc z2)IaJ1o)8t5a7f5!+?+Ij{rWZKMJ^5UktcJUjq1;{uton`s094=uZGH)t3T3sXqz$ zl>QXp)B4kZt$HiqGJP4~Gx{@t%k|}e&+5+tuFzKiuGCiouF_WluGUurKBqqi_`Lo+ z;0yW-fG_GV070QP zzY6%8{u_@(|O;8*%rI^C<&*>pCV_v!qtIvpqa(Kz3a1{V4m zpijn_0e;BgWKR~thG;1zJ7GB;w(@htEPN6ek1>n+zE72Tzo#6tynpakct>OW!7KP_ zppV3umf+6^Jrd)5&i@zePJ#U?ru|Ilj>eejt;Fv&Df}Z5ogWyR+mE5*=_L7HP;sUt zs2NPu@CUc=pxJuzrpyu?|VAk zFg6Dsbgsc)mjgv78vK3{DEf4Se~kfO+NBwL!eET{P{v*WMLRQNPbR2VRL{b_IcxF>_fEEy$%^s9_1!wBbSX*-sUp$eG3%rbBt{TMLQu=K2A^` zWwElwAlWG8GcF^$DoH7)CMb_GK0%FAPU93s{uERjukUXOs#W=pQ&ivA#t#PVg^Yaz z8J*l2yB1>>t}A0bvCgw_MH$P-S2BUye zR~XkD3a73#ZZK3%U1j{q@N(*E(ZTLBLt#PA4=Mu(lGHx^IOu|^B(QMFp z1i#U0%r)pN!r0Bm?FOAg82gKHhe2l$#%?k0H0b<+d&9WPptA?AxpB8a=K#j;F&;4J z48Yhtqs5^2KV$bA4;u9DXY4*>kwNc$#^xIj8T8I)Y=QBxLGOFU78;Kj^sZ;@e&bPt 
Y-t>$;hE=2YF#cX4D0=5Gwj0#{01+=r-2eap literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/ulayout.icu b/go/mysql/icuregex/internal/icudata/ulayout.icu new file mode 100644 index 0000000000000000000000000000000000000000..598d347cc1e5376fdf8b799223506fe87d5c4f60 GIT binary patch literal 13488 zcmcgy50G6|dH>G2f8YPjF0gUg>@IJ$ASPjpkrtYUh{zO;K`BL18Zkq0(xy=(*3!}# zF%?lJO_f0^kzpWpB4$#iFo+=_Q&T1-L6|~qOZa1fV%9{VHnols(%<);Klj|XZ{Kc+ z_IrEpIp;gy-`{u6z4zUHWAx<7#}SjJWgoigBMQBSjLoTzW@$f8ZzS5ZXzC$S(Rj;9 z6TNxW+KbMi(P&3agS83AQa5!>?Rt8Uj?md^oq7Prt7?f}r+4aK=*7{WM@#g5(c$RK z_)p{Q@pExIIXzjMd?k4@8A{)q{;S@v58-G1&yO#O zS0Ve)#W%%Ufx!ducjIS(#7|_{F4PCrb3o>bGJ z^wjjM^c}$L!|5l}Yf~=Q_36!M??}Iu?oPj#z9_uemhljz`Ouj;Sg)Q9XQyI3Go6*a zqeJff*(%HVxNSd`ZOk@zND*xl&b`@gw2x#9l260g(^l$0b{Ok`=x~g0h_0=C zxw1!a-mA>~mdf2$Vprt}FIzbK7`*_l$@itok;(M+3=lAI^=&N); zJ&n79rh`;fZ&c?iO{;Jm+vhs9wX2QY!riY@J+9xXKd3*YzpB4E<6i2_@RvQ#^L)zn zWp+ODOi>nR3Irp022ycGIgiYaH+I7QRLjtt%A6g?yC;fcq9iFU4S~&G?sIz0*%Mj! z94lXHku-Wm2wiiKYjPlC(1UJ+fL=G1>B48oVeQ!9LkU5E58EcGJO3Pwr|J9~0ef-w z(nr(DG824^;w4E^mo%|_#T9$dHuh$1#tZg@mBBF3;EHsVr1FYV6_7-hL7qIuYNu;88`z!DucF}gJdxQ7BVW`Aj*4=T=8r5 z1gjet2D2Oxi$PXwv5j118SgxC&yirvq|6?cJ};dNe;^x!@8q~!8E|98J{e9pcZ(0! 
z;a(7s6H?~ZnPF==uYRSXN>$=)l;&8PutmiU)Et1UglN#P3H0K)#xaB7&R2}JQ>n0t zEYDIytR+&=_YT>X*t15JiBse_cSlO**#zTPCd0MoK#td&<)SAr@vH!)6vrHCc@z!| zuw|8FDvQ0cjgX~EC;|%RK%v>p^Ja#M*+p**#xTT*GG<(2fJzJv<>t#82W2E-#(EQ* zj)_*{VL>L(m!}u5hK~`mNGZn_Wcywa&dr04SU{bv5r%`}P>u@-M|du% z5+7l;TxI1+j)ff=_Y$1*EDI~97~f=gZ@_F6x^cM4EQbSA;e`>1h&j9N%_swx;g^x5 zzyVk}p2F&Om@}pzJInTE>`%M{nXrg_L7XuThMxfO$8`*GkhFM;abxd7yp>%a?+h%N z_d(}JC-lX%)V>e8jBY^x*gj8Czq(LuQje;JUZy{+(29CZrX+T%zL8O+CD)q z&>@OdziO+~)j5cErCO`5LCm+RJJo}T{8{yqI)eD;=>__?k;SEYwZ0m;d_~`(cOauD z^$+#Wk=NYll;}5+-Nn%r(Vrm48=@`I-N^Kb=!NJI^6ih?@#)C=!gyu87P)VVZ;kH+ z0*}Stk6#83&1585m@G+_Co7UQz+_``OR^ovJd!+}{0R6o(s}8EbaA>Yy)<17tgZ)I z+vM1p?gefK(_f_g=4O7jI9mn`S7lda*JYD5Nq>`V$?nc}WshgiWiMlaA5BgB@{#;S z;$vanmoLfL-tzqY`Q`b?bAC(L`}3K6wfa(id%i$V(p~v?MElC<@%*{`ClbXZ=?Yuy zuZ&hstNd1FLf-rpHs4u!5wjntd>Av#{UoG6Te(qmzE-(M+J`ESSDvpFI;QvE6;*9j zJGtfYQ1#cUX9O}I$j%K$OX;H0sI1$2bfv}R^6Dgg3|1!T)78JK-ctP@{Q&*#)g9Fd zdbs*;)%}?NDUP&ue2p!>p~h$|s(o2c(7CmXYR_Zz`TX+Qm9+`X)>YcE5G>V5S|8mj-gji;MzFW#wj#{UcXS@qwkUszvRUkhuKbWPpqd=ca8 z^V{lwU;hX69})KhJq;dDRQ5sF#SpeEe}oP+4mYAE^Ns#MbG&Kf z+4Gy5OYMm3^6utk&5w#~Xvg(g{@Q%~wDX&qo12rgt+~@%TuOVvnWTNq1JF7w{k%1| zbxMo5i&{%s&h~8D<JJWToixZ&TnxAdd-C}M`b+v-`bYXt-0x!gA39DgQWvS1uEenc$FY63s9jxcykNNc_4-_W zx!$0+=w13*UEH%~(W?{9^)w6lS%|=72MrH*Fx+sesGI3%wbo+nv_JE6yin+#iiv%y6Cs#iK>y%VvEc6-$S%K<}{p`_D0 zc%C#3Q{kN1vvCfi@<)U4D1NMS=)31JA0`|1D`2kfuh%Jfty(xo{*1wlnX_jQ-rif> zB?e^9j>^=Txu4~)f`fyDLFy*$nQ`y*?s2!aEgs!)m!kO)@IRj8IbTkh-AeMNtPqqM z*^{faq%E1VQ-+<-IEu`r7KMuf_JU`6yxz4qx1Koy=OAni*Z~Ln2KP?yh{Xm6TO3|* z?>2Xn_bj}3RK1ILlUXv=`+PM5p=!VTno?ydqr z!R+U$E_o$6;uU23ke9Nf{2-EbV7iiPh&{(skh3+rmSMIuBsR=|+9RN<9_mr#!e!Xw zbZjZ7i@*tBK6Kx%;j$s_VUOLDF0V1R7rN@!FK{V(xC9R4N+#rDaxlAb*OoOLI@0Q~ zc3bNqd=r$fm#u=R=uY1=Axw8KGqb(UxY9c^Vd;^vfuR~G39`jKv5xdulBGB^2%w-z z(w4GHr?`3XJPR`9pk$=5H8R3$3x*1KV7c8kOT~oB<1lEhX=!Xiae-qaurouz^TZQ} z`>MmkLjxWbg^KT_4(M2 z4(7(&Mb(XAzkpCcAA~Cw04%}l3kzLY5lK-B-jGak!<1f7JsI;J;C<`UunW>Q;@!ZR 
z@Rl_2xKOZF=Z%D`yYy5eQet;(z?(NgxHHyav7^PMGfg8%lD)Z2jO*PPt8q7Duv1upCl^k8)T~%L&w&VGexUQ#1SNlWkV-mX zw0_;QKa9P7?u_C^vcxfr3Q}7$Pmd1rPrN{}+{>Y3?^+H7e=7>ya97mXM0?Pz7hD30 z=K)lTVKhbnBc3*l8{FW9G#=WRXBRAZVqXlJ(^1Dc(GN!}2<55V051`d!qlGRE z7?Y-jT?@ChjfPb@oWMV?h*1}{lMF!MdnD87pesi(NrXjj=H)C96*KV!(V5NIAX$|Bq%3Qg1j!S7)erRuB$KF^84#~@mUcAvOXtTN|(@0fh2by z>lpv)hK=at=q=GZqd$tSi*AqZ$A~{q`}URSSM%xg)A{!N5?V!9(RDy(E8R=G0YL9x zSDN2U@`~v#H}iaNtou~&Piuzx-r9Yg%ei+jjEeU+ljzs!MjBD`)pok0BpRv3@VZRB zPhFvE>J!%9hK@+qLg%L?y>PGzS~q!(2&3aMI!Ig9J*p`dAF_6fc#B?W$@PobK6OG5 z-B7Mi%~e&%{Sp|> zsdJ8KhyEu-`0qF-_0Kxy`M*(9HF|TpMbKL#+$SdUndE!Tm$B<^@06&U)a-fvYJ*(+ zw&MC{KD+U~bU*Hi$E!Meby}o;7yps?YmB~uV~;wZYI^(_KZjxWZMsg=*)g7{pC8vh zgN2o_U_RIQUY#~e{}nsqBuvZx?hRzeHQ6l7a#Ke;r)2z6R}W=P6K5xHuS77er;G8R z_}WdJV=oM*Loek2D9E-P$81NvR_Oo7Ttu>QaSAqY6a|A)6w~pY&`k4A4sCZIa9tv= zAmU;k^M;GqT~`v&Y;J@rWBc;LR-QC^Y$?i_=nh>9Wx3brV= zVqrJmb)RPzf8YDQzyE(Pc4wb?o^$SV_c^l`>fWtL)ye;Yb!VM?)a)R>i&H;qNamMw zr|Wcgo~P5j;0e{~qIJ4|5B;D27pY6u=_i3&*w$qFeAX$CG4)Stzl1u?Ff54?1Qk6!w!V~683jkXn0I`T6lhVMR;@ggm81XJA5?! 
zsPLKLv%=2^zaV^J_|ovJ!qCq$eQadyP~h$|x2Mr?@K9I-Xx$%tJMZ$^9=@m0ic z5r-n8B9kJsBTFKyBU>Z8BaM;HNMGa;kuxGsk328(lE}*=uZ+Ata%1F{$Oj`IkK7)) zJMx{#4ZRy8}mra(=o5cycP3N z%vUkL#{3%-78@Iz7Ml}W99tFJ5<4N*66=oj#ZHQy7JF>$$+73eUKqO|c3JG&*bT9_ z$377IRP2t}U9o#(_r-n|`%UaGvH!$|#>K>C#TCa@#x=!t#hKz9aU*fl;*N`(6?bOb z1#$D^mc*@&TNk%6?vA(z;+~9qA#QivTXFm1zK;7L?w7bf+V3;@jf8 z7 zZ^GjV+Y??)csb#%g#8I$C;XJ~M?z3yRAN$MR$@_NWnx2OSE4b|nK+VobmH-eCnuhr zcv0e_#3hNV64xf)khn4Nw!|%oTN58o+?Kc_@#Vz5iSH+VoOmGd*TkTt=%mD?%%tL^ zLx*aU+LL;d%t@{!Z_?zX=}9w^PD(mG>B6LiNvo39B&|!@lyqCtJxN=Wo=)1C^m@`e zNgpR2NIIDGTT*CpY;sC+VRA)sZE|yRXR;~Tl{_u^_~g@)&rQBKc|r1J$;*@1B(F=} zl)O3lp5(2`k0ozUeku8l6DiwMcBH(K@@~q9DW9esNclD8-<0sw_|){& zg4EK~y43d6?o?B%D|J%pajCOYPfwkbIyZG;>J_Q0Q?F0mlzM0CL#fZCzL2^*_5IW@ zQ-4bRJ@uc|khIve#I&rmvb4IiwzLUp<}`QOXxj9&nQ5n`%}Kj3ZC=`363;lD<9t<@9&bKTQ85{mb-&>3^mNWh7?gWt3#p zWHe`VXBaaa8QzR38OLPI$~Zma+>DDdF3GqwcsApu zjMprd$-F!B z;mmEBJ2PL+d^_`l%#Sk!+BbFweaUXZ;cdwKSn>~-0+%*)Ix z%&W|6%s-dlO^=k3q?Jnul>!Ms27Lh_^X6Y?|jOY^JqTk^Z|4f)o5U;ecG8TlvWpPoM_ ze{TNb{AKx9X2y*A;Fk zyuI+A!p91qFMPG|t-=orKPx;?c(Cx#!r-FlqU55?qWq%LqUxf?qV}TxB5Tob(MZwM zq8UZAi_R*#sAzG~ilS?ZHWb}a^ia{$Mb8zzQuIdA+eIG~eO~la(Qifn7KIhZ7N-+QT>D~hRkc-3sIpg0tQxC2vTA14$yH}pomX{9)fH7& zRb5lHp=xv0165B{ZLiu@wYTcMs!ywasQSAqvO2Z8u)3d(w4y%c-iLc41 z$*n1>DX(d$>8$Ck8K^PUSZka$!!^E|DK$seoKSOe%~>@U)Xb~7v}S3|s+y~7Zm8K< zb6d@pnulw)*X*p>Tk}!Pftp`yf@;HR6Kiv7%WLauJ8O-#uG+EM8MUX>o>x1s_R`wr zwO7_&Tf4sYmf9_~_tid8`)uu7wfky6tNo$&m)bvSb#>u&@pb8S`E})W^>wXvy>+HK zXPvKZYTb;wS#@XDT~N26?uxqAb=TJ2RCin5J#`P)Jze)g-D`Dk*1cc%aov}7-_`wG z_h+51KD<7*KDj=#zOcTszP`S-zPo;)ez4wEKT>~0{c-gt*PmH`Uj6*~OY4`_uc}{D ze{KDy`n&2Mu79R}SN)syAJu{F4b=^;4V?|W4Fe6P23NzRhNBy1 zHq2@`t>OHJxeW^&E^k=gu)5)fhK&ukHEd~ksNwO3=Nn#Xc&%Y?!-ox@HXJ(iRl|1; zzc>8dplb|mOl>S}tZb}pY;0_8>}>399B4E(S{vPsBaPDC zQ(u$0$=)=vX;Rblrem86*ivW1EwkvziN<%bIJNo0>bC`^ViKkHUHhLYYA912Y)Ng&Y{_k@ zY3Xh;wm4h-El0G>Y&o@MPRsn3MJ-pg+|Y7s%bhI`wmjLgqh(ji-j;nWpR|0_@=MFV zEg`M3t!b_KttG9Mtxc_MtzE5_R!6J5bz195t#euzw_eq{vGv~8r&_nS?r43f_0`r7 
zS`W1T(HhZ~+Lqf^+E&}v)YjhC(`IaQv`uO|w(Z2WbJ{LwThMk{+sd{Z+BUb{)AnH7 zqixT$?P%N6_I}$}Z3o+aZTqWD*B;g$-JaZ@(Vp90)n4D;(mv2`ZuhiLZ=cmZr~RV# zCGBh5Z)(4}{f_pB+8=M<*8Y6^EA4Nzzt{dr`+@eK+W%?~>4@n_>B#9Q>8R;w>FDV& zb~rk`9aB1v={Tw5jE+Nx&hNOSV@b#Ij;lLv?AX|`x#RARhdQ3>*xvC{#~U5*b$rtC zMaTCYzjYky2VeVtP~kLo<3^R&)6o%1>`>%6LS zedm_WM>?PDe5rGH=Ubg0bbiwLRp|LKnEN$5%G$>_=JDe0-|Y3u3k>F+W1*m_(&-kwQ4(|V5Xnb|X|=d_*+ zdKUMr>RH#bspsyVM|+;@d9CNYp3i#@_Wa%x(i`8K)mzwG(Oci!*4x`_?6vn!?48s* zz4y4@*}Z4>p5HsacWLkH-Wz-G=zXyF>E4%m-|XGr`%Uj}y}=XWCS*=1nNUBWYl3Nl zd&1-i$4)qP!ub;xO;|bM`U$sAxNpJ}6JD6`#)N$nzMAm!guf;P_eJ$3_GR?t_m%b4 z_OCw6+`h$q%lfYByS{H@-yMDT_C4J9RNwP`FZb=~ zd#~@~zOVXz==-hj-@dT^*#4CM?Ea$u%KnD__WlX|rhZ4ir+=&;|KC5e|K$F&`Y-6e zr2q2%mHlh`Z|uLNe@p)Z{g3rO-T!R=3;nzLclYn@f46^M|0n%l_J7;|WB+gcfA$K4O~93 zdSLCq^#ivI+%a(1z0SCs z`lI!;^k?hm>Mz%?*59DNO@E*MasBi9*Yxk|Kh=My|6Lz!h&7}e3JukURzsh`YVa7Q z8jdrZYBYGvmxdn=e;dM$iN7eO1(?6yVbCfy3oMz537nv)}b>>Sh&+6TRZQwNV7JbCb(!MTH%4XzrzZgA7!mcc`Zwhlfu_`=}s z!FLBg8T@AOm%)DrBP@xQEK9Ma#?oq;U>UTyEt4!qSx&T^VY$Gv(6Y>OwdE$u?Uws2 zk6E6z?6kaQdDHT~s=j zWj)<`uJt180_)}071lM@8?Col@3lT^-EQ4w-D}-v{lfZ#^$%;XEyk8=%d?f+8f=|5 zz0Gd(+NRo$wViA`$2Qk?nQfKrI@>1O7TZ?aQ?~849kyMz*KB)iZ`@oI4d#XLto@+0%m)ooD4fa-hr+tFmV7J&E_F=ovKFL1Kew6)K z`-%2b>}S}|v0q@HXJ2GrVqamu%6_f=M*Bwlt@h3Kd+ZO`AF)4S-)4W#{-XUA`|I{M z?eEz?w0~;<%Kn}GC;RXAf9%1I2uG|V$&v2JaTGer9Mz5nN2{aD(dRHatd1eaM8}9@ zietKChU0k0|Fi@7)@kcg*S38=U4soz-UcxGm1HVJoi=>7C9(ol!q*^Xd6! 
zNbj?nfz2pC8Qnv}d~dKhZN4!swpm=f=y#cIgKp1Ip>EXX;~qAX*=_OYhplxwvt?}9 zXVc47R=r1WfHtF7#yVtht6{nHCUrYZ z*5`JrZ+&`~#qTTBnQRst|1d^_&82SUv|02%zlSNBeO8-$Se`cG9yQCI><%X9_Xg%Q z=CL`QYA{BRZJ4`Q<)&Qc@(&qAT27nGtmj{!ZOF`k4EiBI^R+TiJr8l|3w6U*z1Li) zb663a%-7~II?bj+ore)C;aNNw7t%ql@}w~vw{bhUVnpw;Ard$55;5uwW0UnR#KE|| z6a9J_zt`+E8+~qs=kj~4etE%T8{}qQH`0{H47p9VK^Bq++ALPzsNQ2@oa%c-yRj+NbywTv%Gk2cB%CbBR!lC!+9coU4HkT~B$vnc37KX`S&7OcVIL(6y zYG_RF8WPEP^)7E6!*E#w>*BCE^bS978FG)9ht!gyI%Xvu76`TA)&{5Eh)^tMx5uJZ z6A7S42$2!#Js$UHp-u_OXhlgKW?q%&2Bv3m>g8z+=zp|Meqi{PNj%!fEtF8~jLT~V z498|N$$Zt_b;E9($XZ_4JN09Hv95Y2j~mjK&E(OK@_U47@((-ROqbsqm=cSz^Li4q{X*&9h8ODXe;%!|Qi&#jxLH^!Wq&Y3E6SQJCDLuD~)Jw(Wpq z4nJIZ6usZ0_gZ}(Y{Y8wphjXXNJn(lYBqSxEU{+bNYv$JATIRE>yzac`$N-_#h`c( zWg{*#R&5($ibG~I)ALy|ESZT5`EzUk@UYxk??L)XCc}EGepGy$-(Y98Wqvl7R~b4t z6jAuBZa?~K!g_fe>gc!QoqWvRXJ+6yGoEADV+BgIjL?K-Xa+GT@(J9BK{{|a+`!Eh zqEa3Q;|Yk#feUma39HCm+@Xol8qBPnQ{?RQi;gKvtE)#&$l zl}IrhTrA`A;c=7`$C#{slYSI8#%NguhuNp|nDGN%k)+8zWOKRsjeFQV^){4#od{>SI>2tAI4VP-eJ8_Gz2fNN0r1i`-cXdA`zuuq5?7~ zE6yIBM4TS83F&ymlJ!Q9d)VrB$z!a{hp9TiuQyu#4A`jm8}(kL!>AAo&;5tp7Fm}e zQEtQ{9$Lo4d~sRa9C;!Mugxnuj#S0*ssSKr30cbPiJ6P__{^;7kRD4H>-2~^$N;#L z7hN1zHd*9RZp{!3YA_bR%dSk#X&z=${KjcueloW~o0nA>W(4wF&0x`Ghe*%u^SO}& z-+0hw)YD_;dBilYc@j~9kniy;F?-xrlt3=w?v5e;^}79PElkQi|Cg}HZ8c9JgD?{j z#|M2lP+?0-7x4m4{Rs1Q=*d#}*EfoKX^y9k5xc$rBUt^2*{g-|v0;x*jLP6fTQJ&T zx5R1UlNL0H`pnpkw1#~85szOi*(_R*C-$IBL;5iZ`d;kCrNb|2t0|`I)06?Hq`4v- zmrEiz@^>o#?)JF!vU=E`+b*71@09qA$YdZxlM#yq2njDPf&*e576ovbNBK=aiA5in z3h#CJJ=mMutG9?HSdoORgx+jXD#^%fUJO{2%C5ZXpmGO;Zt-STx7$Gy;5AA(^XQZZ z;wHACVdvoy~V9C@Jf2zoEF zvw0mzPNb}!paKz+BRt`=xE;^Xlvr~Yep#tiHIE$u@Z> zxsD!;ACvqL~J5gAfx+FBBMdnFHtR-NBWczNBkLVrxh}v36EzlUk zoZF)tv5E6^>OGPs>0E6EW$WoNgA$nqgdfcrNzk@W4oBrpfMIIz$43t-qiDbk+ zJp&V`r#mk@|+l%k|*)SSE~_h9vi_xTIpDc(MXGUUGcG1Hbl|ErGmZq91GGd&Y)ExYfdYDG1a{q!ki#WYGL0 zH==Tpyg+YtV14<3{Q*w?#{FnFm#jMCxI$ax$U=N;n$C zEC`3P0$llDT8K*imq~cZbCtMU%2DEaaUbJJr*1Mit8N%88pbV;y3sokSU^rHQPn(0 
zAjoRb6Zd5vUq}Y37ShM0Ec#}V*dTsJJQQNI5s@u=>{H4e+=p2lqVl~W2M5=&Ak5@& z5R{2JB#w*s#kb+Ku_&)(CfJh=|AyKrDLcnBH)Bvy42|-8oMh!VS86k{E3a4CxQbJ_ zQio=OKCxo-LkuY3V8wAT3J=pV*}RBYtv;1Eo=cQOKg|6OhjEbvIoF_Df)EdtyjKER z9m>rp163a}RCz10Vl`hW&&|#;O&M|b$iYE#W2gm1XPGBI4aoXTWahN`VF z%fmX4Nsefibbafn`V2J~7Ln(L?DnqHNwJ5N7LMowAKO}C=VH-0O zqr5h^6ih@3v`<<6xSOQ17|;C!xhubR521TTd;y&haVR7)qhi)(e35dCMz=pu9EwMt zB-iM%8ph?u=}L|?tVdT+G}Sbb3eGAos0t3ZXi>38+&D>s2N@d)>SFK`Or_DFR7;eO zM;eg`E8{`vUfU#{6h30bxC%mxNSACR5Of_b&1fZw_7CYu#Kmumc8ga;m|QB; zA;9rsAbC(KhINx+icP2=|0jltBXQuf@okJsR70*JEd1{cv`kTr-RPmgBjIpJMFZB$ zImR7$AeH{8wy;d!g9}lCO4%!giOa9W7p+icc;r*)qzYbvg+qRmYVIW_3*$LrboOkzaz2@ z@y`&kK@t;}-s_;ZCZqCdUfP3>DHm~AMTw$X<|ld`XER@=v4qRT3d7Ua0fMZ{Y6vK|B>gC2tyKVZ@J!jCqF`4!<{=1JOVg6IMKp+io-9 zYnAXtG_=y>EyFlIze|Nw$)e@fbi_=4uS`z)bHcWoljd6_Rnf9K&1a&Q!>obSxOB9} zo3vi(I(d9v0)I0O9d{xF6VJ)eWWq!qw)md7!53PprrL{y@5a! z8qe5tlWmsClB&~~B%u}wQ}y#S*1+U95@&EkKAX?4888mOiI=C8B2!Zxt$;Q&6fGe$ z>r7*KM+p|<93}R8Y0HQd5H;DB$}O~{Txw9&(I#QS+$e{*gzKdI(Xr(qu1AZ=VwhgB zcD1cBOcNBo+(J&Fk;yc?vIJrJ;;i2oh1Fz?R283Q+T&yt2uxLNQUG%e5};D%8`X#7QW~c$bDeHb zp2A*~%!140LC8|P7dD1Oumw~Fl@a}uUcE}BbrO#psI7z}>UY)KgUhiE!+eyh3?1 zS#l9N3*j<5a91>mL`_YYIzf}Y8J{Lo5hp*Ol2(<2xK$ef$KvwiYfLgL<)`W3>I_UO zAVZvoRu>^0hCO1VQj$E4j0k=6Nef-&n&O5e*!_5?8Y~hBn6S_xdTmccCpR!yFg=zcaNvKQp-wW z$y%XoG-T!HZPM>y|w=l{boB8;~~2h|j~-i5HUW*TNPg4#P|8 z!b8zT{SaF2H0#l47LH7W8?hIP#nCKB-b`2Z5mf>p>!$o1uS?=8%{8@{N|u4-pY|DV z=iU5FzNxn3w6>;NcG-%O(92#4MKS8FWFx<#mUb|tz;2U+%8~hs60j0v=;ETWW_gRs zYa~%H%PJX&m`n|y>~fG-i{HT8%I5L^id|Z`2CG#%1BsiDpx63wNe8^RIO);@M`Z=_2tY%q_te)bW6k>l#OU=VW;Yri@J(59Q1qw zn+uGD2qrQbZ%g3sRf92*Wz##OUm-T4GGPQuF)fNWnHMFLiJY}Wn0TS2$GX$FU<_6p z20E;I0V?UDTSrGy@^faW$}f4aco}rEPU;D@?;}en*@-;Rp!E znlt0)s_`z6T)>0LjbxccWyBXCTcqx#SAb6!2QexNhQ+wabHoO?z3P&9bUvaInnatH zn@U;}XpqU=hGjh17{3(3iPdT9f|sCnuE%DQsVZ;&n@eFX{oI4Y;t;P+@&fe)hE zV7X-LU8NNwPZ9^0N;KI%r4flLt9F7zJ~^q;Vn^E? 
z&BC!9)@M+AUE->2z~D5^4!KTslf2Ro!KN6+Fzr+ZIA~+%m!H+DAOzx%YzXtOws(xm zsftaLH*1N59$~74Z+2DH1SO~D`Irhn8$33Xv`s0g=^JszMKzv?qx>wdk1A)5Z4i^j zs|beYB|f2qlHMNH5s?>3f|NVTv57pyVzkmpM~{nagE;1tc{5q5=VeNai=#K>3EWm{ z%JCx92 zdQ8lXhV8h1O39-#Jdvhy*HWdh$HCng@E~VYSQuZGZFFGL%tfyo?|s}<2` z*@JS=S}EW)v#ntuFGe#+@e!fNk1fd9@e5i5N0f;+o#e>m1*-g`Ju9^z6Om?8v*@^| zSpo|tY1Ow#9CI-e83n7VLW_P>JTpU{EHOyhxcnY1Ry$23Fk@mcLl({QTwIFi5*4i0XJGZp$9Xie>peSCWbyTkv$Evr8%xtt37^g~6*^!ha zSPJCBwKB#=*r!#3caT(A9WwDMH!rFeXcAL^5I)2V<-M$ngnciD>~d&<28E?zumsxPvehODyKXYBR?{T1T`f%_gpzLKs~I;(F07~y z4xI(0(BzyHnYUE!fw(-Rm5g$LgpsNIL+hDom8NK&A1}b+6qk4nw!bAHINTKC@~<*x z(OJn&ES$rV-?A@9??bO+?pC!0%)F(4fS!=z$w-FAkQRHQS|TS2zNCJvzZBg(&Ll2| zrID5fGREp_vxIa;z}Z8V z!fiy!oU*e%$mv9RCD1UF2#601coo?el6$D$q{q#&p?|6f7(WH6elvb~puwp{Bvor_ zo2i6NvrWzBa2H*Q!LR)QxJ0rbZI1-tNfC~gtFN}1?5!d5zOTN>ZBYYNm4S0)24O+ynbY= zDt&HC+N-uXd1GJ?UpXAvu_4;xcd(Xm??zs)1};OFHxe*$s$vdW5{LzxByb6=xYmEGlsgbEm z9%w?XZ5~t~DVj-9Bo}Ov;o|*Vq=ej#4h*+yyNK&4s9_;z49yDJBf0&`Jj-0?_8-EP`Z4UUp;=+jy){ zkJRdan`>AEO5Q4kk;*SrOHJC)_zm-rI4BA1WC4SNfg*-e2lT#W7+Tw2wE!78>o12N zNWtXy43g|r>Kx*-dWx4sMB4eIa-2cz3SAuH_vpQ{dUcXahRcZ~Gp}n6LyQAOmrj*d zAk}HfwipH#0Y$#-HqHQvVn@Hp&?n)exjdf{r zp^-9M+^1?@s)IX>Cg4FhBCJLiIFKweMla-@L&G*2-mVsjv(iB_YgGs4A!o{bdOsBd zdXGhsT;p)4NK|&sJ`SVNnvK}HEZOREX#i+5odUYC0m!{;!o2vc011W9rDeu{tWv-xn!#rVWwTBJe530X>1 za%HVllC!Sj2Lth04rwVZ7hU3QL>Z}Ej&MKR605fdmiZFpu#DWgFBgV%c)sDIoDN<|*x88LVf~|4i-2Na2{2-mS+xStGU=p9!$M+- zwB==qTs%Qbj%2rhTrJ>y@CvvM$=UtfMbALlQIvHUWW^AQe1d@hOX{VwsPbP`WLoD0 zaiTgrLv?^9Tv2gjCCzdiN;VwD!!azUUk>14AkxK^$+6EdqFEK^f~1^6x3uGHIMIl_{T}gNh=|QP zWx#+A&NP{=0yBHdVigF1 zHBiYRem4-x*d#FcrA{8-lawt>wRwr_q9q}oR*Et9?L4d~hm2KkOg@4lzY?ugCn?2? 
zGB(OdAN7nfKvkC!QzSYW<)j;$PRvz`A}^~>j>tC<71*8P1}eGXI^3t8;|?-$r&dZ) zEpko;YqO^zw;WK9~X zm#K0vd>pqC5m0?n{7S1(jyL>IPuoEL;STh|2s44bYHbIFERsBb{H!7;Ox&zA)1)>D zIB7~|0-Z}dg&hAQ7nT89NwB0AaY!$SOpaUxPsHz~G(-`#L|JWVp=k1v4OUah7#ZO4 z*mG3b3d5FSUGw|YLL{Vg0;zqZs8A=gd9+1#za8Sq#URjN>0^j>NGYro8Cwg~8jM|X z9xo>v&=7jOvc`xDQL7Dpu^X-GVB=QmF{hm>+|CJ78He-=0-3mvYzONn6QZVOMI_;5 zOM`Ztp99pqqCAk+g;g^Xp~$H_HonyWnV(WcrZ35o^!2DQ#dApfk#4FAzBpa&ti39s zC21AQl*C4>vS|^>ooy11+2tA;e-^8v|^=4>lzFOO#Z_&5|%MLrHCD)R%}AD z70P`5pgO@O0nH)5l#hTQ;c*{AA>e0$CEHeNk6IF4B_WYUGs9%=qz4kcSVPI<`HTyz z82ETmAV2lWK|QnVk4>IPUy^?f0UxEgettI7ZxDCrr`yXov{F!3Px^6$P^qy|1G|~_ zE0(3_862QeW5T7WEiGond^F#OnvCyN;jWkgb8wL;;nh`UL97B4{;EP->N-Jt*fGC7UCi#TKY--wdxt8M#a%NSp{vIQyGEk z^2jUFFjU?Tn<5GXb`w;)L~B{#Gte|?=MC$t=r0Yq^y=&d5Fdk zJMpRoJf5&B%jT>U&u2}={?+aw-C}WAB*;EKF{N^0(`4cg&fJXN^N}G&D7EPL`B%w6 zon##12at>^qwFg0vC}p3NY|UZl6DsFBfJsrOvp_+W|Oq+kf3xJN6{j2B76{1=Ba1X zUq0H&Iy-a@(M-vEw0%be%XtXL7=}Qp;o>?QC6{)bQR*4m(pZE+1vQLJK9QyQ9_^Qh zD;tI~M!Y4~@JKl@|} z+T&UPV>_7@VO@zV>cj{)JiOl`eMpv2HHR=ef;(+;BrHc;)fp#AweVyrqBHejmRphp z{JpAY@T!blELL(a<|5%=GG$fJvw14HFP~z2FrD!|+{2ZoS33%>87(Emc#d|s zIK%;u;dr?RvS4cg&!t)!9A)`$H-aZ)wmgj#)MzDcBPk_pE&VyXT~UW$6kAjdL|Sfv z4=8J?rFvDSNyL)$;PBQnGIe7zlx*k-layP_(NMkG15zTQc3tEF45{8WEg=S znA#@6Fyves3q47NHtA8Khw=$FoPrUfpm9m}gbzWBJJF5;$tJW4W+;f7p15Lrj~J&IX#N_UWknbLe|b1Eg*f*-od5~K2RwwA0^4(!#T z-KyiKo&N}=B2pu=y@RkgAV?-mB*jI@xj;rOcOlusZ%WyZkmQ3H6J;Mt$~4v9^!n%p zQK4DrIgROQB;bC`jFBo{Jeyn97Pv5{uC=ubbCT)^YXKqw+73S1;}r)nNxG!G#1~z6 z@mEd~Y?c}s5GAQQa10nKRpXdmb}XgzQ;kfqYR(vmccUv~VQ8GyQ2I$)DQd!NXmw!! 
z77}_yXCVl(%q4&+QIemUM;hg zqD(2AdakUV*GE~;r`vd&b|%m~#w{gz6P=}3&8Ws()v}m`9t@X=sCEd=h)SAeG=z9v zL_#X7en&e6|0|lN9Tt)%3r9vsibRX#(;HUe))2DQKIO>aI$1ci?-w}5!Wh(cJTXUF z7JO(}4$DZ6%|T|l3(6Zf1gW)eC3WL3BFHgkn%S6(QIoIc+mveJAG9MvJswWqj9xZYSlZB&C#7f#wOT5;&?L>9bY=%KZqMs)xwY z6*=+A+}MbbkArf*@hVkTjpLT;PeX73N1bg{Dj3*Fms(dlHpg};<}Yh=xKoT=oE)4K zxu_3`DUBKbY;_>l81>z^ju{kj% zS~N(Wt96SEZiz%Hxuf&;X(6={8kDK zx{c@y=@_OU-*E@2n$RmP{HqnjD7jVv?D%JPRf9K&4|U>Jd!Hn-Pfgo!IxKB4T$RS10~X zDm20#Ly|)w+L;IX*-TMNZBc)z;0=^n43 zG359x3TNf;qv~KXyCB*}J!CJM{Un;GssQP!&iIp{?V_YC-Bn=Xw$K#8*1x@A9O*Eg+f{TW$ zofZyDB2Vdpp(K3jdyqThC5b*{CTQIlTLFA@Tg;Zt5!DD}cUAMSR5ZMfNxCIbk%LL> zPmntDM+E4Sv9W>qOU9@k#DNMUr>|uv&h1m1w;c19P$A>g&Sc@}$Ur3mu$8Oz6aw#J zIiy!87prccw3KNI(PtIm5MYSf;>qbjsT)dREvZ|ew@ebCbwh9nT1SI#0)O5kP(YxH zC<7f&%rJ29oGFje%H)ptY3+~jXh#Pn&d8rOpnzaJnq}cMR=!eAZVB2WtD#>d`bPL)>KaLAs5>uw$M4vprV!pKrvG8;3HVj z>tp-LM%a{CgLlM{yZK2*<0hA8qC9X!H6?i9ki-|gsRbL7dk(TPoPVGbfcr(*nh_|u zOQ!(mX<~hH8u%;8J7%$5>nt_390a(#5DL@Qab!8IRkD@ z$rPq$!LhV#n4O*jKTXet>oW47D>EM^Wfj2pvI^mb>>_wvPBBc)ErIXmmcqyL%Ha0= za`9L!3HKFM!Pkqc;nO8G@Ql)0*i%*qd&}$LtcnI0U)c!fRyM&)tD50xbqfrs zX@v)B+F(pwJ4~+cfawjL(Ad}o?M>aVp}7Yh)6xsSX`KL*+WX)m?fr0e#{fK|Qx6SY z2AJDzgr9YrVQtSK9PYKi>IqgD-DiW>_t{}%zXN{W?}W<-hF}EiuhU(icf)rnl61OF zhKVrG=z*&^H?Grt!QoDw&dQ&C(CMBtjX;BW6t3U^icXg>I0mmDoD5qnQ{V>6R9J7F z1~0Q70YA6$A8F9l+m3|mY)8RT`_b@b`!O)WF#~RM9191X$H5)WnXr85czFKM2{71o zBD~gh66|)*g73Iz!w-f}hWjU;0^j$X3hmz0;HAFP;bs0a;nI<_;H=TJp>fi=@SjO@ z;3Jdy&qnC>PB|YQHT41*I_*OE`?QPTwj<`k)+6V^=Z~BZXCHM5>^yn_Jo=c0aQciz zF#p)aaMQ7u!lC0XgWn%_Is9$r5*T&D74VJ|mco-yTn4L8S`KxyR)Ftkt%NIQuY&C- zUkP74c{M!dl&j$1r>uc%PrVwpoVFIed)hVdoYSv`g=bs`pE=`t=s9y8EII22c-L7s zLhIQ#!98cMhpW%o08cu1BQ($11dGnQ8HS#J3w-bVTj9nFZi5RiydBQI=ngpf;>|FB z?w#+rs%cWc4s>>dNNlPAv zUoCkAPQKz%xc`dBpl9jh@R6lYK-aP-Vaf8Rpl!v|@T(Qu;Dsxnfl;fr!#7qv3op9z zIr#YM=i#!ec0m7{7hvqwFT&@q-U&}$`w}d?W*6Le&CBqoYhQuc*S`wCx&Afy!@AvY z;|;IFV?TWNj*sEpn?Hf4-T5h8yyY`E^RCa~k#~OqPr2tyxaHoj;L`iPhL-ye zz+dkF2I?OC7MQnw2U{Qd9tJ=B13d8XL74UEk8t3zpWs=K{|xVZ;urYblfS~hp85?s 
zxBU*6Kl2BCZ2O-u<+;CL`tyIo?j8TYe_!|)PTqM4+FsHHq364TVB*Wc@P(H{;Hp=| z-A{Wp4F2_MIE>pJ0ngqY34>mbf=O>g!#CcDfp_nTg{SR}lh@yjhrhm=0PlY*5nlLq z5}fi*GE96o1$Mlb3a7oF1|1)y!`6KnFz>@mxay-U=-QtRFZ(zLKKV&5{Or>_SoC>5 zO!%SzR(x3qN4_e8>%K0A-+WyHuQ*T&bH6ErQQwxsN4~9qCwx~4GrzBbo*$~=?FVaM z&`-6HKQ#?weyN8a{L%oQ{IwCT|E&og{d+T<_D2iM|Fab~{M80){%(g||8zj-znyUY zp)Lektm}p;K|L@pxEHpBOn_aXeQ+$SA9jQfz`_VU{4~M7gl85G7^ zgmG4RJ>CYdOt8c4i4Ith?1ZaRhTzqyF8D{98~&0$3_~*~!qzMgoSf~2qd7jfEY}bH z`6KX(f>C&X;UxG?(HMNZc(S}+G6gO!oeJlcO@o`ukAU}8OovZb9tpp#I!a!zJ{r!c zIR;v4XTZ_AV_{SMaWJi6CQNBO9)>oZ0Jk@t2+wRj37*|D3+`)~4QIEW3^UqJfseGE z3eRdk4NmPi9X55I2|wyQ3vTK<8?NX+2QKY77q<4!f#3I@2Vb3VKD@Q>0(egUh0rr_ z5p30844=}^g+qpUFw!_59yDG8&owQ8QRap4Uh^W@H@FzSI(R9ZZMh8Yv0M&Mw=RL9 zwkzQCwxw{XeHpBFEQg;vR=^{iD`CmdD){!$m2ih^HN4V&6`VG_1{P1e8b)~5!gZc& z;0o`x@J!!zaEAYSc=X6R_}Rz}aQWzsFmKYynCpbGQ{d;5br|Z(sX?$~S}^=EN`q0e(&3}CGT@xqnb2}_7Sx@R4WBtB2QEJ~7fv}X50;;v z4-cMR0B<>?5RRQ$1T)VnhF_mm0=J%B3TK~F1{2RMhfkkd0SD()!npIQ;8W*S!=uix zfw~K7;cXYx!LbYL;qMnVz-umQgxwc6!S63_hAnejV8gstm^!}={xrWGuD+xL_Acmz zdlz&;%c5>rvbYDjF71VtmramYF7JbBOZwr(O9tTkOZ4!AD-7_OrAD}DnF*e-+zfwR zJ_uK@u)yzDSmDhpZE(#hJDhW+1O9QP6TZEA2;Os*3$9<|hUZ*845zG}2$Qezz?-h| zLi@Eo*mRvAI<6mq1M5cN_3I|VV{aIP@7*vN?!IvftXw}8ezblXyn4eCaN@@4@YGFD z;Np+I`AN9v=BMDjx4eM6KmFDh;nBD4gr3`9f`&VG!RE~`!{R$%fyrB5g~4~d27kOu zg2cynzYbr&=MDJGy?fxM`}V@s_rD1jJn%NOKKKrN^TBuFJ6qp_uRZiWyy@W&;LVTh zgSS5VA%8 zUoRYhJ6`+-cE9v3+_>vI*#64*aQ!Pkz;j+b2peDf5l-6u6D)uIXZXVFzra`C_!Zu; z=Qnur-rwO#Z~g&)e)CVb{;j{D>+Qc`&^!OYt?&E`&wlq1{N`O9A@hXyg5Znq2gA&L zp-}%}7!2AU4m&@NfEzxEgin7O1!sO14UM12z~jG&g=c&j2e17q9^Us=0zB&LRC(n< z8Z7@N9q#%j1NMBI3IF^y3*PZvHk|%_4($FR7e*Y+gD)M-hiiW`$KpLo%kqr!%I(i!zUdPh}nj_hcOnf6Sf%T{*|XirnMiBe^r-s=VW& zA^!x}Q*Z|SsNhU^b>UfXTG6@ixuQAny5jR-ddY?G*^-N3UFpSePU&12Ts99bFPjg$ z$}fS>lrMnk6$|0B6^r14%Ehp#>QeYv)n)MV>dT?IW(nL?a|N7OyA_7aDM9s*w?lZ ze%H1MMz-G!*RQ@Zbi)jjva6+I6?UGIZ%bMIEz zKjC3GZ^9$6x9@5AdEYj;vi~{QHt;<3>vzDj3@^Y$&aVNab^fK%-zXG2zzY0Br zufcN$cf(rC>+na*8*q_z4@|f1g&*49gkko#V58$BIM=x!Mh<-p*A0CFQ(T|IdtINw 
z8SXFPLHAeip5d?INfQsiA0~bS@AZ5OhrPeS>%ITLHs8O{;X4G6@#~_H=SUEY8V!ae zlR{weSSak690s$dgv0i!5%7#@k?^7;qTu<{qv4bzW1#J*Sa`(Iaqz@r;^9d%65z69 z6X7+-CBfTgCc{gPPl0PsNQGNZOoP{-ln&R;%79PL&V)CeoCTjaB^z!zH3wdQS}wfm z^g=k}j3Q_|vlw1{Rtfy;>{9snIc0F~x#ckGyb3sQekHVDPz4uUSPk#GXfFKxqIvMK zi|4}?b1#82<}HBj^B2OXOBTV;FIfzqUT`T~v+y!_{G!XDe(@5Rap@KCz@&gzPbD=c=w7m@RF5R!;w{MVeOUIz~I%_!Z%l6 z2XDXXdU*Mob?}6%Z-Dx>H^S^|Zi2sEvmSnVZDBOt=DH%d`1)cvbzKQ;xS`_T8*AXpn`+^vo9p1&x75RtTN_~OZH+MO_9pn|?alC( zI|gF7=N)=@!kq^A*cKyZp9HI)9fPsYO@>*|Pk~#X zp9J1xA+GB^?_Bh~ad!4Z8%^|q^O&6T^mK!#` zJq-81JrS;a#{>26df~x$eQ^DIemL>{5g7ErD7^QBN${wBV{qk%li{|Hroe;yr^2F7 zrooAy9s%clHXYvk`H^t{7e~RQua1WLuaAMJ9hd>ve{(F{{q1ou==-^GNa_1|aMcg< z;qbvrV9bvT;4?oigy;OU2-g0*7=HBgrEuLZm&4=;WdIik`0p#>ZU3x>YyZ6pUV3OvT!`*m-PQ2;ppRg0@P4=>_+xl|$S1Hn z^ivof_8ELBY;JsrE;4)`^heBxcSc?U8>1J%uVWU%vA9JrGhs1&G4WD(d(vfabMobI zeaaH}aOxHCm9(Ys)AVI9By%~Oo3#QS%vuS5$zBD6bFYSdxohElx!1ri@~(yYg6m*c z;Y~25Xg%Cmv;nphZ-h@3Z-O&RZiWe^x4`pDZ-p6Ux4|7{w?j+$9dK{?W;mQei*B)3j zycgaytV;;dB~1*1Pfd)3k)9|R;f;m|z3Fh0F9U}82jKmFJ{tw&hlCr8-imFaeP!;#0po}*^KHAfu_k2(5ym~_ku@Y7>Xh40Nc4W4@J>F~2- z&w$?J&V)Z5cNV;L<~h)H{9JhM@$=xS6XwH(Ctd=tI%xsiFl!<7%w7bKKY1~{=#)!g z!Ks(QcTc?>t~zZAw48ng%s*o(eC~|puJi!-JRIA>UuV875wFC!Df$3tYbJF1T{}-EhH*d*H&A_rh;i-3KkJ z?}t}j^#HtP&4X~$)m!0~wGY9|uXz}rdF>V8_O1V9loOaOI|F;m(_%gLmEX0<_)wB0TrDo$&A5UxKBZcR}}^FT?gN zufY7fUWGMxzXtp6*$r>M_jOos{~Ivofj#i!2lm26555Vbx4sQ$ZhZ$@9(orZ_3(SJ z{L%N}NsoO1=Rdv=Wa$J86mnipWFsNf9e_d>(kp|_%qMK^zF~V@@Joi?a%Fi zgU`PJM|Qjjk9%P!Jp09$;G&(o;F_0ShPUi`1#W%$RrvfXufaE8)tw4o3xYqr77TCP z9Rg>)9tkJB5d~jXp#RFd%eN` z1g&^6Xz`2Hn?gr6h3?xF8Wa4%X+d*m@}H1C>y=Hx9btPyT|u7*R|UUS5VXG_=);1b zPYQy<3xZ}91YKMZ>?{Z}76c6x1lyQ zJH0PGPN(O~z?YFP6JKV&2Kln^W#!Apmz^&MUrxS;_;T^(=4+U*iF|qZ^71A3^Yb;r z*C=0;_!{GD@@Yrz4{i^BYvz&r{|L?unjRAW-$>-U*20jC;LP+`1{2F*Vi`;Fx3EFz>F)hZL|BtNq0g$RX@BU}*{M)7L`);w#wn~#Bm>6xTDQJud zZ`j$L*_~l$cXoH??lP+l+Oi8M1w&Q_g}fMDU{|F-NJK#lrs*01{rw`wpdvM}XwWpM z$x99GCeTP@!X{~@ecjTedHa68&$+Wp`-45-=Q+=L&N1cR;_$)3Snd$wGz2KW1tu2gh 
z_FnZqj`$2O0r268-fyy04_8xNbMT$Ww>{zK!55-`jCrrVR;jF8Jj_4f-gU#ns`OvY z41eE|4UX(}eh@=cF+>$ZRLPeEdF@QvFZQkCe<`_I)=F+r`^OAWF)}mE2L8;Y#jk zHT=@Z*G65?vk+HuM-{`B+)>%=N?sSTlFM^xrBYF&4?7$lrg|v*!{{23&%4Ha5cwis zG#|v;n+mqlTSZd~RJ8M}!tuYQDst5Ik1L(uyOQuU{Y8F9&9746RWqv2WQe2YH&gJN zEl^9{Q42TY@SAngQ|gW~1qIeQYJN)(zjsZw3O4)0!^?+jrXm`grbV9W_v zD*`(lHNV+`-|P^T9gecD1p1CTzq+ja>atLaZ#3u>yLQ3wZ*vf?^c1cxTnh9Zb$*4* zAB3yc@K$FgbqbA3&0V4@xufQ{lH<2dA-PR50b?4jE|6GVD6VxJM_DPYBrH-!-E5~4 zYPhR}HteB&_|<}rn%_DceyI7a=I|R0XN&8GE6Xt#CC5ZdXp;|Bzz;W zpz+mC480^2p66+0HG;38bzalNjR+mRBozIHt`r#cmIBmLsM0qh1BAY<#*WnjVQcr6 zAPq;|;8yGy@+t~nDML8Q(z%PIrT%yHDZSNH`Bp}8$@M=AI>NwUY(T^p;0 z)z+#N+EBgF2oq&r7FqnZ_?MAQk(u7y_l3gOS$$#CM#7AQxH@<)dI5Q;x7^WjxbA(A z{%+adp#|eb{`GzNeR@w>dw|5Zy^k8@MazIsv8NhuwRT%T_yvM#W!Igoxy>N!TT{}! zrZ`&1U6+}!Ln4~PiOk2Eq%mg1+St6BNj2m&Z7A4LH>l*NrYu~vc13mEIo4?*Rm%km z!KDKVmYkDboWl2~b;Z7^E{aV-+5xwJm(|qprX9muhF{$EqUpT#l*|8DGaaYvCz?uA zLBLp#R-3Wfs0VA&Q6U_4)4u$g_Jcy(EI^@c@q@A4t6MK`GoqzF0MYnRp&hl0hD{Vo)yIW{V}nvjW~EYvVodlConDn4-nDE0 z)PHIVtzR*>DRoCJ^*WmvNn4C9_D+R*6Jrt!a~BLwi!R}`0>&-FE-6GoS{UNA0>)YC zv}B}9ixtFa1&mvJoED-$i(cZi0>(O4U>Bl5ORLA@+bLkcxOKxM8v8JeVGh4Q!-!p6 zSo?Ez;a&N>&-L1iFAl%Rnl!v@-F;|J%~y8$6nzL$LANV_o9ZX-OraDyESd&^(DB7YS8uaZc)=fw1+nSC<8Q@rGoyRC;HtH?Z z-B`LQ8955=>J(MuYXGoVXgC;g*E716B!!8q(bXUh0@6v zkT-Sn>HQdZ)6gxDWX}?4#jp}H<}6L9&GRgC+>+D+@q&|RxvLAkJxv15@PR-^rIGO_ z|MC0pDDS`H`A2$|`7i9%Dx#6AR{u!r`$Bb>w8Giq$k%+uq%9q9s(=VrdzY{Lc+cV;k9R8l zZb7)t?&1TBSZT>#wWgU!No((_Qu1vJs5#^M;+ra^mR;GbHNh?zSY>$x5F|vW!PH(s zS=rd*fT7$UEd$_e%K#+Tg!aLX> z`i#gjf1tZGuy%O)u7S)4=Us*d@SQhPc<&t`k1OI94qdrpdRU%qgyp zS@M}Je)IhK*S1_Ya9ycqz%Q;{I$O!lR#nZWp=O5<&JI63JM#E!@0!_fZJiyybawck zAe*+%w(6YiT{-*q@Jsj{TIBHOXKSUM{igT$*4c7zJLakI&7>awyX_<|{7mGD?X#B& zJ`?`qt+S)Qh9<(Bw$G*^5z>(0>-F1jYPmK&zcgA}n(41p)_x-0I6HD_{r1_~Pf?q6 z=oskmKcWAE@GYX(;!!e><32t+@;epZ#yL!h2g{!|&kR zg^y>NvzHr&zvH#f4&N63Ly@c_w?(copAq_?vI^HJ^_#cPE@o2fKa(ee#mTMYdysm1 zd?1bKD}SeSq<6@FqQo9ls)l*5sT#nA;ojK``TQVO{w7fO)&h@njYKG9QqKuDgHu?z@M-5fory@E?t8W4n1bDyTWyuZ* 
ztRNU36vVSOFuyb0+TfC!e|l!2jt)pZvoNyjnT2SFkw79dR|K4{tC1zb&n%4Xd}g6y z(6M6wMkYo=HJ8BxA?sd1$O2>kmhlJFZwAu+J$LjDtb1f($a-Zw{^vBqhyR=+d>F1J zlYLYQnHMH}oAPK=j7(n8F6aPcU?(31V|@Qo|DrNvx}{PCN=ilfW?F!(5{ah; zUkAKHK<%Fa(fdXv{0tB|DOS$F-o!raaVw^!&=M$}Ww87yZhxD#|-R z7(i>@0tX=LREk2HacguP9wQ0~`~gD~JZ#}c6dw2kh7M4)iUl>PkrF`X_uv3zzmn`% zaJBiFc1eVPilq~@;Rn!6nXN{!>(I>FTM2TMPF2FgX5s0V2cGEr>WZCP?%X-Db9l?` ziui8?0nrynXJ1)+>+CD1E;wv+irH6&m)Cv_Tq9(&mu|LC&*1N}2(dZpSoDu1w@b<} z;Ha=2YD6I&kDI4NE#813*9T|Td`KA^HUG2D`>bqwkbA)!onH)9GGk`=CIpUZATafuB_E9bhlYnE z=zcY@E?^j5S6H_;qA?oS7BJMskW0@|>g%NXoX~h~g`HOnBla1m?dT;RtQpdHafG_r z>p~qhzm*fem6Po&HC~L!G8<+_=2AT~9oP@(r2C+ z`T6<93BcaK>K~YdO)^#w6)WOtTP<22N>coDC8W?64 zWF90mGGxnw5%byfi5Dt=U*H$D@TwqPmAE-LGUQg%$dH?j9aW5CkbfTYVP#AbLpeCg$*5dJK3K{Xl=wd-v3tDpARYRIj z+vOE;CA5i-65ezed&^BA`&WRRDF&uf06Xr1ea4E*lz>tJNIVI!;|!S3^(xxpj5M01 z%b5m#rN7@_#sj=ce`n3L&1s}r#apCszN41_Qxt_A^T4u_V%I^?I|TGd+dBuaS2VcD zDR(P=(IhlIl76kDFhl3|AhrJ&juKCJqhk29buy_4o z?rRQ*pM7m@&@ffg5A*0p^};obp=yV#0cwY8OaZDLM!B)#s2#3D3IMgk_@#DurYLi` zipLJ_+N4YEfYb)p4)R-V5T*ih;cubT2A31m1_S*NKyARC15_JCbjs8Q-M}QO4RR+p z?7y-9qcvM*+%)SCt{UDs^72hdLZ~3}P%>6Qx_h94WR23Jf*6?x4%dBPdnK~OrlIT+dWJ%$+k=j8+hPn3zt^Wu2i~Rw02V$w@V#j*+_#_ z=`6`I&leD|j|P~_`(;R)ZDApOY!BFd;5@H2u~||RLD`7CI|DTpr(nSptX*oNlnAZl`kqt|qAm`WD-~f9H+$P_{~L_<`D{I~w>`z# zgLrFyP}%Jt{wQBEM@xg;52Q<-2lmnk8RZwd#CsLOZaqnuz3ewe4Hx~c1Lt4FfBtC+ zqaSruDC6B$bwQJdqVob^n^cp;DfQjDS>wY+|DwikU{@2^nv#z2 z$?(dX_{g=94~(W-w3$dQIojY|-@rK}nWXoL1|2QI!>F38$cW9|x=B3oJF#DKPLA!Q z+}=LsEo&Lxt!|A`VI9Q#KxH9HM<7BFHdv|KN!Ad zruUJV@ejxoA84qn{mq8D_y*ki2JdJ?=(UFM(zo`4ufDc)czCEScqg&6=#e8&H-u^i zi5RN+MMH>WVjtORG@OS<-R|WvhEVN$4Wan0xa$bo z-4KesiR_58$6WaHRDd<=99<&X`qXeVHD%T z*Cc)q@dp~>FE_-$-w;3EK&??})LG5khWO_i?C7Oza%+R?K;;j&Q1GxS_I&ts;_n;! 
z*dU(`e9H}2)YtW(cSU`;E@0l;-tabh(VtSF$ijwbC-DdLFVj1)V*m2t&70Tv(s_8d z)$okrS;M&DO2dspFK>LaVWZ)m240zMi2oIREczl4E{Fdg?zX~tT&st|c~cdoVsss8did2dbS!{F(I)l_R?E-yXjKTU@RaxG4O+)g z(qRPa4z9R!*@ncXL(3|=AAj=EPwY}8tM>4tLCR1X@^*~Trp^r<`z$0_ZAvm)I|E|D z%r*29&+>#&yYju|tyoc~PLyVbM}cjA9QB=c)ihMYeU>o0laX#@#!Tx_+^$dtd`i!R zNyjMRx6}){18^N~Q4lXk$NR8lE*)6%A)aH1>B9_CY7$w|9po8J|LiAA*qvEqKDYN;`(hfkY* zHX_{NASP4Q5)`Dd@uKrCvF|3*8s5LQxn*=&<%a#GW5?DlTXFPmGTAsY98;xk)Y&$o zv43%0##n8hzf#+3PPxx zLsW!VlO6aeE?Tq76*z-%3+Y%DSV>47k9fP)xaw2fZG#CAT_)EpqA_hc5Bop4xk7QRl zCIsW%&mIjIX4;9Y>7=(*32J)OI0*&@)bnhr`B6a61oUh`;{jb6(2b6I`M}>C(8hr7 zp>qG(jkHINcs~-U=TbyxEJuH1iq)}9)$PKQTt2&QQP(q8@mueF?Z~5b^{o%y^}x01 z^g|C`Q0?jx(SF>y5VKDPE=0en*2Cxh1RnT`n2*O=ya6NOn+dD>zbPhsGjGSC8wlB7 zj?LoiFo_HC-z+p`SZ!gr(3;}e3X=`ic!|Aufm@9W+#vGn(gh~nfQkO;g%AgErt$$v z)-J!GOD6V{yudW=xFjjy&1NpJB3)pOgg$a16#L=@T|{LqPT5bRB21dz=Qrj4GSjj& z_hh=189to7KYjBBy(`E`(ihkxt^ER5QWv;|Xy=>{B&()4^6139Er@*o0yns_*BKDm zNanM!KHbC>9O7`As~IystkXr!>O^Vlg+ysPuIBm+iEHs(i|1NA-A&qDd}~0rUkE?n zCySEdzn@~+M3M{b?hDyK)zR(FKR5A~l6;FK-`akGV`s9dzK8GV8MwP<)3=WvUitfX z9lr6_Cy!jHdBc14LKl1EX?xz(g>Ju%?d`Iu3t=`yY2WI|_6wSD1luoopXIk#!|fM1 z0VCIZ!A5x-<@+hSyyZHe#8)-^w1w%wdqcAoTYcvGbfSv3l4Wi{s^uV<$4q7T+fI&b z_}hd96AqfWi%?jU1?1* zX4LZ*0;0wou1pk`<8|dAldVwjl(mkKRwWRyR2b8vv@9g>Y5|sF z;H@ev39fG4WOUudz8CP=<>5tv7y18da32zK{(@+^MMc~#sIcGwNN%Nv{J&J2frcM_B90qWvgFVpX0D}B)iRqhODBbtr|)WPqF)oR2FM1UODnVP53W2&r}8n#6PpN`gfK?;Ma8*DZ*B z8Yqe*F4!Vy6Xa9h@co+_D!X>=dT99rb-#5}sBXqKr4QbraSSid7~_pQCL3E7FjbwbUbK-;HqeQmehu&P zx3+q0H^#d`KZNSdAn4_VLXwxYIPw^tl5wD=ZyxV@l(rEs&mmw~$e~Ey)pjOWmFJu% z-tHLnu4ac+b|oI|uW;PWveJXOo`Kx6Tb5QPwMe&2@-|p$($?mVNw*?ThH_=ZsnjHo z_z^0cJ#i;lbpegTNh+0dmM68sPO`#Ivd5|R2t|@3LQ9Sa>fcJI%ST=7DoQ*OAVdkA zM-MmGOgLX?*8Z~W1U-4)duXyhl&C1H^Nw&EnaxaIQ7>1s6RRYk<1Zap+LA7ZcHZ$o z^jKx!(T{`(uuznojs&C@zRmm&vp!NEY0h+nas^!cbCc&gIBAbPlPC91F{u_x(lxo4 zB|DRz+}o7$;n|5_l)YxlOb$#klU(&o?rqQH*%h}r*`-q@C-?T{7^stbtG-mGWpXb^ 
zIVb@(ispb;Qs9XbPsy^(UM1zB5w^+i^OxD+@(>&SdNqMIG1iCk@21TkFI-Q!Ieh_c5U8w`|z%p2bSH=BJ&2TZg4f>ZqFCH)kbUP z6vw^d)Wr(7DO$j}Ravfu`uX5qOwUtToORt|cllFZyBe4=GdAMxk=z0CER0v#J=79C zBH-eL%Lfv5_p~tFM273QN(rCTql2g}dh{qHxT96}+x<^0iK=vElySw{31dUT_>x)UX8Pvy=Rc3VZqj|{AMbmbph&NHX@E(_=GQfge- znD&o?az}qLb(M@LaYN7N6KIzK>AJ}u6}0q3Pat1B1CdQ)93sr$aAp|E**zdyh` z1uHh+Eq!}H2@Rf6asZJ|B1|WB|6_NTZ^r%y52$`!`PSzzgnS3`_0WCLJ*%0*p>?ZSE}-|WX4PCBs#&_4B^v*N)vO)R$5ykhu4V~c z%?iDm1$s5B1~jxf6hE<=hvcMp2)*&utlO)Z;Z`#ZX9QqtsG?`x^{b!2HSsg{c+SBD-^Q*f#W?X3paYuBqowLigqx0H_1 z2^%Ag5wkaaBGs`dcY;2MTZMb~L@31{F2^6v=MT5D8<#&3$`FXO072S@o;PuVu6Ba1 z27mknT@8Bf1YPX}T@8McFzafKWZBqA?S;}Mdr=F3DV zl#WT31AI9FrLw*RW^j~@J|o7bD*l1;aUH|Ah?th*V}ozKc7o1*f_BFWwYfS%Iz61O zR9=5&-O@(Yl2uclB&aNF^Aj{S?hNiME{vPk8XbHM zH+sT*C31-9M$S4CcVwlJcQ~-Jm<+FuT*BoeC;V_MOQD;Pi+suGZ=K+%66W<2Y@|MC7&C>Jp9DUVAMy62(JM~TE8r&yo9-3qXk$9mSVF!7`FiL+=$=#b z3TWLadIj{}Q}l{c^a}U|r|1>X$4=2JPSGn)(JM~TD^4--oMO@;jYFsC8K>wOr|21{ z)GL5oiQnHkV}SSZCGF2^6v=MT5D81R%49fF|`+< z6(QOaErs;UuKr0zN$Pz$0p+w&463L!AWntVS^&yXUK(+X^IU^bnsGXh*>+!MouPg;H{YoSXn9+7sB>Eyx2ITuNaD zF$!x_6mJ4(6&Kkc?{T2n0?xrL5*MANfJq>-%C6dV<*wJtLw>(Pb|_@MxP9XG03}PX z4lZrny>Q%xe24}HA`5^f;~s-+7grW^3-W?k2#9Recb0QV~&qO~b?q!#c7!yi(E z3v>Mgx5sXd_0vKzHrMy#CZx2qw}GK5!^y<(D+A&v6J(6x=%rbE%j#`HKH1NZKDD8(Nx#~;q; z50~K&mqJqD50^sPhMsqPoR%M_&EY48I9rTMdep->6go^jfCoVFb2qHdh#9H;rl zX}xh8Z=6~hr~ROzaW3S>X+hFDM7;5Fns1!e8&{J7no~JZ*2bu&)sSR4z?TzbEr!zVfXmmVLgZtD|qvH(P@vBFdrvFq|L#l53H&H*^ z`}eIA3`yK7+`E$eV(SFk`BD87p&WlWpFiBrZd`sMl;IDTBBa0{E=6b?`rgC@<8gxF z2!DKnCV`%tpjjpuiSUzzO--+^X`&_MJCLu3?t|``pqZd`6AVD;y%RLq1kDA%V1ls+ zeQbiJo1p0?Xu1iSZi1$Rh9+pb37U?y4xu+bLDNmpbQ27@2{j*}3ngnlro`LRH-|!! 
z zCqDHk$WPGXxHGu3xHxVlZsPJ=Ks`3PS?lxhd+OsK7w0?d-pV^oDQY< z!{zwH`TXH_cH{D=LmB>XDMAYT;ZlUQq3=zcriYxShrk~{O%H*dJ53KcO%H*eBw`3~gkq5GhFPSZo6b*Jee(0fnQLr&8};1`^xhd>`YO%FLu4>?T_IZY2aO%H*F zPSZO`=MXyMr|BK1=^dw;l~2<{POFCi|EY)2hEGeD1AI9FCAA&`X5<7Em);>Kpv3f$ z)AW$ju7{lFx{043Pt!xIt1@$x{Zq|Z)#Zsu1zOiQP1nGk!JWm$aVv2fPt!G^&A3L~ z9*p0L+x%Zg_lS+<5n2wn3is}aT?UVYQvBg^{Na55a67wk`H@hDKU|8C0)My^p>61U z6C!AyLj8@rg!pdy73(LpgP*oM*5 z;e6O*Cx~0J()>d0-O@MwLj0@Zg|X*5s;S5R)Wly1PRoPLJ_BKCZc#33d^C#z58=W^boPdDHg)TV&=A%1m92Dv*= z?4JMZ-Kh8*nX2__-#}<=gPXPwMYk{>4bK>!HH;gsG~6ik^2RqCHX80Ri!Qb_c28{M zp}o`vcT;(KZ|x)dj_lh%^5hf8j*Q%QXhZGZUEy1wY%3%=a}_53T^`m{8MclHMC_0g zaD@(IcyW=3I<4@$|CQwJ47l3o_;FXkV)%V)N+hmWNEQo$q>zCr7kIDSlsKE5h};z2 z`egX$oxC011{c4^d(a4HBe&XRsA9*j>CAF+X-a(TPxxUEN_1)*Cj9e~1;no@acHA5 z6_;{0iRJub7f;qU6_QOU&UaS9Itxio-Gr$rlD=uR6p}4fi51EO9Y;oXJidS5{p*jc zY+$RbyF#ID7K?X&IRa2twHK1yySd_7K{z(H+n1zUxpJ^P+g8d9e&fFN$=*V;H)W+z z{qX1?8C*9%?m4b#QER`J&2=h{YKj+OU<=8F%3S-kjsmBVi|urCQ$aB!e=D8HGn9R9 zidT7@$Yg)ZZ(34tYTOyiq2E$Z8DxCX9*xmDV3v8O5+)8*~l~T zgXZHDOr!J{h9WnEw?0V=W1zaDO4&ig6oWbyDPW={vD%6Hs!^S5G^ErWyQfu5e8J1Ze{ zDa&J!b|qOB1RdGM>#j;Tm(@w~lJp8{ZsTc%2+u35=w?st*NjBY@lfN9ko0Q5CTk-5 z!*6gsB#1nt#*@#ybX)kGfnix}+0Yluvz<5UBjtR{Zzm!+36u=LO)FH4UO`UX=X1MLld(^4NaN+&d0B3N>M&{ZFNBl`QVh2QAN z@lydk55IaDa7J)e5ErZzYy{|KaLs~70gb9*EVTONz|4iMmcKgot=kSiHEYqKC!em&D7073$G1m+75#Tk+wH(0e$-1h|-==&s87 zrXsIeB%{A-Z!O?yOU+Hz{2=f#fp9i?nXeEy!9E%L=gN6rhsm|)bZ&9%^(GEA?yj6y z8i@nK*z1mWl%tWE^D-StezELmWqT(ybgXf*RbL#L{)gtK`}}k=zC6*z&Pva!m}FA( z7xh|IlF<`lc{8e2tan#^k(kNWW?u1_ivl*AbS~!<2+!@?iTpYI& zx3NOkfHvbAaeHtH(@L0L!ZZ`6n~*lU1W7+(F%lLh5ybkDT7*{-RJ!xLYEn!#^-+Jx z#4+Bjq>iWRtoD-jTx(Lbc9B-GwP5vfKDO7|leaUHb33!$e&u|tkM{D`a-n0IPZcZY znI@AqpC?_|#eTwh1mCR+&A{?In`2csQL7*ayaSf1a68DGThPb`_~R9B2chRG+zvt~ zk*|X8fUbw`gYMzgFX`9m)imh6dhx2lZ6Evsy&4C7j2GXa(L=b~D?Cr(eYd}=@Hnx; zGiTl!<5@GiYj0`{#WIm#4 zH0;m}CP$RFmB-uURBvso+khn1P|tDm$viJOF|6X1^PRj?M|`z3ygcY$iim(-FksVQGl zTjD(q(3)`P0NN975}-xlc8FUqZlAb4K*_y6M@jWBvuoR{gPVxB{elRC>o9Zl3M#EHTO%j 
zH}PKq)Z&(g7>gM6fi|B@wF!)U+2mI)@8+79Zc@3tQ{NjVTh+`} zp<2q#js$D&rfM#aPAXb-GO<5#U~cVd$F?$kC8vC~mx#65Ec#%YpUr}qO>>-Pcbm~$ z={^5mdlPH?)FdB>D(BmEemE<@lpt|HbN#xe?lc{lX^}$xm*wWa1_euxzAlaZ!agn) zF<<@6$U=Q>8vBK~%K3b7P9WAc~b8{I?bZ6hUZReLIim;fgFE0pFdoNz>fh1 z6#O;ZPCl4QK@4&*s4}m}2{O2pB(aZKKAm)zg+qc4ldU$n~+f_qtl%ah=o{SD7XyYL)df zYXYHH_eaz0ylHm+G<)qddmZLKWI52s99LGiacpbWa%;`-7{z;CtsH)$pcW55p{&DC zWauZ}OzYh<)xW)&>fzo@>)SK?j90(*W~$$MGu3Oond-CN%ogLDQUhC-S1NZtdQ;u| zR8Py+uD#iGD6uz_j`8(+Z>GA*6;SJg-b`5l(#J$smDhk?9c2}voFdmb@-dP4o*-0= zg~#_~blaq;R5xhxc%%zk`!3L(%78q!li+c{H|`(h{e|G?0H3dek&W{I z42VAqEXw7`o(#ohJGJgUppvq^(Q|XuhyYHfGemc#I?qtu7KX+vMIUim{wUVl! zDrcznA926H{roN~ql%~!s(@BLD_)4wS>Zyp=kBti@R1V?1yMPWkfBP!tJ8O>AZ!rD z>-%?w-c$)9mkwOD+xsKj&#u}X z`5Wks(4XQ)aJS*V7y2Uf+tAWgyJIup&tA1V`U-RmcLaA2?yqngaGRA{^vA}L7JgFj zj!7E+oN`F~yEy5_|DKR73Xg@6y=^*gNB(AjFMN4jdthL>2IJWASIC=CQUe`7&*^&e z0_i-Z5`J42qBBnxLLFiyJT`)|5*uRQfm#~gJS(-O15c?zmD^JE<`GA=Gfydd^Q`oi zI^in)JS+b^tATlz`aG44Cx9jP5o_ly_2NZ7x_xUP%5aZ`*0jz=f~p!duykOc<=Cs= z`s;yLUR}zXxs*nT;(l93foiPZmbvg$0H|jAZRx~bGCx&-6jMQ}0Hg|H{a5X-xlVMv^s}jrKhQ#!bCvI0vqTA4{1|UnUbQdwh|C4CH1-H#u}8?EFDxr9}g9Aib2=EhR=P_819UgY(l=hGbHfdg9B3sS1>G6)D&z`sgO6 zl}$A2x3Aj7FxzBJD%=2nBeIdJHZc{T$JB6}@%I{{kNh6PzcM6-H0Us!7?Olrnx(5Y z(Tg|HQ8v-XH+e_WEt|1=^YZMl=)eFnQlZapvOIsF9I;13Uv@ae5_qM#nUN!2aWrLt zfsIxMo4{llF~DO=lj%(S7FP_?~9cbbI(qqTIeaslm1JY56wJO|KKTvhhFqj?vahk7lSxR{p(cdFFXGS)K^KrR3GislPW+ap= z$Pt>CLnDs{U)TUr$;+XV??qDk6uOp*lHuJ;WLcMzC1q)Gu{mc(yBvwNqsO;9fD&t$ zBeC{g-b8VhX~|Fbq+909DE6UN>?2aKk63ukuXx2i17a50^kUA8ZaEaY(c}9?Kw8~$ z=F>OG z+1k*Y8C_^~k!%-n_}Bf&b*@QkHkpW7tNsiQ-WKEA8Dm|$19VZ1n1%pNp z8a>h|$)Qm~qxLXVX{g_n&?rfxSq_b6G@6x0vs9YpP-&j7EbDhI{j}&}v=$R(F;NyP z%3?)XEJu{Zs;tFIda;TMXRN8HvO4f~5W9mw9!dcU>X0LLha!5DFlBUV2E>?B*|f^2 z)pSTW-PJlvt|jNp2tP;U@N+~AKS%0#&99i@=M2ap{G76AlS8o$J-#6Wq}3*ePMgZm zW_l_^?FIQvK_zHY1+=LK+EfL2&2QnVg|@{7^(s}EdYGt$mYmgzc#GoFT&zSD+Dnqk z&q6KCLNKUT26;a>K*{?#T6oQ`e7&C=psCTO!KSfhL#wLfLJJSm@IS=TKiR-=?rvNo|MQHzw(Da*pN94cv4c%K_k=4m-p 
z(kf$QJ&fs?lGAc(re}(1R!t1XY44mFi-^65#21nHA|<{^i7%2P@kL6!RSuO_RQQe( zkV>l@Ds1!tn4W4G&itmLispwJf#O>NE(Mo_rD2IET84XsOp9E954$GxP^>z}lj^Yc zD^%PV?j;=Q#Sbb#nR@3KPg04!UyL%ZeFufFLr+R#)w5vE3?9#+v`vhZ*j^mtKmo<* zG{q>>TuC!w?HgjG$gW@L#M9!?D={6q>DVTgjNe(?k5nC`q#jaI7r|?O#Vo0pl+;ZC zq%CqNwxGv{m1Olz7<5`tX_2*^a%gm-!TS)5%{~}3I?>=+2$QyYO}n54m>rF-xPn*I z=FE1;2ab!1^PeqM@4T8jLvzC4er--~p*vmdOfd_E-|?=R<6S?;J37bv$sA5pYeAZX z4Zr%@9No^%;qD)I78l2@#BH3zJwLP=*NEFQhu4$lTReY)47(E1+y`rW4MI~Vq`gqVmghv zQ(k_1E3u1JwR%M}8$+p$AuzWwRM;5uH->sP^5AnLH}O#9tpv0Z(n?S(VOiu^{G63~mh zjJ%AzjJ%AzjJ%Azy=No4D`o^aK|$aP6wuzo)8K6B+HAI*Ee~#bxV3-QWzo5_F3Zfi zES1X5x=eq#0)M#j+;Bv6C`0%V@|4tOUDk^?{3qdvXfI;pp}mUhJn|9KJkVYx<2*3l zi1~X-%4AAX={gS#3i8V$+0aW6-@(ngY*lzed1ueqvR=)QnTJaLj)%T1>GuWyuJcP^ zysrj~ohBlIq--i{c-!s8Qg@*(Q^dzXj9~b-uRIHhS^69`LTz+V4Y&d>h3ll8Oh8!E zX-+^fI;(YL%n%bPwUkvLwxbVJYswf#rz@i|z_d44zO{VoUk+NKGCLj`-K@All^Fv} zdzE*!@{9qexUWNlFBnSsO-jZAIQ~<^%+_Rm);b8_A#qR7+q?TeSqn06_^H*H*20V*rv>`5ecETh*J& z{%LO4gVRO3mD;-C0!6(MVVg3aby@tgu^+M1S-|9~Orgr8V45isSIHRDY)(Ml&DA0q z(@gd#0o5WI(_Af*F+flx$2C`rWDF1#$#GnVJFATWroE|qmtIu9dk1|L%V{*j?k-_= zJ7#mcMuzR?1f<_yHQShW(nJZUnr#frp3B-8VA`u>s%9Gl1ZF#qb+a1R(yf|}9KSc> zXnUYTYS@|>!SC&EXQVyI?6LQ{pl8mytbdp^jGKVn?3bxL+odyg30G<*T_FlP5AtKs7gG^3^(n(TKYB`sv{% z-D({=E?=!fV}PIz9mgHIv)mY9+N*qnuvwS=47P~PTnb`p(VT$9idCD9DH07OplY)* z#j4H507TZ9a$K=$qcK2Wqqy>|_m*x-tQ#IGwX|e!U4QHCN;sz?02WELfX2yRIQYb~L8pBDA?D0a2KA#Li%~5j*XPcEk-#K$QyPeS zCvqs86A+m0stsgJxoWgA0Fl)|jw@FUHU!P z=mZhund%pg>vlnqpW#|alG_eaYB47ul@=x0LTti-sU#fNLQ5m=z_dgvduvNN+p<(= zX{=q_|LjtF$5&jKC_m?tq%<8D=OTocfk{15M=E1z-|P$$)ihe-7$`&H)qanvpwl4( z)UGAfOG(XHDq}+AD^doL3LR1EUvZgyC2Lx>Pn!r!crU$&?chl-OaJFF6j9jprc@l@ zkhDAcm!^BxvZJG93LvE}8QVoF=mKP%Hv{v^6JDn9hFegpSfiFw`xqtEDB-+Qq|7>6 z9T*bKE2^M(oYJ0`4=3<;YqWX0ZM=EAHQK!08flR~Q>{Cy8kS$RDyljaEoFOpTC(@u zH+(;>V*i-LWG`SyYFG}|ye>{9%TpyaYhBu+N?X*mn>gj-Bo-5i@{>gE=t6Y0+QX|L ze0h(-GpG)4203~&Wd97Z@n&eefhN^>f;YKV9*-whh9=iCqrqmQWDMP|ZEY+J#-KG>0Nw4i9xpLnCQOBMmTAutw8R zuYzUikW2>*6)eMtvKDSL7`(#t#LB4-drcazgb=^J074J~1G+L$6Dh(d8 
z2&K|06<(`0ltPO{YVea{)u8sf&@Jw1uZlA#ECa__1-V$)tYa?+;BX5YALpix=L_}&L0aa>vl^nOphBB&Td4f*OY4IG&&uCca zUw=>k9VqrGPM@OmNtMTxLh1J@O?8=d0$$=ZlxEFmjdkTQ-S~4ziqg=9&X6vEhH9CL zhI9cmq)VNl_%{GeQbU@yhHC#Fu&HZEa~3SX-Mpc+;%Q?oliD{J$i0;EYmLkszoX@I5Pw1T2>V?O4Os(G-Wg_M=m!@&V z%*8@C7lY12Du^_>L5L-UxEa;Pbikrs<)&Gp1wy3|ut9V!Zj!9hLdJx*b6usfWy@U; zukG7^pbJ^D^wXohxObc|TW#wDA3R2eim5c90HU--7UckaW@L?N=1y?903lcP&FO?}a)n|g zfT%6SH3PJnZ-tyM*#^^!t63%{!{(Aon^e|Pah{dwuO4~vTVsb;M)UdTYhPVCi+>M2 zbnIIwCHkal6_hI`*}EZ`!>{r!;xLSk7F2+|Z^yc^$r6@Mnn#m`XWU-JpiL9;is{cM5N1~8;O?#=# zTj=gcxLmfJjO=Q`etexK!C^;_CCmW)_6lPdgu_T+6`Fvm0fiHe_J02Oq z{w3W>d6dJHC!g}$P|I_YX)<4E?M~*=Nc0JLca`)U1G1|pis@9QH7UC|nv^f6I+AR# zOeqTkPi)zBEIn}Rkt3P%F5Aiu9Y(357lb^DP?d>GLYQ;62&GvSYvdia1Y%?N>Km+Mwq%J`5vXHj_< z)U{)CW7ikdLSk}NCZljkQZ_WiFiP~9a`1AyeGKV+ip@_y#lEMXV$ai0F%W=Ofp;8W zd(%%*Q2-l}eu@{-0Jbyz6s-ZU8|kNbd=0QU>8F^y0QM^V6fe&TNRJm{U53i^V5f5Q z=9A%kGMrC_^OfOzWjJ3M&R2%>mEnA4IA0mgSBCSI;e2H{pWzFT>wM)pU%Ad#uJe^E zK%#u$-R`G53+co?s@=pr%foD~Arth^iXHOyV#Z#~*ozr^WyW5au~%m7l^J_w#$K7R zS7z*$8GB{MUYW61X6%(2du7I6nXy-9?3EdSU#;lQYKAwE;SFSX0~y|+3~x||Hz>m! 
zl;I7^@CIdggEG898Q!1_Z%~FeD6boo*A2?+24w?~jXiD`@MBuKbd;2j@)9<*fXf0b zcSq@@NY6pxYfm2Kqqw8whinH_vhShT0aHR4se{7D4&E4A zJvi^s(NM7n!wb%>Yx#O}?b;1>i2=Q?=pUtO(8P76j)q6B=t^}t;(R&tmWB)sR#zc-k>w^8GSJZr!yoITwdfOYw$qCAG0@Qfz zTcGel1}Jm%i@)pG(VZK9cf-dw#E%?XcXa2mFNX-pDOfILpo9tqdHtjHd@h;dMZKf- z-5E}gp-lBkoC>w8O zbMmmMy7sy~TA#`mX+Rx^Q!cb6d7X|Q_>b~2e|Ij`ezd;1z?thxiD``uT`eXHwC^9MkR>%8u)#Z7hsyrhxIQTAh5P=48<;r(;B~oQe~@aw--q@#bH5-G=Kb z!$VQo*@%6EiKhl5`v<-22X*Z@=>23c+!t9m=xvLfjLseOPV$xr-*%_E%gN}9L2og7 z-eS}2V4H(hf3Rl!kK?};J`>Qh0gVq{;eBF|qT|ls&f?;@mAH+A6dl@(YsBs05OY~m z{g)XGr3OP_ZZK3B4EZ1)AD6*(5SC~gWK0e+O$;)i2AM|&nJ5MsV1talL5AWW<9Lt( z4u5Vivn(*Ex0zEH!;W$a)W%PGsp($P~N&@>u>isVPIfwrN3v8gZK!@VoU7( z#MlPKdRv?pM5Hk(x9e4ibAULwh z`Mn*EM%D*(kE2#M#H(gu^@cyPAkZ6k)M^2_RS-0lMZQfryVe)yYD!#W1EGt>nLWmt zMaD&ERYT1>r?+qallvce>N0k0;w>Q^+ud1JvJ6Zqa&J`@ky$QT>tDuB=U=rloCs<- zoS0UNJVsnh)0Jc1`i#ZaU4DT_z9~6Dub@*R_U*8rH$is&&<}J13upHZ?mnf&#veSI z6BOkwrpg0%_`S<}m-qMUM!(1BsS8ix6~v|-!d`jG1`@E_2dUW8w(l7MxM%g`(Os`S zfy1MlUVEOc5Z%Cb+YN8GVgiOHbz$7sdf($2llMJ-mh`@-pAx>OU&6kp-<7`CA>gM5 zfcxFpL;SW7xgJ!D?XmE~(UFI!pn8w|iuh-~Ka;I#TQ-hTPfqf`6k zhnIV}r`Ib!VBm3&k!y*_6A}w{k2HB$%Fhpaaw=2xED$o(YES8ECW41xy01s3V(=JA zPZ4;xi6`@!R1aT9*G%Lu;(1KYkHI=tBw0MBt3)1G1pn5C!ngDkdNcXCJR{-}U3d5v zCQpEE3~yoT1=xoG?{R=_4B^fJY-4x}AG`tVnF8|RI*N5b+PHh+xaYiuNn8O70JbND zdkkQU3&jJv1$-!f3llfMxBjS%1MEzA3s+SD-y>QE+P-XnCyj2qla+OGc8<7(>kvUs z;0xG-LP8U8_aNTM#p2hennGo)PdUT4=!|fo3ds4!tAr{ICuSO|0K9Kus8}4r336%E z#wX2Og}}GzJEVY5Jf6E*?}qTNy3n9OUkKmA0Rq6Q04t~9jNq&wE?6np2=IwNT(h82 zu;=0EY^)S6HuR2*j~)^hO;R>28Aj?P&l}(ABnu>IH@?NN$1rQym7-o5jcY@d3SX7- zPukvuFnYVN!}JVt@@F!JS)nf|6b#Mp!o;S0<5L!gueT)QJ1R%6b>uonKE|r>pZAQd`cpf?q| zWGZw?K1F5n;bJKC%C3=-kzM;9`EoTPgDOaZks5Fe%^U!_$hQ&p`pg+NfK#`UWq?7vY> zw#xZcrZB}!Y5AG*&Yi=<`|0s^X|gT!NDx2gltZg}Ob}YdIJX*F)e+G4fVKrR9ndDD z?CBY7txbS0$OsAo+G~_O74q8u zWi-Mm3ekEVruSAVy^lYkZGm7OeviqIIbVdS*nCX>&?Qc~gcr)qYoV+NQfUq%oU;(; zV=8lc5_*$BH#^TrB!(s6Jqj!DlB$=dtS&rIV?j}{^D(>OGleFn#Tui;A`6VLzDU^# 
zmjpfwWBpM`Idw;Wf2E@1fqN<&7BoD--ene(HB7b%SUBF(3I-V4Yf-sAKtfLAMqIh5 zW_&RvVM>G9?%I)inpd=4CTa)ZE_3(Rc6QRKK z_4-w+bI-uKhn5fTd^vPG33Bd6q4e;3 z^Z-HR8J=92&%ANI(U*^$|1VwN0v}a%=bv-$omZP!8mp@fTP7+#p;DgK6=af`WQI&; z#+giBVofW|Oq8~n76{YCnq7qid5J}k6s2N745?y75yNT#A3$Bp)(^Cc?Yd$MwOYIW z`&jFv`~QA_=iUkacmIL=d!2KB=XuYZdmg{T{*aKF2egr<36TwefPcN<7DDmKoWVhy znS)El{aGXUT~o{u2%KsbI2F)4jX%m7Pq*<5;ub8c*Mnd>iLu-o!bXT+z`R2sw&~B6 z#@CI*Xsex_=s;z#!=b~Ut##<>GnZjWDMK3iW5pY)XqU6~)M~yDwH- z>FWBcaY1D;65x&Fun}lHUFlj*H>Bt>bdomD*pcZ%ACM6=FilZ z*H?0jD_Wywtn8Y~9D$WZh^$dxd46e0lYe~=iv8?AijP6Q-_!fO?|rXt=ov(Ktz=;9xav@KsBY`H6b#?TYPaIDfm9&8wFGl` zb9od95&>rWC=et9Zwri&LgH=l{)r**K1DG|hQy)78*ChkW?Nxgvx3CE4griGnKx}k zm(2$Tt?1u)c`Ld&K6pV%%#+8@g(0~91@V3p7vMzdNO(>$ytmnNDiBQsFqak!+#L=F zH;e{@5nRi)cXadqEAG)A=+XBYX4^2KxD~@Z2B&eZt>_u)gUb|K(dTW&Fb?tjOI-lH z_g4IH@WDOq2!~?BZ^%}BUTj6b!0c|t=fzh18u(NAptcDHPvHrp%#&yjSS#9!e&9X; z)QpDkpNxP^rRyjm^5eeu2GDltl91xPg#DE+-!YN}cnL1*rZp1~o|{=Cg;hH?cdoJx zduJDUek3m9({r<}D?#(qkPD_3oUW<$6Clq&ae20K!o9&y0*69ieG5dWMo?o&P%Az# zDGj7r(2_{vEynW(p11J0@hrk~$5y<#z}0x>;(2gu0&~Ig!5f(sq| zp$>z_JaR-tEVkID&hE?lR{Q#Q96WgcDEvE+U(QB66%JjbJ26 z8ap@^6os}|p41fHk>d@-ZDb+HDskC;C`=+03x*OT`0QlVeMjB{Haa{=N`u!bNorJd?EE%VGtTK^O&Qz$w*!L zIGQdB1ST$RvIVb>76!03iQ3f+?jf|vdbW5i45g#2cV-&9n?vE_SlokZ@AF6|5mRUU z4k<*E(Wvpff#)qeZajwW@1=K+LAX-p`o5)#Efp`>0&%tB1S%nN{f zF96DV0xN6+%X$La2Y`@`fpi1rZe%v^nS~|Z!&@S7Lp>oi)PV*t0WE8S%z6P*iF|Vd z!Y98*2n41{!U`r}NKJThTpd9@L6jgykRV7BG>8mVxhBJ)D0uvk)v5{O(4zvHJ~d&d z?_ZK}gxraKr2#u#uqapWcVyL}9gQ_N&OYk7F4NIsfh5JZ#ncr8#P@&Nq8oi~FTcbr9-66<`YOY$Nxf(r4 zgenjeXwaxukf%;_5)NJheSO%J*V&7Yy57zMcW?3TFVP+Xw#UFco3FQVli+AXP$Z>M zSff~r#z{fZw4k&kt|T4QTr8%s_8JWd7fNUp)Sg1xPbg_Cii>N!L1!girP)S}ByP_h zEN{c|v*gkj@`)bbLk^WhuPVjC6R6ntA~Y6F#oNyMG{(Q0%xSv&($0guhtM~8dVBkN z`<5;1xD4~Is~STf?&@`LcOQa?=^Z?5bZb7 z(Y!-T-w^~OM-qkVe zg+vq*b4a9U5sBC}K@jadk)=H(V#g%Xme2P0-Gh29_3if^8S?ye*^2v*?=-)S5f*+e zLGWi;esoEvX_8XJ)$;od8GaxAu3yrp@gt4B%^Fu>6!}YBlQtD!2v{FNe+n7|dNL3= 
z=nK=zAh8tz7qd5FrU|rV`qJiMx?5(T7WP^j#jvef=?KRt_p1(Vn(K84igToiSkzbNdIUQUr(AfQ|Yc0hmBpk1`6oNODzI>#!clC8mFTQYiOYOmzo0i1ZLuMg;yjm?J`Az8uZLsc2ptYJM__cX-mk*yTi|Vk zonfd^M`-*$jct|KLIUIEb|fA@Jzq$KsZetbI1N^S;s+YOwWJ2sa=4*v1&_-y(T&;k z!X@eD3zx3U^ubKXOC9Hzl$Heqg!p9}2$431s)(uyf&|_rgyKWR1w|;BM^HzAk@z0s zdV*6i!Z?A7$D!8vdku>meIqugqQ^A7;5;fl@qns?Oo*l=;W2>?B*p9BYv_FSx3XiB zSdP`Fn~v1L0!_mL?q~`&G|Xk7MuH}SR)SD#`1}&%Z&7T_;GS13+a)6SZH0cXx_&6|h08#Pq6Q$j}aKZ5el)CBHB_!TT0sH z13=vX9@7YEC_RP*lp^v{L|p340w7X&Ba{*+#}WrdDoPP!DPk-&{+{p{L%~K%scQm& zMR2`w8qmyOV?<}?65K<+#kV=VsIRk^ZmxR(U0*t1)s&V*1SSQh1vXw!xivu$%9tDq zR?s~feF?RgX7j4@m#I@NR6^8d8q1!68~5;uwnPvQUgo z=k8iS>|!m)q)?;|8#$BlbZz+jQv6)$wrZrM$s#VaTo?w_QnkB$kG-4uArjP}jsx4A=MG9|_?TQ@Q6tZYjh}Wi&Nt;5>Y|8tFLp2s<%|#0N zwf%imuJ%cy)ME~X0#<)$Hzp&cA&|Cne2hMY)>L$G#~m*ymZIl)kw!vnDGJD@+_wpZ zrg2Q#ZZ)WCf4Q|0*t-OPF+|c!ng_S9>ThgrUDDjJb0Nevc#P}kp`bxrF0C1_R4MBN z$SMKg`vssS@M(U>m<$Pul5uN_S*I_&ex)c`VUCdqyT1exia0Ma5OyHGp{2$U-wA2a zC};8E-VF&_k*zW9HyFL~JOE!)+>UK)=PT37r>GK?`i*U-N;SH(XPDi9(chzo2ato9q% z5F2*~iM&e)Yb)7v0%ApfQB~yb8M#mtD83 z@wf#WD}S3u@bvYKjF39Nj1ewhuOkM=nAnedw83kvdAgIg-7y=j1tc)k^q0HtYggTn z6-f-HZ9N)mp25g76yICXV-q4VuF*b#h5xG^0vH2NwA{_@>gVuMB#TnUZcDqhUkMA5 z6IQ)KWe|{%a7eXrwU%iTLtQ)7g1J!Iw^MkmA(1tc2YUOI)y&5*iem6#h_9$OIgu6=|$_CXPHa ziOx#gHP$@c$=mKa3GCIRm4T2XXpHYGIJ}|@rAQW~EOA@PLI>%zoFZA2G9R{-EeAhT zk~NKmmz07BhKXG}sDsJA)myvA?{j@}-vF53o zJXHgme02XJ(-em>Ck40W;W8oz{3<~zchA)LT%@Fhfu0y4|3;v zDFuO<=|npk#Q(vO!9g^tK{$P+^Cd|7`9EmQ3Yh#~b%of3En!KD0bwWIp~M$tOsST!$~5JKbyBLOOsPZJKAl+Yl)}~Y=$b2% z;*(N4B25;hPE-{CVT}wcP>$@;(6vdoW^1xzr#}{ymOAl6Dg}XlKtvkrjzeN#w*f*V zRKYhKGEEkxOrkAiVc4XjfS%9R!4%nEm?BL&(K{*PD@nB)V;0O#p~k{fsrDe+>65B8 z#w+@3cSAG^flX*hqb+De%<$=2sYAF{@|VMT+QECUE#&4cgR#Iljq|u|(LCkajJLw>4r^A*q9k$dV?XuER z$s|aWewk=yS=nLTHffiYEoJI-SX6}L*HPn?6~4u#K8Yob>9I6|bkeA$k}xfGNGFXV z)yhR9;uDR6of3pdYb;77VU9*e>Zp-4vfbIwi1)Lf{Va9A~C zR3jQS>AZ%HMn~$X(V3I3!s!=u@%^q`^T138+NSWt!l!K>SooC9153Jh+u?zQf7a%K 
zzd3SH{{C0QGRGth@;uHa(NQ?$ciPP_jB&Y7ad6x5DYvsYl!AG-o90j!I;L4HH;{Kj>+Lr5r}t|EGVI)sx8+s)3CcGJ1gKJ7exy7Ky-*}bZ-@6KtgGHHvt%B0m5 z$+4X3M(X5LhbY#b1Uj1=%cbXeF^#!7IUS3MwTH&x zy2Ngdn=~#a9__|%p2ajnBz?zY1DGNO<-S?W-trWffmp=H+fXoipy zev?qrk6!@EHGc`1Yha{8cLSQVdjiL|P9vg_fPxWFu#QqX3*9{tA}Wpo`bPSY%@wzl z=teY!1-#oXaP+lO;OIu|sNqDCQMIZ4p0FI*Zc(AwKEvG?ofRkXhFwO0zu}92}UwbVp{= z^x*Wy_NSVEWc{~yzWa6WeCxlBPu#B?pBNv)!o*}!o*swt9d;;dm+{F?;}f6H_ypG? zC;i4J)>`v@4Y5aMvh{89*0W^znB|OpYXh;W&mt62R$yIp`J6Yr3Gb4r1$%hO7Hg* z@r2*w;nE=1AdygU^=%ec-)8Gao*M7D#wXr}dYV}A4|p5Q>E?t1>u}CGtQJ^@PvJkv zQ=~n2ArJ5MshcGIYQLsn!kHWWV8!`J4hN?^OU>=RjWsK0ErtQ!>iUUyZ3v}<6BD}{ zA|cmA*FXD=KniEqBB}at3rt|g{N~1|&7Ju+_2Y8SlUa>&JbHDQhp_Ge20`nIBkCC==iAi5!`JG(bBs@-UkG}-RxB#@ z9jYneGGMyV3|rs!{y30GL=g>VdR?RLb*@q4TGy!S3MpQAF4FrDCOcmMAblD4fg8PV5?3I zcIwnXX$_RtKxqw>)<9_ul-59L4V2bUX$_UuK&dRm1q6l!#sxMDbom5@1=b0S3al3x z6POT~64-F((52ZsTQkkqUAlSW3y%z4(b*nKC*zZi7YD3s2C8AA2Zn~?{(%~{wXnYi zi>%fDfhZPR{R3>f{(%_YTreoM5Ek19ti=OO@tA)=9m92`H^7$4fZX;vK&}3Px=6x5 z(2%ah0-b-r_m2axQv=H`us;)vwTKS*sZC?|LM-AR2&8Gn2pYo4mPQ=(_7C7{9>}AS z#w2dmjANrj9P1(jDa4JmASg~w)#7c!_E>CZ_ai9{>116b6~?XLOk)i8(6Ni){Q<-o zXz&jt)7TUPeb~(4AE=5nRL5~0?f~{_$X~iI9c)^%FpUL->$rdbpu!z^-RmpWZ8lzg z`&ScQvr>0aIGtH|JORhtG83>ji(vgN1|1kE;!&E^Cw^ue` zRm6H~O=ZvROlvqN-ZSdDauqPvR`xhV{`U1qGnS|-)#g23%Tnp8d7>LlnScF7>A^nr$p?ZYA8njiGlHZ|To<4IV3 zZ(58q3#m}D5vKzDN6ZTbpfeakBSMdtid08KP|*q_5pW%dhwIVuO9fGV>oNSaTP1a& zD0WhzLF0BV99V##%gd_hUL8s`z{U@+l^(vlG<10x{?{BUtGT?a{_@hex_Y<@djp4S zBlUQXhGE!?oa-|0_3Fa18(_gdg=?PKfPeaKX(9q(oJLv%BqGbXN$|<*%o{gmv)S7+ zoyawQP_On59{cVsWwiv+Qv{@1e){g*vLHaI0OV+HDfG=PotH?)X(Fn{Z=Q3^`mDEM zAW(;2loaY7W=M{e)>L%;#e2Q+ou^NyuyBcs;z`DMr9BDn2|~|Kc1-~*65F(sZX2FsyAf+BP`(A2Z)!dhd)Gj3h)+z zn_v;a9RNm@R5ig|f(HT2YXPj;1hW8)-vXTe1VFVAo*peF{f5A|1iA$-5_kup5f!>x z;9P+Z5}MZ%TC)YtA~b$Wc={7U=oe;mW9G)u?Bcm4#zhx5z=TgIK`-WnYJX6CjRN8l z1;A6MKf!OvDW>1(!qJV8|7&#n25eD9TWp&CPOBS6QBu}g*Nw-p1r(3Fr}t=YrgyM6 zpY1iu)cD@NsWW&E^s1{gIMds6d-(T^Hu% zy+>~^H~+(|(CC&y&1rNCBdImkf-`E(3boqUVflKE>4K1%+ts@&Y(#p1jlIgs5J%AG 
z#!tc%q3-Cc^PLXwsC0bhEjF&`>w|sH=H&~gO{>A)>|k2=kk15pBgGQYh0@dAd$hYZ)7?8rLf2aN?N-j00cYjB8_F?ocE}ErNMd(Rd}Qd~eXTBU za}FC13fi2*O78AOHI}M{gDjGxz56oA{O#o(Y*QV`F(LSz2mmPo>ajlD(VHR2X9?#; zOK030ZhT~8<{zd9BeSAYr*vT$sjs!=WwzEg%ml2m;U@;0?NSXXOzi*~e=tVkHO3|0WyGZfrr$HiJC6R{$4HfGfr(?K=-%Qr>=wcO_ z7_~IST}!gom6y~GEU3C@dT>K#Q*!3$z>E&-O5itV96^Y@b(jyyc zR*a732JTukK67w1xZ>cJOBMwWHQ}2J+pEm0j7!wh=DF%=ca`!o!8ZO#xQ|>rQ2I}i zHVei_)?YNu%^37+V{kYY3W-{vo~D6Bas5(5-2IFJh)Y_WuN(6x81g6MfofcC?>Eb$ z_{nQXHUw=sC?Ax}IK>>NCBU}SYLJrbNYBb1spg)JU~E{x_w zBva{X+?EzJ`tjm^jxX;=LIRQNXT35%#^)T$EWn3)z{w{3ISj?+y4WbR$@+A(A%b4Q zdswLA7o_*F^^-O>o;GUT9P4a4uY>U3hZ>x$V{Y_@ydY<32-PpA#E*%QoW zmoVFX^KA3Ob*?McS+VKUf*Wi4Huh(7qX&bN-kCmY$~yPDb?}}w-^iP9DBQb(olI|F zSjrFPc&t8-ol9sV=@4FAyrgPDhp{(nHs^qHz`CsBjUnR_dLkwfEJdmu;swsV$>R|hQY5WRp>g+psaIlYkC;Kgp5dyriln{N`RcZsu zz(QAJKJH#u6>q{YGH;nnir*qbfoR!HLOLNvw68KaL zqZI0tw_SQmwAgavFq4hdk=L_nz1NxRx`#Sck`ELwPoRrOU`OiWX2km-CN1~ZOiSK9f@`A z&@VQV>d?k}Me6+`b&0NS#><~JR=FWEr_#q za8M(#)&I+?K&`(XCx!7<6^QG~%#g5_17US_#Ymy6Us8`nuypXXMHcKkMq74jUAwj9iUDb zmkhMQ`c$&o^NkdO#gpa*`g<~zv~IUP*BA*k;lx0|uU1$;R}aYhfd1b?Z=42nNDL0@4DU-7k;yQkRZl8~-%iY~c;}NbKX3k0V?CPD zv~iKf^WqJfPz#Aqy6+Zbi&Hp46vk%U7y1S}`@VPgs%3+Z?p^J@yKA(AzN!M%b&e0M z#N4T|b&kgJ3n*z&c!MeH@f{?}Xk>oAr&s&T2*xxES@gz_py`70oYo04Kd;7n?$Tb( zjN`{nx|?b0>eY`Aswes#&#o8)b}*0)<`rMB7xBnCM1*yQ=$3@td*mQ=?ox2 zmpCoNH=e%Lc-KVj*C=x+OA6HWL%JJJh_Am_bwvP{pH2M_3 z=TF1INb1u9?>6|TPsCF)4D=6ox0S0wtTN!!1l`fFs|j;-{$ZGnuJ;em-e&AAkHyh7 z%vKfVRBQMoIGhG*V!zRgcQWoDu8+qsC=77waX8$Pc5MOe{*!M;h1#axO2un%l}|D>+56*R78e#^{@J`((u6TqV}#jq zZ^0Kp15hG~UJS>!Z~;clKa9yM40;_dZ?ul68v)Jv0K3(VSrGu5^RwBd%ldMCq0!NX z;2uo73_=PYTtFBx=MW(f;BIpcR}&y3LO3nU<2U@rR8G9YRVoP(*#pwWGL!rS05XhHs4F0 z^l*s3bAGmGbG}(=Ty0`l{EVI< zpN8H4gjL9RO^qLvl>q1WQ-pH-cu*kSQm=yNqiqy4dg%ep0%Sg|r%sYr~7Tj88aAB*Yb6#wSQO4}65G zu!2wr1NrFbQM92ZWCWVEo1fj|*-?t1Hh18G((&J%c$P-jJawaM9_;Z(lCF9Du7IFa zW7yqq@K>d)>!KmoJbbqJxu80)3YCM^FdUw)L*~(BaXp5;C^;`s&w-O)J>Y3~%`-p5 zQUrbmlYT3Qf*M9FSk%HS3>wINSjE6-*YRMSfNRb=hpU}u|INm+emrmCapPHp=Z@JE 
zj3{t5p1F7)oQ<1#XJ2W}#xo1_ukoA#9mewo(7W)wi{}rsaeMEr(1+(OJZ?OT@Z51L z^Z{4nnTzMaTcPh(=)*IMX~||e5q$7q=b`S)(rLK<8qXQHhVgs>^e#N_;`zg^h+)EM zTF9P&BfmuZG}Ee4iy)&*bAQz6aV@-w2$u&7@=O+FJt_!SRKYAFezSzD!HYb`xo#Gr z?|wmgA)@HjyGo>3lR#r}!W%!P@K#K$2MDSNstI_n?x+4J3=z~2)DnaVA_VgY>Ik9) z^#n13I6;Eo27)9(ilBiYP0&ctM9@soLeM%J8({+kRRq-pL4puL4M8nIm>@y`ySAVH zqp*%3N>EP_BZw0u2yP%q5~K(k2+{0;C9KLVy&Z zObF%?AO$EB0;B+CLVy&YOeD3Tu{2YN6rqd=kRp^30aAoAB0!2zMg&L^%LsNV_Oh)L z%mOffO>l;6nBWU!cM-fx@CN|n!W@FVLGTuVn_v;a9RNm@R5ig|f(LU5cI{qJZ(^W> zzy3${zm=n->e^Lj)PDf>3p``&BQ5X@rf5&tZVn|f?T4<~pRvjDSESC6vUNEW#~G>( z>ecu&?OD?+RDXNcSSQijj`U?@Y+w1JuYA#$ILg3+x{NYMmr+*K#Zd?9!USd?^skO{lx24pJ;sX5Pv%B{3x%*LuapIIb;lqfzk~HBjqusL>{wEv=D-IHu7~BQ@G&zYP1Jh(+rpu|p>T`ZR@-fHo8< zZvra)%+Af5Gtd6;QT%7QG6pqX$H6Kv$~2AA(F788R<@;9?TjObc9lU7@;H_OmM9bz zOB4|Z&0^^gih0l!>y0TtlXKd(Lr|hywnOB^XHJBezj+=d`#yrI@i|bx0hJqT`Ebn_ zUGqg(yOkEh8fC?>PGiR0iW8Quc37tocNBwmNXWbIXGyLa*^etr@YmnZWH=4Is0e;- zJ$Cby1nmg5wwqIm*8NTqoDovDVC#M-`H(vKJhOcy ztKQwMeQY1egVrzeb|c3M(GJ+N1zJ0pQ$hEOykF$~BJUUZnS8ru*}Ao;Ki{rHvYnnW zr#5DqvbEX9OsY1MzH!x4t45SZ{dV_h^{)aC0q!$Mi&(V?z0bUml0TABr`2!sn#_wl zuRY}%ATMF_rxo-7!aGiZso$P4KUU|NALAm9I=}hxh;|TY#F2LZe80sQd2V}G0M7Z)(Liz`hRSGpkID%}D}GrUHb&djt;7l-cb>|A~6g65T* z@8M??73zv0 zE8m?J0=3-%8=ss?<}Su_q*^Mlw!rGC(1A6{Gm3t-Q7uG}k-Ygd&~6)Lg&QG4IZ%jF zv5_qVVzQVjrm-7epey(piWYYs+lfGC;TxBwrjA~E+r`tDPWobcWxBDk;Ci{-cf|88 z_iNa#*o6ITn4$_b6pZ=Z#(ejAxHykH!SJP#toOa@`9Co%GP%A*D2qHXCR@5pZVB3EgF0Q)5Clkw_0 z7=iV~%uU!Kn(s4a7qS4|q`HkVLgCeTked$-*F{S!uH6(`vOPV2BCbYF)uHkD+dcn@ zrD`Sz+wt%@jD=p#{{}K+p%2>-*Jf$KAZi+o)XWO%)MRSDgYRR z3<0^X1F*{ulxv7GAAt2PLA(jKc4Dde0bG}eZHwLRsp{Xl4UhV_0ztRbh&4$aa=+yL zEg`=0FwxO6br!Z~WM(w)TcO9juIPXjw~h|XOJf}im4vIMlCd=Mm`=q)iH0-)teM8K zDG`%~0o;;^b$e__zy~HirIXQ69G5)R#-kB@gkq*LnZPbQSWv|U#|V+;CpBhNxL$Vy z_EAERzmpKD09bdBMc)J0`>f6DS9DpIf5mUP zH8r(JO)FB%YE2FqL2$!E}Nd1T*pGpHOXp zyzZxPvb-A{kebnLUT==?LPH$i1zUmUi(S5EFfVpt(FXHKel4>Zaqu-m*6kW7xCYRm z6KK$!iNMO$k8p*2GMGY(!K9RWfS3JYOM^5H%&&#y)u@V8d&k#e&e!q9#a}pKLc%{> 
z10!R}$-&U%D9r1%q8r)AxfD!^FHmho(#ZTU{ZeMMGkxop^Pg@EwRf$);!aFHZvQjThfi4cn2|BWg78tl}l!P~D^-ojgm|oA{5pzjtg+h=Q{!AE!%Jq9m}`qwr(?~j5Er=c9>E55Lnsx;HU*eK zS;d`gxKSy(&bK9#!eY@g`a3qbZbMbOl z*I|~=pNd8}mB>X1>`N-|HgdD=P;9cXlo~{=efx9xxD9do>alCwyEwSRmd|U ztimSI*PlgWWf5I6Rt1~<1O0l4kylTM#&zZ@xH6%l3&$72_+i0(xAm^~2`gvMe{&8G1?N3w4&ky}p0!A}@Os{=c%Y$8 zwOTtch`0+@j>l4=3 ztsF;{-c#kI~J)b;6XTsRUbC@ z<1w#j?#E-cUT@_3@u;{O1f|jWM$V|{1~gj%9_{;KXK(Go1*y!k!XueSeo!BX;*G#2 zv>etF9|#7plMQD#E;5u_qFUWQA?|kw)yf}$$&?ka4}Kj6DH`F$OyUPQj+xJ352S_e-GNW63vS@&NUm<~c=B9R_3OSn!g zu@eeCn(Eic=<@az+PhuL3a({e-8bnUmfiYfdUO<*hc`4l`{*d->Tz!$lH*!7ThajC zOdoP>eo-xPU+(4Rx*Gg6qfPkTm-l1OS|ge`NIR$fHr55C9%oRm6jhNl29F@0YT@2> zpmt|D0-H?1ijlKujDfXCP(dAjUq!lYcn@g&?3(9xqmJ)+fA|cRFVuMe{e0B($BgZ& z7k)rGRIVt?IU=0)ah&(%!r{)%BeWC2S>Voob+hBmc+4EIUK>YcjJDt0q;bK_stfxu z0x&mXcnDe#64iNFSX4+2Ru74+xi)XjwI%!}xd}^naOC%nko{Vy(=0+wI$ryWKF{R2 z3dP`OvJQnfxx6T`+Qd_w+j~O^dP^g>MvFAMLlAb3i0>)!2)Py9J5=krxNiGRV zFbJJwN+i)FB-tb+;i9?Un6x9)+yCl7KQ_Adn~x9l%a-td$hbcoKXx_p-DBJaiQY-l z5J@D^4`+H`V@LwiApGuu7s4xh3!D4rwQjt-u>XrsC;GKR0sAaI3}QoNVzPBZMx9Tl z3Z``9^_nk0)?Y9l8*GO@;G@9!vBjwzgzR@Mz{{HHRr}3*yMf*lC?d z{kvfbo|66%n=ofOBK3)Dr8(1)&+dn>Cfvmunrz;T#6NG|EO{oOS?0XIC>IKPDWHpa z5w_@5NeSS6#|K4>;~A(71#3f-jemsKG1B_bY|ZrMn1Vz121jz4)^u~Y6Fcft>5hHH zjW_twv##t`6~=xHZ>G54R&i?vo=hG$VElrcq1@+W$4Fy+`3bXzCx5&p)(qow-V)>I z*jZJ+ZO=Kz>*k{>%sn?;D?G>etNG|U?wA9JCozO5cefjd+qp3g8@4Qubv}BJTCn&E zC(3)T^bKbT)M8?2B(d8Zn>JOrr&op1h{8Ar z0R}tk!`ORImQbHzfJk=5)7>VpRiCFD;DT-Ui$nMVZfwL(`^IczHoNS$MY8UA+oH{1 z!84VZU>XU6sX_^yP8q>ep#)A7Qs7igYczv?38o4qaGH<;r)pZG=^__6Rnr5JuosO=~nw zGa4Z~b{N6aHK!4>q}?<|E`+r)b7uzy@xSD67V}3iRVabegcLYc(;7_|xxlHK)@Yh$ zG(wD8lg2ZI5ja)T8coxTM#!HXM(}jaX@vY~H;s`WVQtL(*+D@#^~SnewsK`ZvNcm& z1y0qpM$H;qwi!rGX%X$RHB49#kUs??4eqZV~o+tuM&y>vhg ziMx$ivpTp|%;6vY2lzv4QRaH_zsXf#bT8liq| zC4#4GP9xN>cGDR3DXfiIzjjbf%+Rbxs9)`BfQEw ztj2hYby(Zg;rV*g0dJ_d+n8^*4sH|3t*yZEQIs(Mv#xR8Yrfkn`}=o6=S&?!BfQ#L zp~iTtg|#u?a_wvrcw>dPG2e9UZWDaBwF1XSQNng1Zg?Zbi;ellYaKShS6l0Fd=w?} 
z+S3iSt^GKJy!l#c}$p;q_elqZq;xX9t0eYq9WavUVRVt3Z% zLI|v{x!^I=xg>ozue#EIG{7hSaUk>wIS>k%xqV38)`O#wKo}9;2EGTv@>kd6nBuGJ zQN&xCaY)hFf-{XrS#?GXR34}1%d#qf0r5ScacY7B^Byek2C!hR0(nhiB^?6sBmO_- zN6RHF^)RgXM_BuKY8j8O+DA`?*2h$_f@nna2~;7qZ$wq}t!eEW%fF6qAsyX(1svUS zXjhK5W=Yz*?p^SC)dRwMIG7&)-~+37o#oeP+{{!C%n;4t{8ZX zK}N-?m{=z%#k6S`)y9s+SmmCn-X1rEM)ZpDM<6ySoNZzQ(@39l&-?9dyw z%gLyZ^^Wc$BfcGP_((%o<1oI{1n2R7BA0Jwfsws8a})?c30?99`Ebi>gl=?ghk3bq zj`6U%$?_N(Dte(@tuP+u4t|{Cx~UuoJ!{Nz>&m`qjeUpl_cH#neaov;hhI3nc#7*W z9Z=(R;+S>iDeFoEQy-iXXndC*(g}^tsf?*y#m%2urRLD>eX?`nf6B0T;M4wO3&#JH z{SSZHc0~2*p!xqP%C`THgybPQCC8V|kmn|xg%VVHa*bdZ0TB$#A}4u1dp4%;X1g-8 zjhV*JXS?4%DWBTLUT$6YkcRWg@>CkfcOpEjfUn`^*}OEcnu{;Er7kpCU5L3Jql}Gl z;cTpe&o(!vhnC(_y|D4%veo1Nu5(&CbD-~@*=(t^d5q>{dFe^o{ZMUOs&YQ}Ce##R}OKCiJ3sdq(K8x2LUI~XfX z$vPg%u8#*>aHUVAnwvB1-207mLQ}1ZwQ|RLl95}q?nshqZ$-|pG>MHnE8&A^M+-Lj|uPdE|bX`HR%^1+mZRS3l zm7k$rYuCtFujL3`k9!Xl#cnB5_jEAsv!fgjQ zCb+XYg0*yBU5e>+-zv45%ZgZAA+MfurOG{@cNaWOz9)onxyBgd64YQc&DAj+ANM_B zJmhKm$hb{y;ACS6E0<8n*b!w92Tj0}1^|qu064h%5R395G-b?>ZA6<@8_=C(Q<2Qr85-g#Gc@xd2f6qiJVcBM3FpG=I$ULA*bX$|0Cl^`@1Felanz+5;X zj6nev)D-jFaplJLHAE-Y3-ebj5>xoPdHt*7OuReT*g^uWM=flWgNrzCY7C-TeuzM8 z2-OFc!tuXg8V5$o)yJNH$8r}=*ohZg1-#ihW-|mX;T1^^odfy_Mj(li7nokv6z^l^ zRbpa?D`NL5(?9_0LbU)=PsV!hJ&ozkbl>1e@JKe@mwBYO7wrR$#@ttq zsb|6x@B%nv2?6IU&3!{C{edCtC#3dj2AI4VYj^#I^)jF-W2t%q!F2bco`5z@=Gt&l zxXt>>S+4%}Vi~;F&SH9vld5ZSa=KU!8qaW_B%=h!lfJCls3BzVY>tX60l=N#<&hsAXx)*!ap;=9&7-3)&>^QjC~XX(Su3qY{39)p zqdbFg+^>u^qX5emh6%8e6hJqF#$X*{su%Ex8NMepfib}zjzQ=`#71H+G0zp#K-e~g zn;vm$99tOZ;q7v@MBj?R+NP}pyp^;&fT7H*sPQ2h8UxKSOwPK-{3S}_uj=g-EWD-m z6)7f$3$#PBOp7D_<%-6w_*-9o(yZZ+zhLrMlUgGJAs|77zmh27t4J5+>Rrxqzi{Z# zi!b&aI<)!FA&lV<9eVj?6x<634jekvx0}jzIPI6Qj`0%L+3crWhp^A_Q^9e6hz@W4 zKsx{E6PzTDB~iJ1e2F6I045P zgd|f8{Am`h%n{QrSa@WG*?4QGA-T4jIxB_nsZ&pa6||u z9NWtWbjORv!Xp)P3PwHQx!?00%4`a;kTO0yl%Z$R_A#OlFfg z_x_xF#OOzdXnc;Kzx>s4>$BbCFIf4*k!S9jw>{l>aP^|gwzt6*4sb>Xy&jAp+b#xQ zgEfY28bdR1k%U1gSa`hB^d;}ML5ZCl%4|}kVv<07>dds|vDAGnEZt-^R!?A=>Io}@ 
zzP2A#&)(w+<8zwAhmDmnJ~xE#8c_VWA#27+N_UOwI#CWz7NU%`MLQ1uavaX#VxgQL z!>>NJE8$3B4US^rw-g3HZA#rn6B@U1X=+ipb9G_Md5zsKWt$GJKyPv2zOcHDOIH{H z!=4}v{fSAJ@)QP`YJqK0BX2f?%>jw^3}Bd$I$^}}+VTK4PH|s4BX|TqOQP=)HcBdf z1hWQ=8QaqV7*kGt8I}M}X+&EK`ry02{8TIiLmC`fm^*p;_~aUa>qYO$^%yT=V8%fs z#zqT}peo+G+v0b9S2#Gr?&mdh(=UnO;;V>;uZvs1nRp}YdRSmyJ*H@*s6sz(VnEo9 zaW6+_#OX{m2KqIfxGtk_bExqaOx1LA0;yXJPYDE_&vy6r6nc*qdIt-fHzGJw5HA8j z=d*=g%wvo%a0kauyWRg;aR28v zJPfuF`)IKgNdSsFm;tz_xljrQPhp$e+l{Bt0gNXvR;~8+nCFAiYlv6`?2*YlOymjX zjGmq%!sH?dnI_~264Qi0GX1Bq8-$%MzQ9a+=?bhr7^Gumby8sQp%5xm9P@V8<4I_4^^1ZLUIS zOwGmJGpT?SW-}<-W=z%+5botN?f173Hh`(y&gO!_CTv!R4THXt!I76&FH85l277dP zGI)?cG&umaIUux0fd>FN2dNQd2t{LP0uho1(F7XPU)w>uaB8*2@Fz35@Tq5XZ4BSS zOAIyz^+M^G91^h!NloRsZF_K*QC`lzAYUDyLNSUrqjE2 z1%t<@ym08ip#zzWB+g>Ssi_Ah=Ri^eY|4S64hZ4K?1~jSjo?Kr(P`9Em<^L}1Q~)s z0%kv(#wG(@5FZ!Ge33GC2R^4Sy#jvF`*9vZC~mE?DeSBQW$!?jlhW*kx&Ib@Fwhx{^4Ar7 zzfu=El3*zliWH9qoD)(O!pCYKTl!{bPHT0@DBO4m(A|@RK<52ub=0x z&_E~L>M224wZaI1BQ<5uq->(ekyhl#4;R0#VhF_{6< z)is=;X)%zeV0;k!fukuL(+@>((A=M(O+f!}b_w>cFK9|P!h-73rgZurap<^}uqs{G z0)(rULqMLd0C?Xf_!Zd*!Q*88q$UC^6G#W|1psw{x;T*g69CsW0KV@4#ADR$tt5Dl z;1>j!1ElN3M{OGY~0K$u|ksc=I8$oEqK@6`atvjFaV(~{eo3+WvlPA=%@#{jrq znF;av!wl9!J*Cz%!WJC7$8RsCOrCY4mRp;|*}BmtJO&(H9|16GY^(<5$y1qkCvsCN zg@l99EF_#wW+9cbkZ`@`+XTNN8zFd{te@0G02UGsM^knIfD}?G3kgR5uW{ve!FvS1Ah;ZWg$D9(08(i1i$cTEY9>bt4Jv+0FINMg&=6N10EL7j)GQ>> zEF^FY$HD+~3JKCUcwI=qEF`ivfkHyEKLT(H3AsceAvzS&=1nW`+r4~wd*6yhS8Z`G z;1z7RFEM9CaB|xwM8a!*WPn@=eKtQs&B0OANs9DsNYUw{+< zu}N+T%Sz0L6MkzdTx_DOC!WQl^B6;e-<;+-^ygdpF#?-R}PbPLNKM=RAf)Qb5qR z@Pf#NSvG=DHyEV}ZxiAkVmZMBFnQl2!UBKWKBB4Yi0{jleoU>x(S`V!&{C(KSm8xu!!KGn*D>3!~X3 zogL!x|3pQW!L)u!NJ~Jcw9au8q9t&H?UlGCBFb9nB@hnO;Brz7g~=EVR{ zTHnSv4-J~lJKIL9M;Cu48V_9jnR)_PR=)T%DFI2JX##jnr2+_s zK`i_O62zgs8p7&oc&e_3C$PfP6HZJKz*B20gsrXc)Y=M9V1=ir)>Z+cDJ((5qAM)Q z!de^7_fk_>!?;dmsF>P|KXXG1g5J=AkTgcJCp1^|l{hAcK z)V07%Grcs^3(j*2D?Ify(^E4&)zVWfJ=M|^SmEiZmY!-`@H>n@v8U4{F`uRBQaB!KZOfbW|C9s?kh 
znk>HPDMIhwp8>e`rt3(59iSx*U?k{4t+AE?aTF;s5`|zUw;-5#@rogD=aTS_Od9(? z%t;oEfZU{>A(1yHy_v<6C;MhrpnAu>&uUliV498i8S7{8XZ;M@Pk#0$q$~339q$VG zCB$VhJOqfTWSob_WJJo`Hy_nzl>ga%3&FixaxkL*VDPwW0t>cVowbk7^4wa{hl7KWil+_<}obKCJmoItXYs>s z3?^{V7H*Nv`c@a|yl*wGu__BS+?82So53XysEAC+6I?cNPLgH-8Ya&1Et)vTT&La; z_?AHT#5vaJ#5wNp#5rQ6bK)GYmmUD&!Sm9@IjK+}I&n@Zg{2~UnxyAxn}!0=Q0zMw zq0WJSTHN*1k4-V6P2>SVyJjFkfyBf)=tAp42=^+YM?66q+@Cl{4D}~>Ea-b7ecO$D zZp~yCq#H9U+Ta~$esI_@h6FF7Hb7`|UpH}1BG4r935HC4O?fbVfHfAF|E)vFcoWpe zYbVZ`hZ&NIbE$mDHy$M?nM%|Ky4SlUc3t~?$N?_WPrSFms0E3?>$n^@)Ezka*~NdJP8U%r36 zP7T3SrT{9tlKQYU`F^gcW}^jWlM60)^!x*oe3>o7nRfQC=X-EgFv-b=AN;D=zdqMQ zMf~l3_$>aIOy*R3z97y~2yb58PEo~rosxL}4O)W;Gz*OFiqy38I3u|QF9a3t%<*lm zUPoDa?4E<|cDpcf@B>?NTPv@dpX(}{W-JVn2mQ1wi2If5B!ym$Y4mOH57myrmt ztmkP8JE7`ju|Bba4D><@nPz3SF8k{l%b~aJ*nxQe`a*}&vTPz?<^@&C=qFrjOzKb| zyA*DnLEj|<65^cdrviB4MV>^K(MfGsXR}9jB8^kKc1=I=?#5nV#WWV8q-lO&LXvQnm(xh&GDkt*Iz;~mQ3gkuN@n((g!fi#kk3?UMn8P<9V*53 zh^C;q>kkhWx9u+OD&!8%zq?b|s%Gl6aInTjAe+8uduYnFZlfJAq}0bt+U7SYw4v2& zRZ%&(iD~HVGr5q>ap=nI5kp=3xdPb*f*-?3R_b*O>EEquq4#RFW=p0%L67|zMcBBx ze?2pBB~K?9&#N^X>i**aAG=a${~Y}&PZ`W5#ONjCyx}O9G*v!3(d3&STsZUo!tQr( z$$fTbzJ0tffe>c5rb<$1`eB;IE^j%lXB9EK z(NK3t(?I=yr~H))CH<;85j#YXKj4fwwt3b4q^!>63WbA}o_pJybF7f?M?UoTzk=?- zAR^=XpVK8Hm-YQKmubGrwdcdJ8dpaMaam>P3f1vuoy%Usfre=HwGwxweg#AS1pW+f=)MyAi@a=d%n zX)Vl8@67GGdtc&b`x2}7Y1M^0a`x&tpTvPVo$e@S_C5CLeTmPU-SYH|^{!{X&&u{Hprn6rSvZH}v`#H6+{ff-|)$kBMh|3q>vLPV~)@!V~$Wmu!c~Lv>N;xydu1DLW58wZJf{`6p8WH5!z(T5o!q5 z5UP<@gI|MJgf~uT5Q?OY6B>jfGUJ2>p@{4_p+P8;XPnR=RLS#8t=Q%^bqhT5RFSPB zLo4o}6-B^RpcNqqO}I@|HDNXpZ%TX`rUeqFVNS!W?VuG&lQU^WVh~dZOo^`V;Kw&j z()$%I=N+`#4r{v|skfBt?RqShgDxYrGJvz7Q+C5X-#~Yrhbyy%4Ls z5Sw@*R(>H?d?7aZLTtkev6&ZQb40R)7NJR$7?BrZ!3(ka3$Z-^NeD{d65TkA2NTUV7egJ)amhp9!m!=T%prYKW)P{|Rgo ziDC1+Nx@OM!;(#=atpb_{6hQgMVNgY8(Xq`@97g3@MbHfKA&@qbZsWw4R=5`%kV9Wq!T|r$Oe(i;ux%6)4lDnBp z?PgtT6NCz3ZkD(%glVoqLLH?68#+zR@7}U&CpNOG-dXr}@s(}D^aNl=VBBD98nQSy zXHe{7!$^~B%^51rDJMdf)ChTDM&+z2nb0CgrX`Rg6s=qp+Y;59HmK5CVm2hx5U;Hw 
zT4qLTdSXx8Zxrn=2S8eJCR95&XE_BnXh_rGrjdU@!~TGV>j90?0~(wMG%62hI3CbA zJfH!1KqKsc#?Ju_paU8~2Q-8Z@DYmtJ^0{}dpe!j*}J!`NDOUTk^C$0M(`oR+oAUp zzPN2gdKvfv;Vfa1u#fPkguAw_D3@mQ^>QZIwxU+bcWNTqr7|1+iDJLZPDMl&qAH>R z(GbDLIDEggZAI_51ilIAe}lkl1g;i%9e`(KouY|-v$HMzriVCItX?xBQ|x#h3Ogrm zx+!<(mRxSzNsduC$uSBiIY!|m$0(fS7=@D@qi~XA6i#xC!by%%ILR>zCwrbITubQR zc5<5EUfWLc6emD1Ro`~9|I6D>@~bUd-gdIcr9h;fE#=#@+fMSun4%oS#ZB_IlYPHO z2~TZ1$y1(sbK6PQFWK0T8_R7c**=f-R=blUSKoWLo$UQSz{-VDTwiEWQlb_@z`WxYab@aD&^f$t_ zg#LBb-`075TStGRjlWE*s=uwX{zk<6+dAuSM7_VQqrXwYQ|r9H!Oyd)ZkwXmOLQHb z$Z?BR80|^9?xYTwL z7e}vs=g5e)&Gq{@B4?8K2YGW}WFgPuO-QpZX|^b)Hsh^`s0Jp)9;A88*Qr=M=uGf4 zSR7vB!z--gDZ)#eJcu$|b z-Hy(>9r<-T>g#rtiI-uN;gn&~-FCF#wcxejwcxejaU=iBf0OW#X(Q7{h9O`_jeIqD zHF&hgjvBloydu0Jydu0JyehmZyehmZyehm|c(cT3p_|Z6(woGaq&MMB!<&XTO?$O&=Td!9;UEWh^mVm6RP!O2kRHZJ%9+@Tr_Lc2U zlqLjfuz8egsih|GPzBvGyT=-sV9QdKMZC(&V;uuTbLJFfhKY{Y3bb2hC+E4m<>iGK z)`Gj`Joa_Vqt1su_zAY3+zFp5l~C5gZdFl3f5~9I zi&2!ri>{?^PDV%@$F+&5WCe?*uRfA$N05)k- zP^~p95t9(6IR#WNlKK3`fLsx^4Uf+$ABezghZ?Y|Yc+3{nnAb8`@$t)XIx(3FmVAa zX20mJwWzMk#AEU2Li}uqFNC-!#M?u>*KxwGe)45i$AbsobK*EZ`WF80CzmaTf;NsI z@PNmYZ+4to7vj|+|Ai2LH{{#+l6WljuS5PrA^wLD|D)r?uZQ@ycxZCsws>eXRA@HT z_HMBe(3iSFr<*8s>r>rEwwtT+Ka)5o*fZNLb?ma_)id6hHj-B8Ob15t@}y!$SMw!W zUQa}LjIK%Go!OC*{78=9y!_}L9a+^){6$c0wCJbZ-al5eauIgR{PISu?-QrxgOsEnj2yH^|A6L57N;gNYO1Dt$)~c|o-DI zsKfli{KD_&a=$Z@9m%fYKWVC&fwGfUN!V1?0Bj>UWDf>V8Sqk>z$1_D1c7TrL8%co zDeBh33Np63jj3*JipW&AI@K);W~_@grn|Li_|x6$bhj*Me9ly6mKpiXNOp5}bYx_{ zTh7#r)WTp;?T&BgX0zRFuFLcJ6duVk2H>|@E_E7_;jddRQ|*jBu9qv@-Ac7vsdahV zty`JF$(h7TE*G3u$H&EPquOmKG`lVSPj#oqyHgZh?fxHtcU(aohV;qf)}LMsd((?y zUwSd@NiW9z0(dd*7r=|*1@K}#8|L%={$kkQUkv;Ei(!9%G3EXJ#iaN57n9!KUrc#_ ze=+RuFDAXeznJv?{$kSm`-@?Je~}*T{auvzcTwKoMTK@kMV8(c*$OYT7AmwC%CFED z6J@VSsuX{g$)a-Z^I!S=LmztRcVGF+H=lXSo##IPyEiRfJnpyS1z|EjX7gZsTBRdeD?|$RTv%r<6G;#87>P|T zw1X(oDknv>+A5YIkZs19B&tOE9t7xFW+t^Yc^P*j+HwLGn{V=zc#oIS_qB5z+L1KQP8Q1S(jo1v)H1i!LckUDwB9tHE3L{djj$$|pDwx&scia19< 
z@^*zCt)UFuscQQQuLi!N^p+Z-|2F{R#Mm1oataSnWqJG`G*^+_`*Ul|1`h+p?{JP~ zQ;z28p6u~PN?xW{0^xGXE1$`zhD6xoLcIs~P&g+x%lACpvHFnqDr^2s>K~BF@pPM$ zo;e8Gdy;n3=X3#%;pQn0?}XB@<75r1fI4l)&Rv5Q&%DTE=uG-yi%r()uC0vD(NMu9 zenvDyGwl}D{7(u^%d3*OiFYCCtk#n3HViNQ1dQ=KuVqo_6S>h&J~!HKXLHx4+f(!N zjg8AnZNdzJ|BB`n&x?^)fEBI%f_B#J>?QDr3Gbcav&HZEg{cH=U- zssIV&=opuhank}cCi0MCOVc<{&FANH{A$Z@?&RnBKQE?&L`*x-NHJtUPf>(|I993( zwK6*@vm-?v1uLUTwD0poXO%ds#90L;&MFbOaT)Jq2@q{yIQx$XsBoYGF}Nu$OvzYT zx-hkS%Yv8^!sNaiz-BSfNM(`54f(7go=k4Y$cBt;U}Qr^HafBc2#}QxS=qqKhOBIu zl|W2&|GINB-VSAecn%PP+ot0w1SsswqAO(h_` zL{)-_87T9MRm=xis+muORI}cUm+e(`a<|+J2Z$lF&1xCV`3?Y%ST{(~X1ANoD|N(W zB(D|Z+f)2z(LcL9#8XbWrHWIkGTkh%Vz}9jy;h#=Qm3!Vav5%R7v$)I{9M4)f=uPb z7Xh)btRTw@(q6zr1!*ftNkQ%@prjz@@G^}>QBshS!t4z8E!aiOOTW5~#Z*9gN=m1s zbPA-t?90i%oOL`P zL-iMk%F-N>oGi^*er1=nIa!;NwK=TK$y(-ri()ND4+{`&qn};v1(YA?o!9z+F3Ziv zlz{AM%bqs&@M$2xdKFMiSp^VVZfqMWH^RZJBHzUtgad5=8A#nT2zdg9Dq=}8vRIRmKm*410k38Q9;+!z zki|et_JXV&FUE|M_L}*mG&R+p;)12!d2U2ADS@1B0?`Lb z%2?)N6<}^L#pV?GL>-GG#jq5Jx!h_Sl<5^Yq#`STRsoPljAx#x1T$O$QKzyiLn87F zhe`>$XFwh?p1G!C9P>}bi?`CvKis97xY>cB6yt@-R3c`p29#pO=7Ngmf{M)r6`u=0 z8<$yiD)O)@V|AepWb+DHeE<~A?+}}kq8lH^PcfB~x>!}Hf^u4YtU3VYG_Go-3UQkZ zP<1NQhBM>(^w89n*I}vbQii0&51aPH2A6-5OF^HQtjMb<}Y_x z=rOqrbN!xDLq7MI5;q@)PINE28zteiFS>q(Dj)&-JRx7cWCHQ5O)Y}r;s8t z5QybK1K3@aWsV#gPd=-fB|syMEgPz8Pe4Osrta-6FyM$Qn}A-Rk*-3ngpWldkbzV+g**Wzidd2j6EiZrID_E;V_RP=&Y1l|O0n)(oRR%N%VB&qdvV6gwxU%s zpPq^%$||KgS(U6tst{j}aR=n1zugPbk6B9=z@BM}o>tM*Rw&R& zF;w2PqAG7%6nv_IdsfP$DHW^8YzYhpOd0F z@uX-juNzF=SNaHKxAJo;)fB5AagvRIf)%PhG^Z{!r}1u1gZ3Qb-JFK(IUBKo8<+8} zuK=l8jLynvJ>@oNOq9S7Tc&4adKS}Jzyz4-KqJMF=~uWj4tqSKJC76r@>y1; zTA?yME7P->&dwkLG93tz=~f#jB13^Y;175Do)sd2dHcFVe(8e$sA1TZ{4kJcB zedESu&8B%!D7?S0s^|%n5@i$+VW0tu;uQ#E%PMFC##U=nlCd!k(%CYO2*$B&YcldB zv;mVbF9x zV3g-KuAJ8MC*sT!@WP{lU6kGNI#Fl*h*wKx(OBA zUFbdxo#COzj>F#uYx5>oG3Qy5 z*;O;UYGx47vSSdW%kZmSBT<1@SQ%jL29GQfkRO3|^_polGYx2k;7mx9F?SNdpS4*v zI1rgWyaOeXSvM1;ydZ~`3*$3RXUY7~qJ#JjDAMER`71rxUcD-&y*4uB^_4>KbS7#g7 
z^Ak?e$~QiaQa;QhWwyR?y&mt7Ocgqt7jnGb+{w*Pqi>}&~|(|iz15Dpsgyx;x2tXzXlN24+-q8H|96*x?b-#M{hPuUIy!iQy0;mmrTRQ_+iGXXRNlGg%qqS_W6fSM3h7D)U(U{X}pqTh$=`5YkigutzUY7lDY z3!;4I$b%oAhFOzzMW7{+&H(1DglC|NqGkm89|jaeOYRC9R3&mHq28j2~9a}bTOkVOL`om+(%DnV-jCBQLIPCxPW6S#g z-(xI+lhXTV$Cl+RORi{$uQQJ^GXi+0{uoQRg8fer{_8P*c`EX~WBiVTm=x&y?y+UF zB6xicaTa0novBflpYl`fTz+$oPoIymwnZ8b(-AOW8A`{Nl~f5ZkJYII1Ro+E<2k9S zAwZ}Kl*OVFKm!GcDMFjj|8dm-f(igDgr;Hymb{PgQ-z?EIRz2?wB>V`Y(YI^YMwV? zbJPVsBRF*-loeso3T=d{VuZehW6J{mVbT(vLXz{luoEBrl2IMphjRFOr{g# z{Yy0mE}Y+-%X87F`8U%orLPm*eGkSvUbys47<1HN%A+8Hs^?=;gB3XvEnVz#_4th-rvv z5e*1NB>;hUr)9na5~&1q1Yi`1P7&IKUb-!VlT`v(Av6^uFfLIELh1ul0Nnc!b>NEL zOyg$T3%hqswYMyA#ZF3CV3tr;gdqyMe}Pu8*<2M{K{B-o3cYYFvHF-s-(&GFoaajW zxGvfB$8~X~Kd#PuC7wX?N=7{)y*YL4O5RRBb|qU$Ko)}F*p>ZH9=nqFb%@gGg~}a4 z-hwsOH54-mozvffe+(pe>^I`i5uPPnAoLKn6ZRhCKyh%Ea6RDx*`ljE{qbD=2)jRa zcAq|Yn*EKA>^|Lj-DP|gTCL0muhX`8MrglSVt~tH&-c9(Qji^3V~_y0@cTDFR*= zOMX3KlJL? zB}db*IsWR`#WxsA{su^UBWTm@O#F6jwsoCt%TvjIT)HSTb+)QV{s?E5ZU?y<11 z1DF=*n?u2$V<<&e2vsbo$#BcT#IGW1@FvQ98Vk`!V&a*Vv0iH2FjER-E7YmVw6qc` zh%^Y5I;NEC^X*)rQ`ocT;l9?$^%_mkGOnU{eIC*Px!U9?6RH%csuZ%2@Kv zvRs~O1*D-b2&!gVoeh$uWTJ=#ul`}VzJbZ42G#DA8SC4n)=F+f7;Z`{HF&N}e0ODH z^~%JVm5ic`i)YT9II(!*;epB<-}uJL8wUmkUORPQ_mTNnawWID)S))vbo!qz(!I10 zBWRRxNu)Qnp%bwqsu#~jJW;9r@fh314JS(Tv5Qm>o12BaO%a9UrL=PW_vGQV~MsBmSTU+0U`6TzfOJ<@Nto!V&jd}8>q|=5|GbSP@mO-76PuFzUcrU=e^SG*;K~$I zEd3i6194F^w-GbM1wRK3U~CSc01O|Wh5~TW|M2p}Y9fx=joG$jZjlm58UR%nRTtF} z)e$u%O8YejYynLrE$S4!Sw&|_eLl;+)L$Ux#O{Nq7dXJ{V25ymz_!2$u^aUtBVx#6 z>|Je9+rwIWRj<4$VmZl}|LF=cf^6`ML`+#if(?)16ggL>3iBdu{PZE`fweKRBC1+L zS~v(qyJ26Xji36K^S~DGd&YL!M#`wmVjC{WdyzK2nGMfs!uz=+6lLf#`g}EvJ8@YI zPnx`84rg9mq$4a8rWSVbA^L8VPsy(U+U?V7RzdbtuA2J{S!}5yutJ)=oaUq4LhOaz zh6b>wbqfl>gj26U0_S5LF>@WcukUF<)ySGb?AEs3LUb&H04DqQsC+@u1)~c_PXND~ zc#k@z5cA-|&f7aro#R^wc7@)pntfHR2~gIiWo;TQI}-1ab&2=LWFS|}CJtVkk0rhf z`F0MEBDMehBmyW&8}3wAFQquUsY*l^=`s?%uVq<#Uw% zyl+cyDEYgw#H$#KFT*W=JRcjl3a3#2uU20{%JT(-4Z-+;u}T>>MVknW^KfCqm@W 
zTh_V#U$@w#$0oxpd~#>({=YQDD~KVE(mmeuS!ai-VPegrtZK=6wi-6fik71M zVl`ZK)O1v-AK1Nn&ygc%V?7HC^PNR}ZBjgWGu(3Id9Pk>)x3JSIp*r+=8UVCn>Vgr zUN_P7H?y=Lr4TEcLab>D4LD-0Q89Xy z5X+xJ2``0pP{mmK6mnQHnrUOto&&q@`20s^p1$M2-MT1@pEp=HxWO~9W~$sQmI~_e>D9+~0MoiV&f6$J82<=S#JTg)H|FHvIF5~L{ zEUcFuwai=IBt$ihze{Ds|2+9C=S6G{@efl+6#e1W)R7$fHs$l9Yr?_Sa|8y!A z@SjlPKcT~a!hKBzfrH-kR_z7zalWJE4^4{Iz_6q|hD6u)=C1E*LE2k$>c2RTLWfWGz=%jMvYo}y!% z!n>!a_bHn96uTc$-V>yK^Axx6r)b?%v2^UzXO^?CcC&Me%PIhr6a>VfdRH%}egXz7 zBB>&mn*wi5<19p3w11{`Z_{2yt|LR-FDDbSn1%V!c?E-rNiHfMK!1V$Ty!w_{KYw8;+_HSDBj5#N^$1ExUwQG>b#P1HzfWCzOYggH zNq^;*<*AdmOtNuJrd~zW>@C!Gc4-{g<5c{QlD~!Fj-GXN!BLN+?T+>uN>n_Sb#%R> z2ZS`D2r;4vaXDG+8ADvK7JI@F7rMoMVu%q%G0w{|%vho*V~G$?RD>8rgcxpw7+HiE z8ics`gcu=&7*d27pM;9R?k!t(ZQ1^^rw>_!0gON8QVevGa>#=ZMM?%4rion5V-X>T zmi1}|2(c_ZKFl_pjiM#QhxyjnZ3)TD#l+>=X)AX*kF0|DqI|U|7cFv?0jT<-Dz~VS zbWwF*R3#Tx??o;`0HZL%KLOy&Mb&*#RbHg}o%t>7Bg7dozBNx8CSF+7@JI<(PG1R< zH3h+0D+*FG6rdxJ7YHm~H`2Kdk^Rhaz6TW)XbKeLi}4o-Up&u^od4asu<+E`g{Nb& z7568eViPVPTT2Ok(cpy@_p`m#kij1q+$hL~UxLV!iahjbqyNevdEtBBMans#5`g3d zDplKE?K4a|vAb5>pFXLQk}qc?F`ziSFIAi1AxbBE%~G_jO;p;&75Ar}vrG@FI1^-x zAofVGJyt$<$C}Z#Prfr-*t~P1xU*R7o?G4sYO|C!5u96|Rim(^)LwmlcilO0q)M>iqU z$I`umsV|?S$0%e(CZ0J*$5627BqW+cI+MHsiJzQftUecOv$Lh*$ve-*_~gUr{{Ni% za-ophyfgRyiJg`H51qMd#hHVjM3!RucBuTEs2Sy}K{W`8zYu>MYTWfyA!i9qLLVnr zuuOc8K6#GbcaC0awt!Sd3r;ceR;Hb=bk1G9oc+3$1%jU^MohCUJaHkuVXu_RM6~-x zh~Vkd4dvjB3d_OiEI5Qusq%0e?8n|d_nGBoye!NH5));0M1=dA|v5W?rQ+GqDj<@T`O7wqcZsi01TW^ z`z7aM7Qd4?Lp8u4Q`Jt5+DLvBP~`z5e|)mT(7e#)<|8Jq()+&#U>?}1VSJ~C@txfL z0FvJXFuw29*u9eh*&sF3J&$1CAa&;Ug&F|`RCl1UW#4C(t8m6?gDUX(VZtU>G8 zgX)`!LF?Xw>cfe_&VW({ws87fxaabOybMQeeYOKmre`}hj{EA zu@(;*GU@%YJR+Ysc;p1fColaS`B3}=tnHNRhFPx;pXo(f^CPGe#H}RC# zi;0L$*^m+9P5obYfe>$Mh3O&Q)K~NL5O2zb=^@_K4&x!-)bm-F5Amj2m>%Ly<6%6+ zn;Ky}#GCjb?gc=EKH2@jsm?5Kir!=in+l;oh&NTkc!)R6hVc+@YKHL;Z>ogx5O10a z<00M@84&q2OcqLhnnSrJAg&Gq!ID5HZ&1LfP_S5Pn}`W^@{SA!o+Fr5dLaK^9aMqw z;pC_D<;D9oMnr#i-0`U}g(1gOOUj%pqQ!(s`jt#xLXpgsJo`|*|B+%{Hj>V5NWEmw 
z;r1Llw0Q2|eO;-DUlAW?+!U-EWLQ*$J4_*d?po{v9=p?H4|?nYk3HeBZwlG?WSARl z!vTYm6H5#uc>Y#mU8AN%4%K3#h%);k233pmE{pK8RQkMdvKSIq^XpcO_zOL}q}9WD zPwA=6cHq@(mbKZM!XT#`^~a3T;YP55{eSryK1G!P5CNdhKLSZc$ROt=<#gavP6s~a z?7*iSZwS>HPque0uTD=!c<_AI>T^n)iBZ#w)M9q?47Bq1^ik=2+XSJ@R05i0t> z_8K+xGMFu$Hf zQ(Lg~CEtO*pJ>6DMb54FayX_@_a;Pere!F{5^6}%tCr~W2I&U+>SnwyVh@ih9)9^l zvuB@t`Ar+oo_^+;$L>6WRD8fzcfZ_%uTwW`#rQmaa>Dz&QAnkg|@3yNY$s3xJBglZlneo?Rb4bl~8T`%fRy2*_j*;GVyR*T`!yM6(cOk& zZiz#Jm*1Sj)=(zGsfEf~?6{(i4b|Lhi`ihOQ6t!khkH!1CPITIqa=@{~mCj+4 zhI1k2DZK0BeLE+{qny+&liFs|R4XE>Sw!*^)RX!By6wsS6Avvu{BIT?Uf!!8KsrCe zvQpJZUX_gKs66hO`95BfOj=7+utd#rI(V3@(>@~lBBdE@iCyEjpUU!SjoWkR{x7k^ zusJW_ys=Ca(G4=Gxu;%3=IvW~8I^nfOA{cX`B(Op{>RyE{Oqzr_lTmtlJ}U&GezN<1L8ZI`|= z#D6Nxe&xVRR&liGb5GBIrPu>(eGW|h-QvAlH{Tom)+lsYw(|1X2c}b%2un$_?SO28 zmV)O|DFx57Ty+o2RN`wFbzzs1<-GbcEnEp=T>rGZ0wh=R zRaXA1#P1suzi&+1*th;3)~c`yxbM>6{i6Tful#CZ?ckuJAxFcGMjVYg8gsPD(P~F) z9IbT}I2w19b(C|IcT_Mml<->Bd%}6^^LrAeMepYsOukf{iCjb z)b)?L{!!OI>iS1r|ETL9b^W8Rf7JDly8cnuKkD@zbA4m3Z_M?LxxO*iH|F}rT;G`M z8*_bQu5Zlsjk&%t*Ei<+#$4anOV@Xm>tE&iSGoRGu78#5U*-B&x&BqIf0gTB<@#5- z{#CAjmFr*S`d7LBRbJoKu5Y#LTkZN*yS~+~Z?)@N?fO=`zSXX8wd-5$`c}KX)vj-~ z>s#&mR=;$8*SP*Qu78c|U*r1Mxc)V+e~s&3tEyb zUF-VRy1uopZ>{TF>-yHZzO}Azt?OIs`qsL>wXSci>s#yk*1Ep6u5ay2*Eewef$I-k zf8hE9*B`k4!1V{NKXCnl>knLi;Q9mCAGrR&^#@+xao0EQ`o>+~xa%8tedDfg-1UvS zzH!$#?)t`E-?-}=cYWioZ`}2bzjS@Gu0QMgv#vku`m?S->-w{-w{&v;moa@WEzMSjJxxSq1%elUs>&v;moa^K4Pfa|z zm#%N#_2*rG-u35Qf8O=yU4P#7=Uso^_2*rG-u35Qf8O=yU4P#7=e@oK*H>_T1=m+_ zeFfK7aD4^WS8#m=*H>_T1=m+_eFfK7aD4^WS9r<#20s4=gQhR?1D}5bpML|Ne*>R? z1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}6`m!5wEpML|Ne*>R?1D}5bpML|Ne*>R? 
z1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5b zpML|Ne*>R?1D}5bpML|Ne*>R?gO{Fv1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5b zpML|Ne*>R?1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5bpML|Ne*>R?1D}5bpML|N ze*>R?1D}5bpML|Ne}kp@Hx}Jz>9P)d{tbNo4SfC$eEto5{tbNo4SfC$eEto5{tbNo z4SfC$eEto5{tbNo4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8 ze8vrY#tmM2#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8 ze8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vr4 zdd3ZW#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#tnSN4SdE8e8vrY#$|JS>oY#%20r5k zKH~;H;|4zC20r5kKH~;H;|4zC20r5kKH~;H;|4zC20r5kKH~;H;|4zC21_&UwDn+q zDVsGu;|4zC20r5kKH~;H;|4zC20r5kKH~;H;|4zC20r5kKH~;H;|4b4#{bsU_;~E| zZQ%25;PY+Z^KIbsZQ%25;PY+Z^KIbsZID~rnc8Yg+p)fbpNz$ped%Mb7`UpyLgiHj z7B#Ob@TM?PBaP;`-wdw?U)5?qIdE0)Vy!-Hm~*QQbNB>!DnFfxQfBhSM`L|cmlrwd zV8f${w@-F*`T8|29!u*Sbl&0QHKIqaO7HBO>YM7lW8s4fyLR2bcy{+4rw@JVLkB)} z_|aH;I`uyujivrwYFIGGVJ>YE#k^ooR50ARA3G=g<0!5i;(MOdCQnao^7PauPfu+!v(hPR?>JcL!k5P@rX`}` zU-{SXCA{y7E8}DFDx?sx$1MVM`rrwYh#fp3$vKw<5!-re;&Wn>FL@v=Ab9{z;Ad9xN;ylfe7$@SFkK@4F{5WAXyKR@f}O*SW4fms%GDE@$)?+EJ z4yQAS4Z8Wpg9jHDo_U5-0?wYf`1i~+&m25RyZ3lm!YU!zidPP1tfVLRnEHu<|d7U6PjipO@rH{m)BM%oR~4 z^GFo2%3O?GlEV7Fy`r61Y2)7Hu?Ru?5ct&;Kp+kp`{0{$)5U#i?d3PuY;WkC;k`Pvg z2rC1^MJA$@31{@|2owGws^A|uBk^2h&~u^0o=5`1ONa2%_goZ-oI;v=?rV|s*Fqr} zkr1S}3huCLhq*Xx)yP5OxEr{Cc)c}m5MJQx!2+ITEjSnNtr8Kvf z(SOD&wKp^dMr4d;2BqnuG$9dQHiVa*4$FoSl?`dx`ft2Eey6)NxonA)*it0fyu9yY zQX%5_<{OXP&((T{%k|LX{{QjEEiLuEP$<1FBK|TYA{F7tkz97kSd{w8C6UK2q2u4a zG=fE!Mm2`$^7f)2ych^8LYJ~-7ZGL^>Dg|HvfUD8zk5fFcvi|%w=ixxDkB}0K|c#q z!ZN(WMLaJ9o@T)r%_h-I@{vm;U%Djn*d=sguOezi%PYe2iLg8n$R9bv2*?i|Z1GN=K&oni3Q7<*BsQRVX3&QKA4r&S`AJFqmp$(~)0>@?Pry;{# z{a7dpwo<_+NN1It#z8Aegb9c+3H91gBy3>~l~y=IWr#KSm=HbB%YpE6AiNxUZYbjA zgdOuS?o6yV-FFa&J+8k`E%n;OUftEH(}g%>(36azo+Ls&Da4@!bG1plHk63}$Qj^J zLhdvX+-Vq}39G^(c1a9%Mmo~gygcDmZ+BApkN zPX(9CL%C0V5I*%m_|%7IRbT8&E)$V>ZAeg}b0{Vx+&vntl5&p`?w*J=o_vB?;+cDl zaQ8%{k8~Ij%9(qVC?7;TnQuIIM?^Z$-Es4c?nE3(|2x84$`m2@NH}y)L^|(c`Q{!g 
zRPKq0k3__mLc&LExkn6hj|g**5pqu`8(u#0vRy6ma!B~arVbr4oWPM$N_<9r5C!p% zM8uaw$TyZG!jeQ-l5$H#GJfe%oY)wZ>%?Ke8`!(Eq-@MZHeE_N~8TfIqKx(Ni!Nn0+3d5WFfuk6bsjka?mtM}nURW$ZD_tPm!)$Q+TF z`{ckI)o~tr;)&Slht8fp`{ciQc!0^#X0cEO^B9#gBb~iuhUUDeJwn=AL^I#T%u&~3 zNDr0k&J9u~35P`(X>M_*X@s1)D2ju^ObiM$OJu}E(wd7!Bjr=Nk(O?=Y1rVJSeLx1 z1PZn$cTyRh8&y^FP}D<=j0QEPqcE#crt&MQe&{eREv%v{XBW>tdF9zNXBYY7T778o z^y$UZ4-vse7mBV1yBaTMk48q5i;VCqGBN_|BjZqZNzE)FDwk{SVp-w!VLtY&!ZoV% z@_(td-FWK|OFDEr5$V%C#zi_3!6zY8_fk>1d)7|v+$S33l%tR>PI4p>;2T zG9$U0bxH$%WMXJJHI9^g@jS!Y_J4Es^E(E)%348%<@8RAMgQLQ{a2;_*K$qS8-kZ$ zO48PGuL{cJK~w8{S5@o0y~@Xe7NM-5Y7`%H)_s}ysDirxEb>7PbP zS6nqI5kOF>cX&ItSz=-N_!U=;ms*?=D&0Zznr2Ys;Odad=c}CYs{@g9haNk0|9?JA z9dCH+&<$@LzTvGSH@tQ9hPRI0@YYo~ymj>rZ(Vc4Ti22_NFaR(fgvP@5E(*dXjS~G z_{Zcvge!0pE{NLMMT9nCI3~( zYh0w6`<=B@@X{^zKc5BGpKK zVyi$hp*r{O6--(E2NJ6X=o$m+6a$Gf1Bver#J_M}+jb4eeFJ!A0H=U|_8bJjy#Or- z47}m^$k@X5?R+fumY4rYnz!+14-ceXHW0sk;LFDc(qB*22I8NO-*S8){#t|IKR%FH zmH3;1)YS2T)VGfhB>#KRzCt%)SFn{dTL|;uFa4_bUSTUmsjn8A!GzACy+R8{SJ=vs z{VK7gA^Ta9*4j{7ze|!$sALmL-xt?(%{uXe8@HV}@z4_|{%~Po>%I1@h^G`4lJx*G z{d#{=VHD$=c;RxtQ{tumXT)=ANiK0ySWLWcFzvn^Ab=0 z?1njJzjc7hZ+8zs%dek4F}rC4n*F++j-?agrxTLzzfpAmjiy__jAC||A4^%CYZi7dtjEl+se7y%Dmg^bGPJ^`b`EjKGRn8*e}9L zGr0+qn=rWv%ReE0k3E0vvFDFH_WZHOo5`#jcHnp=~oQT1I4rzGN9P;Z=1Tk;wSc+I{l1-7T+HI0L9n+ zm*w`G7&@lKbPoY-Wu?hu_p8x0axsEb81cS-F>+~&T^mbDc6oRVRZiS|W9s*kKkO~^ z{c7T8scdc}H!{|;5BWznZ$8QZbToe8{LwcYKgw8fl(FJy>Sae0|K%tH(oqJaqw(91 ze);&(#Ok9OWR7b1I-2;KqkNJq z?Whe;M;ZCTZ=xKv+xep!p^mckRv<7gkQK-Y69PqnlE9=uS)d|N6{rc+1sVby z1eyXZ0n^bDH6<`DFe5N4Fn9E}?sG>qavt3R!ESL!HH80?v7P+b(c~4R@EIA#@_C&A z#`2w$0LBi;SUv<3z*s&Y6Tn!$8WX@+zTgtTSUxQiz*s)S62Mr#uM)smJ~|V?SiWu& zz*s(U62MsIiKDkB{}l}d5&!Jb+wO|rt^K+6q1pSxkK~SSfq8hJTJErByu<7%`P^aV zqQg7xO?=_-4nCg(3tbS5$l@-8-3XW!B@QkTu&8Aavm>5mm=dV@I>xcw~+r?dxTfxtbB;Lq}7ePLz)J{nPQIY=@pXewah)tF6p; z6OSLh`@rEZ9yq*k;P8n9hwq5rd-$&Se7YI`R#4+)sivsJn*}-0zRHJY?Rv95KAF$9 zGd=&U?{Nk0PgrHrN|XQn=6Gh=_~780OyJbWnoQoQ;We3(Q>%tDb*F{~GaaV}hcdY^ 
z%jj4p7v>pUoymonhDI~FFxMEeVYac=naL>Is?21VZ3HruT{V`O3}siZ&XgnB(M&m% zT?IE(v3iit)7_jkLz!}zZOve&8fF_BVb>QISPdCjHkPS|*@odp6mpdnP#YBWGK@L zvkk7zw8Ctoa3jlxGp)$7HJMhJZES6(9c3HMv?IT)&a@+KW0`hj8M0Bf(adyIp|Q+# zq-`)W9cCLH&P<2dhE`|hBH7W*Tqrv{n3=1eev%C^{qJS5aA${@`*e6L6AU{!I+6)S zoLmDpG+{N}E>gU95IR&hx+YVLY#XLwI$SyhBEOBSaaq0;Lw^7uXQ{uarh&njz?FAI7}Z~=e-Vb{E%$cXponeXU$!oRUbYwWfzO9p-$jtxF`@Tr53 z{cdHA`uN%35e!p?GO3@1WLUzO8ef^zIb^{Y9kJuxQf&1pVf8)XwVA%Zb#m2Uh=)Qv zoS6uk`q>-CRkJEd;_~z4cnV7)phu za*3r+gsiZ@)nS3w-6?QQSm2tlKN;Zf#iD+M!JEb0#uOf8unPc8h~uyg`N;RK|mLvE3PW33@}Tkq^7(F#X{` z$oHxZ(-&IP^`>7j!**GJY1H?F>D=$wjLhU4C}*`gxz7oqh811F?ZDPSZH3@Rffm7%7do2^6*$#rVTa} z5_Xv3u)_?GSz+;?({)S|YItz)#HD{9i)j%4n5CFXqfDt$);>mAV;gm`Ec1@>tlf>W ziWn7XA0Dy>6K`lq$Iz0Fp(PzdOFD*@bPO%&7+TU{gR$8>w4`Hrsbm|PEp>P)@9
  • >2M;bjbx?U8F_9Ytzq2UDnBe0^&KaB)Ox$Voj}1OzoErp_qeh=J_zmM+ zZ}4q`=^5iMH+rqnuK=bV@B3e!8rx1lrQVqORpI1+bevir;-kX3=4>YQ_byW@X?9wQ zrv5(6^7qQJdy(yX4}SQ=2M;}X;^AEj+t+lj`$b|KdmlS`m!VuU$g%`3I=)vh@jatw zf97aWDET%+@!8ajMMp#YHpkzIZ*U?$8&Tg${zk;QNTsd_b31;U$CcX{mfI-h4rQ09 zw=Gel)mqTH?ia;8hh?za{dXFCGEroXq3|EYpS|uE>EAN=?)U}c068|AeJFGa%UJ)N z2)x^I@_q5WH*J3{A7yOW{nX-?bDy7?zhhz^FD2iXe6zvPc=<*ZsAx6M*(bFUPYV=F z{K#RVEI~PqOm7Rt&4)j7_#HX&M~?$hE3cx}ockj~*`PVgwmy_<(ONLGL??dq9l0Gj zP%d9-6jg&+uYIe{4#;NA+l=v@c<=G+0PJ&p<8|?|-nGZCdp!B;$FG}Ztj-+2j{6k* zNp)TSyN+L1AFuNy1gV}mqy*w~z_%|RI&$Pp{>lEAeI$M6$$?kC@#SZ3ja?U?l>i$h zvI}IsMK<&kPbINUBae0<+RfQ< z-85MVso#^?PaVIG4^%p>I-U9WbtMjM%W}RBKlEz#DvxW(6+gzI%k7|s1@U8@T2|V` zt1i*~{|}5@sandBW|3A&*M|w_X#ZxZW4XCxE=E|v=ci#iOv-f{b(FhY?|KT1WYSCfjotOeO7HG7e|X>{uk4GjPo$Uk{x?I73cF3_GCiv- z#)><0?8P|8s<*jYv$Xz~8%qDeQ15ewCW20_O-Hu~4bN_bD*S-K@%pskzAFu7E1hv8 zc(fH{*+M&vpfX$j|JZsP=qj)C&R6F{M^eL@68be>&CSTI*S)k-)mtfaCNY^*dGlmv7@Z^Ik{t?FRB>Ui#wAfdf0E4 z*Kl>$dc$}32A!Dwxk-1W6wXxx* za%7Z!X`~hh`7m$vx#4yv^;UZ0$bs9Nb8kDTv$r}D<*`dFz=@hvh>6I8;_Xo&JIMFE zLMb=%RfH0s{^5VA$|u^YBdh9CKjZb8KdUPfRx5M^U4#;$LKq`V5Jr@-hR{Q}jnGTz zBlL@!5mzM)5_(i@h%ihTQ4kbWq%4p$-gJI9A0!$X?SdKU;-itJcvSoB~-R5N0kl3Z*T3oBeVMmCPtd1r56%cxCnJPCl1YmOl#rsz0 zjcs7Bn_pHkoz2&7N=!~paVf5@?(y`R?6=N>T%0yuHs*{QjXQZ+UwKBcszzgdpH|UEnTkrq9x;jZ2iUWrkIASBwjH-BEwKX&@hHvtQjNxvsoCZ4{ zS-Jn90s{sk_CP|n8#uyC;S?Lik|2DuPNeCNg^mBGu;R9kyVQ7es(7<9iv1?2!?fvJ zChn9un!DxUYISN|b*%dE*w(!xow?M(x?(SHWjn~1WPq+>A1~xRs5cZJtjkOSd^O}C zcQ1hKyN>eS;e*)r2sl1a0&^WeW~bmapnewMMV7rvcmU#+fthffQ2SBzz2F|gZYAkgIFF7k!K5z`GRO1A|rUm3KA;Tt_}DIExl?&R0vfjf4JUx|Fu*%h!Q9t$ zPW|4lkl1uZg@y#Zf<8e%(6}BLQ0vQM@9^?Vx+^Rji-FxH1A!)Vmr`_B=u+ygB9TpZ z^+`5>?&>v*fV#__O#`X6dfr_MT2~%~j0}u`Jry>;RldEblxl5w69|nqifT>zH z&=2UsC?xm)DOqjfE388h92JMKXC~R=xwZ>mRc{B1>gK?}gi^S23l_)3bB$IhI-ZpZ zEVHRn@=#Gd;Y1PSloA{-k1;k|ZXe@j<`#CC%L3hO5Y{- zyGx_o3*p3M7w%KgU8(d#dDZ)_x{l(wc-|tvtB$Ys>|#+bc0ziB^cJai7aH*q{Z-zR zNqtWMC~Oz^9sz|ip+XoSR0+J(okXP$#2sV&5$|){JGEuQRBD%A5(&I6xGcyCHVSqE 
zTw;Rj5Udg$N{uj!4HY5IUkTkpp1u>|#m-=vCDPHdW3-vXNYNKbi1>I`5El-r?-~*9 zEl!SYnA~vBBp05dtxDxh_8=<>IA92&EHWC)xSln+8hEQ;`9D55AH ztP1#q3CK7p`2iuHjRE;s)Cg1AK%04MF?}Y1J~M~aXWpSppP5k6XBg@By?t=5n#=U} z0oDNzI4DJZ*TF%p1D~aCt`%iHS zBjnZAATJ~Z2%Q4v9k52|TxB}eqa@B=0V9(fIR@3jm#D!3!l0Pj1k6mpI4m~17vTK# zQ*7!1PNAw_&{Df+&$dT)@7w(dX`B^*iW5L!oWNwK?6W}Qr+_X$MhnCKJ@v0r&1{-R zOnK9XDKGjkE#X6yUtVo8!5yTCGh3BDp;z9D87U5%VhoCo4~&(}U_gpjhC%@2q$rmR zBAKtK9N6~A{zta$IVk!xKZ9|`?d7Bbz6%mXo1S$BAEmq9Ob_nJqQDh7WW%x$oFGZHXw}+0x;J(nPHY8;%|)t&x9{H2ki}NUcOCS0t3c zi_D`Irhc@85284yiD8g0GXE)#;4>_?6Lq(SQ51g}f&Kj!59XzDsha)HG>K z%ko6>3a978pz~30eyPRgm-<-lIqA^+Qa3Zd)UC`f_3`Lnm=^iaO1?E(-x^X{ zLrNHXp8IS*E~IdWO|dZWeAsuMyYEg6DcopvETnL|)v=HghMv!_xY60*Tqw*wA6B1l z3#n}(HKss5WSZA>STSVlL#l@9rWYB_>3ES6+b|DPA5Qbggjxl;Dv=sg&!zhG77s7T6 zVY`Je+Cms@A&j;VMq3D@EpTU8Qp0EqVYG!X+Cms@A*{8Kw?!1fS_|Qq3t_E=@XCd- z&O%scA*{0y_E-o*EQBEz!Vn8#h=nl3LKtEpTyY@`u~7Jz`x}N?2)A4aZ(IoDD}?bC z!uSede1)*O0uOtrzhQNSa9{S#3O(>N169@1&M3ZEx<56rLyz zhUd6~S<&1J)9{Jb=3bdZ!zc31y|Sr>Pqa1n%BmVZ(cau^Ls-TWtz)F{0@0S{+kkRP!Uc+ z0cG?mqgNTd%HXUNP)46J_;fX(3{F%5W%Mh94_^by;5-#5-)L2}ez0z^UWqEmr?CMQ zXLP29&|MEuaiOg$gKxOEG}bIL`$J z-ZKsKQID;u1F1i&%RFq7T^q4igX}lqWKl)9?qYIFkb@@me-N>IgFI-GSsi(^5ql*_ z$t3%Z2`9O(_JT?Mw#b{0#GNL&4~e9{_mqg92r{`GBxk~tKT71;9}ynyF*zA|Z6`zb zqatHlfYc||&rjCTOSo4B`vLk#+7p6X1P1~71$VFDL4Y2SHVbUxA{%i-36B1JQm5>s zT>$6_Y4eKi1n9#jQ&;yi+zR=&i8}I0J=z511RDjE)niPbq$}&z=@RHV{n6xggQ+rg zJ`5T*(e|=BK>Ss~eqa+lD61RfJR!J6a1fvyWYZ7gf=%=WRL+knkP~c7l{<#29Th#J-|VQm zT~@ct8o@H-SG}WNR~S(MbA{_0gX!O-+dy`$xK{zcfV6u_b{@XFtgI&h&cge}9|Y?E zK-|582Z79LrOk?aMNk5w^q>yUTmb5~iQ5U}K9v4Vnl2zpwWO{nC<`ir0l~Nv$Be5J zT+*tXLC5-x{lkaLELaZImc>7VNOrmqTz~-lOIm`cRh7=Q`291jh?+m$6ljfuhFg7)T`I91b{xh zrmnokbzOjt04hquU&9grn){ks`qhKeH?iJS|SS2_FWIh6< zZxZw<`V(+*$Lx#^77yH zySXUw!g^LG3h+Rs3cqc9PME$^$ODz)|JeAdQ&t)Oy^segmAlaV3iH1z35ob^G$xm(AzoEjmlF6YyOxqH4$XoB)vUiZwGlU67Mw0 zv}KmGm9wk3*v}Nr@~>1nS64j82eSxO!u}zkOc*$KOZq!!&n?`Ldie9_7N#~4=Fedd z=P;LZ_?>fgnFr3*btpb}j{7grYou+0?uBlL9)%t{hvg*hpj6`Xbz`gQGNdm!hY3M< 
zox>#0VW99U&ta|5!{@NobJ*}XjPx8fdaiDikUmb{#d8?xIo#VhF7VSHHE4uSLIwdx zVHc)f`0Tlb9YsDrd2ZnkKYuRuhFYL?qTG3;tZn!??!*u-6LN%&=dihRnA|xGi*WLs zjQZTu>F=IR4xu+Gf+tZbEPxBy0R}lw#?&;ovk?JsIcAV1}Zi(}_*08Vf?dQ_g z6OE5O`r@O1x@Sk~-=_X29C6Uh>Y&+!LF;D|x-Ch6B4}o35}!}#+Mu})1syJqBpD9| z&Abwni|ixZfL5KE3kfX+?M&K`<$kyFS&xF@i34sbB!yp1=>DKwy{{Fk|AR=&F_Y^s$^L^${r628K5v4EMDl-*LLspJ-#FH|&ZLVs zahPfPn<#YFq+!q`H)+!N(TK&7x!-YYqUg@3@3a#GGfCf6eRyxRx_@tV?BQps)rYHN zsV42RnzX-a;*mXcrj%EKu5P^pU9tTDt2>3BNHs02mc-oxOfWp$scK>ZCbX8oPOM2g zF$&%*co4X~I4IeCkM#w!`z2eYuTYTrM~gjXN{jvHEBlyP(<5` zCT%O4w5@1*I(-MgPPXak4L{6&gJ<=do~}_xaD@MkwA6|0%JS&wXqmq%PpYx{S5wQm zlSVKMm^1d9HI~Jeg$@E084y$y1QNtHemPIP0i4tT<3{d#E!Vv_z{vfs<+^FNTsPmA z>!#Uq-85US+jjs1NTO5G;&QY&x_--~KD)`8LR~#CE?RKMAnDxu16;b_a{IenUw@Y? zW2XJd{xo+sutY?+h?UIa&(gmq+oNf3l z%T!+~`{oZ*7Z==MQ(t&5PiEHfi%K8jx!d7p4oN+nB0+@QB=zErr;R4-#Q&GnS1@>C zcCGnWjr)b&Wv4%3yv2A>SpNs+?=?Os%&d0$toc`rC1Lg*;h^PYE{Lz+=Ja`CW~VUs zArPB0UN+{88-?0@C=r8=rUMK^hOz zc#y{D-T1s4pLgTIlhe}={Mz*N3-Kk*t>0p9{)MDHY=YNu?A0LqO)v!|*1F&b{zhr| z6_a~|JZOR^QDSr=5v;Ua6f;t4%mEY}he=5VnqDG`(swS(1npwzhycGW+4k=lNwz zO6Xzj#Vk3|S+QJ8NSMO|Ne*AS?jBvgernwm-+9$(>6Q(0_qq$JF}X;vr!oI}QOxsTy9Y=Gh%|3`*6CAPj=4 zdCU)0!walxct?2hSm~%*NRuJ7n1_uFMyZ-)v`a-stC}QKEsVoZH4#)TbP`JDAtYm{ znum(4YEu7+s@XpHKy=LEQZ)n>`>{z=shTvY8iI~Nsala}fK<(+N(N)7T9r!G6hzfv zNRX;6B2>+RB;b<;I{~)3hqTo{g!dJk)Jb*n_rR~c$mcmyQz>5MVO$-AE7wZnDzivt zlL;5k73&DX6J3h&6qN{1?unFwWVSoTWo;$af6Rny&x&zLTZHFTg2L< zK=>9Ph-XVIuIc~b8{T??o5(_bd%T+O(u_MYqA%1$IzKzBF~eM-r>Gt*GRjCTJz)b?a{Z(JqgzI+ z>mTN-ExY~EhsUc^sYjVlJiz*>HqejiIwrtQ=27jzAJqo<(Yg+vH3K=XG6~xBRU+K6 zc(ktRZ-5D&zkgKEjy_t~*yA{WB_e{6dQ@jJi2OXzxDLQ#IYz?u92tUm1WOeAe3a`J zhGu>V36onR=m0ncQ$xFooofBM+`76)!)ey3#E$?T$OSnGd$f+-^zoqfGOnzy0j!sIKr=N54E)-MF}Z^kj8# z#})Q~02lhf>{_wL%&V|m5Fl>kjGO#h1P2B83LXTwnsJ4z$l_iBxQs943tF4gOO#jw zvhM(?9ut1=L+S-!Qy`a_^+B&Fu5W0uW6tbXp`8F1J17}AQXEUl$Q_@XN^r5#s(L$y%J!!&O#^i_2i{t3b@goA{82@jIDT0Mlm zLMV~;&J`>R{=y%2uIuWmR=19nDlgvid$i#Sk51A9mB{Q=>#uNzH`0BD^RMCVE2&3F 
zy!nTpzjCwAn;<{@Y_O2@2Mo&lYo-3RxNZ}FSWMCX?ep~Nb#+R8Qg5z2ohoeRJsZQ< zndU{Ae|P2SR~MzftE9~Hj;0!vASLM!bGxG+tqrEO>I`6O-2x+L0mNSw><5@NTV-)u z>pH5u$D`D2o-+Va4+`!TJP5G!->Mr4aIXls;IF@^Oh_WPbGB+a!p*D9PJqonGyQeJ zWkF7`5n%T&X993%=EmRGBrE+v+6-`G2JThCetaZP0+p2?MWLTos)Nrj8yL;<0u)2X7NSP1+y%Q&|R~vXR};dg$&D zbAhN0qY!Ko%%|Ge-w*^91p$S0DXERiO=Kx$K#Wp)RFJJrn|3B`n1Z5b05(5xa{y~l z8}^{+HGn-5++K0p0s6=WiIk%N3m3_U0G21X1+|$Jw|HMF-$g20C$e?{>?ztX6h&78 zY{}pb1MG|1uoi&zuuZ$AHmzyYavaE91lV1*VKk{W*@uvB$smMuOeP_u#~N-Seb#_W zO>Y31rc_(%8&9R$%KiK-o#&SQYx`8&tt9EHK-&h87t8WjU&lQfPUH7%O|_+WkR!Nt z;b+sMsV8Yjw`s$m3EL;rj)d}?EyueO+Lur+=SK#wVRnK3gpMY3FrniKDAW2?dv8*y!W02Q_Tn>^8vN6a` z6ZU03pd-ktAZ+bQol{#lFB{uk<(ExUsas{Fz$U?b>Q))(tr+O7*ygR6<*gW^QhNCH z8>A~RB290R(%}>yJXiLbz=+UMrHzc3G}XcLy+tL-GDX?kiZp#2XgH;e$WC${84Qf1 z)sEnCbXZ@pI5jz09MU7 zq~o?uaKNnd1}Hqxd^U9}FI51s)>~;r)TnLut@=F=a9NNOY!vJS*lNRd2v!LWjZ~+& zb|_a^PMR*;z7+$$RaSf}?;NGkz3wVaaa;M3$E{4aVrnhEWG$n#mQh;ED6PeBtd+}H zE0?iWE@Q1+##(s`(&hlX#aej_qSpYt1>D}6AArVXdUCca=cvjY0&o~`3jq8B+%5qB zuvY$o=t=K9Ku>Tgta;iTdV24R?~Z}`n&cwhP#I<|70~S zoyRB%n+Wsg>x%p*RQOM*@}IDONI}r60F)_4{s4sh9*#)=^gNn4k0RjDoJSYXx$|h^ zJc@w7M&2gqUg&n{QRt!C+Q4tdxPwqe?1*G6I8XnfyUx@5^K>45<$1aeJ$#;ypT{qr zr|;+K`guIld3uk`Y`+Tyy>WhF`o8n*IS8LVKhSqRz2iIvbRHF-Pk-cm=ImKAn$F*{?(BJ9QZiI* zPJR5Av*(AaV$UKqJ6lG|0u+t~6J94=Cgcbk2|LeYyU-58D#D@j<(^{S`Gwg(K41Um z=R0^+7%xXjeDwL$_cDLkVr zDO$H~9p`1dmwx+=dWmQ*=^;0=Jexn^PGlGGYJ&>Fd+HF|m{wMJ8Qjh0iX zlfl92Xrk3>ed=-a`Zy|m9F;zfN*|XJAD0pzml7YBKYCo6BW(^qXMhpSk-bSdd&O-B zP!Q3hiXH+`AKU@}^}+1|(A?wlLPS>rXbSGI6^X&M>5sRekciSAm(m`W(jJ%69+%P{ zm(m`W(jJ#d{3n!2BsKsc)k{3%M6Zx>{1P3zM87UEwBD7jO19jn<&sPTs3-`k3W73$4q_Q% zs+X_^1iWzxzd(4MaG8)JY$WWw#Ds-*5LOWm-CfFXPP8h!DjVIOTX=K&uRc46D<^Cs z%+J*o`A?|upHSsLVgC@JcaE#51Nsah&{%bLeyqo!*@J#OLQw%^YrbPPzmtoOIO>){oRJ<<=Dc*DI|8R22mD(e53j zo6<8tN2O>~^Tymj-yE9?p0Jr)$lI9I+|<|68n_{~sXB_Hc&@RWJliOpS!ulpKy`xN zo97rP!Rvy{f}CKZU?-5KNL`W8Bj^{5R)9h7czdPewzL0NQ-jQ9f}o-xP%7?okh%@V zUa|aOoVF2gEQ56eVpNXlKd9+HsOdkb=}+_w(BH+B8GwL3#A;E13h4Ltt6 
zdiO)T#M*!LV@gG5vV_#2+%WK8rK$&9X4?xK8;fVB4=P{Le zECv3|JO%=to5wunu@3laHu)g&vy69-s^41oX!__`18-b&kwq4W#dy#}?)> za`=_=m;m(fJO(h&WS?i?=Na;OCOYztlXr2Rk)CIu=QYkiML|$i5YQKou&~!4EEBkw z1=1~s)KMYbV??<+(;@Y_jZo4y5#|YR%rnIEjPQKdC{L5kGlKI2ee;YV;dR1gLXNPJ zuydZ#gLV*B5e~7+mCLMjwZ98>Mg9{i{3lfTPuM@CAm~*9$`m7i078BbN2GsxfdRO{ z0KlKQzyLt!E-(NW7y$TdHu)g&w-V06-U9K;cOv(18{)>xWE7)?>KoEFE9WX7=Q~J0HC5Es458Piw6KK6X*r{7E`@| zrU{z}^Mp4_Z9MjS_T5>6KhVM+_QIm-37FLf&J@+^o|S6oeP=YOn+BUxsa`A z`)X`8<}YLlsV_DCA7?M5zW3Im+~Iqw+~Cs30QDB$;aXzqPct7iU%&pI_wyUxn~}OZ zVjI%mRB4{0T+|eTYsG3M7Nz$k2M(79N2!AvyDhv|D~ih9;yp zGBqK!$k>GHzRk^qt`m_4j}5dBGc}%43@Ieh$uSH>t%Tu6}gz);={3 z*Zq+E_e1j94=wDjs-1G)!0Up`f}CKZU?+g{hU*Zl5*%8$b?e@}k38v`1ovhB$D&l_ zzSK<(U%hWb!rvVf+~BNQky^q6%BB`1vHC;A>JLR3QyujE8i)08PIcoiHHq~Q1Qi8A zRY6cDprc9b1V$;?$)xOLQg$*aJ0W@oz)s-i0PJK^c0%+TfSthY6}Mg7QE`U=>;!HB zz|s%53&1WWWfw$OMk95JXw=s?NW@OaISgPYld_XZ*~z5rWKwp5{Nn&7GAR?8tgHW6 zYEr`r%%>)8EFfL7v4C{S#sbn?n*>OIZ4w;z2FJ3BPx35%`loKw^}%96oz#E>uL~{< za)OP5od5$4*CALX;3}SfprTiAY^iQ;cwkj(MoOKLQh_r7N|=#$W|;dXC}c(xWkw2^ zQQv2zWs1)MsBlJFn2{D{)ORwj0VrUG(VdZgDYgq>&?MD_u_N^`z+fp=!oR-ozex&2 z5*4d-afP72t9hvCp}0pstI^-Ohf_1?$Y899#eTr5!B}>IG2wt&7vSX=GYfn8TplmQ zoRJEF*9DgaIl)H3P5>3cbqG+FHas)BvoynH1%xcfE(8rj)n1HshF@6xGb`VL9teVp zf}pA(C=)nv2UZTF6b59624sg6wnJ7<^bCNN!_5Kce}@!D^csMb!|fHfUEEP|hX4i; zZUMk3!0iGU;2pAZqKTE`ALSm|uaU?&kaHMdq<6^5cW8WfXnc3b%8`E@z#Hz+aPQDa z?~tkQuz`d0#$y4}9~(G`H#C~iYg;==zisV1G*f_i!5aW}zXR{4A5%>87}bG;y!HK= zg9lT;nEDUA9PF*2#UT7((8=S~geK|Z0})5(&PU~@p0L$Z8UK`~8^FlLAfF|itF;5yL`B4LpUZIwV3;obm>xWk~C5;UI z@B*epKX8eD#D#v~LO*;-D$x&Itvk|B?RlYTU#8}VZzx2Q-sJTlmxCmE8zZ*U1P9=f z?wFi_V~0fK2t;rNCf1T(;O^KJ6JFx(lJ1&b!N-W; zWkm2ZB6u1Re2oZuFA*Ngg4petT1|F6j#^Q6KaN^s_BxJQb@DmHbmA=1kQ2fCfa$-k zf9WZHVEi?$xay=H99G7Ei&k%qR%@x!p?E8Zwqy{WP(w3u7N8`VoXP7!E(dXiT5p`U z(*%=qtRu*(AUfHJux4?sxnpoiNsX;u{Ym8V%h zpodShdYopMPqTKMW?)YnM|-o?`l=4nRqG{Z>v*l7(QP*D(66$Es}`)9CBpf@ZR zVydSZIKq6X<1R+!#vf^KoaTi1G^c5&>;Igx+@>gVZCvlhto{AquDaq6?xHe5wQLla 
z8wgaq=mN#6_`ej-RW^?8;b2q=bcxG6py(N(!x?jMYs_6!?k45#6}Mg7QE`WW5g)q% zu4HZ(T)((MN0X)5P2N-8v***~tai>}xIS^&m=-GYYGKe%ht*XbPU6Fdl{|DJOJXeJXhMNiQ95AMoLZ?(+K^d?szsyakf zji_qcn)$`ETSg!F&@CFCHr^}FJ>=34?#f;y{RelwRsT0Y_5iTKnCe!_)<4&;-%8Cc zlfC%fSmW0vG2T$kc(FtovRGaRmSEj z*Ubo@z8ac0;c&&$$4w}^>FW!-1U-U&vn8{F)q+03kf2vE+-(%lF6I+q{%SUR_9}{I zbgnWuS2Z?Q-|~g!tC?SXYf<`*d+N{Jle)W;A1;>Plj8VS?z%A%#ntkgX0A4Vkqaco zR=*R(z&pF3=~d~~=?-+5Fg-PnOvktAm-SF{3pmeJ083uBCL5Hk> z-1lmH=2O9C&Zb^S|F5;wRBNg``^b%0jaRAc|9DUAx}?n6_egrr(HzlQE6>*}mehZ_ zR!)*e`EEtAKj~;>zqjgIN=-nnCH=QI8n4(5VYS9bnH$ZIcuhhyUspU=y=MhIQdi_Z zp~8PcmH&kOLkfc4BXwm;kTn1ytB1nrpB}-J9Kn;opE-i(fX*Gka~#2Qz+WS86Lc?h zJM<{@&=EWbbit7utEd)H-sW2>k-qB)p5zF#0Dk2WJPGvh5j@EeJjM|`#t}Tm5j+O+ zj+1xs2oB;14&n$NPf$@1R22kt!>gyT7bp|x3cgZI^$2dBu!%5Fc%!4t54!lPxy2(a zqes#^jJi-HaQQM1gp^Ft_kc>_PT7vbvxc8ZG_JAl0{)#(U!vWplu zfN;XCJmF@Za692?=n1zJo~E8~Tj6Q!3Agryn|s3TJ>dqQaEnP_sYfSrdRc<*`|+n^ z)aDaz^a-~bq}eCj?h|hK3Ag-&n|{L0KH+wsaKovamY;CbPq^(T)OcB*up+1m%7TH^ zB5kXI2f1i7xQ^D;{!%ZK<-&}XsRD)8n43$_Z zM6cox3o!|h0S6gzkRb;-n*q@u$hi!N0f1Nl$cYTta9X&A=iWqlzy@OFibxTf+9pzk zrnd=KR1}lGG_y^lG0pDD-&yA$0=NS*gYD`H5raGAASb?2Ea!T-8SAt z(o=L(bn24+uC9>@FB?@{r!B&rLQY!5cNkY04>A0?yAmaeq&}?S7Z@vI=@?iw82J^o zN;sL{Pm6at%7I6nAc_Z3Jc!~!%mc(cKn4_KKtTop#9);2fRG^;ze~tm2QeNH<5Bu1 z^LvH$G3}?OHcvN9t0q_g}+F5e=Vk9%P_C{M~`R+Td!Q>yJc@mGE`rWn<2`(YRB{SgA~hah37< zZ+AwvRPSiCuy>`@ccr^`)wy@ow|Axbcct@prRH~~&3C27ccsO5r5p-h0~oG%)%SNb z1n+9x-jx#G#Q@&rUZq6Kwt=dkEEq_=J6Pn`NPT>ps4MkuhO_7FLiY@~D`8T9TgPqw z7hgR2{PQREllReeXKz3D;-fEYx%kaD(|?zHnj0iba*K#G{6<8Y?jcg<-S{IFenkPF z8|~!tU1po^C!Vbgmw6tobn%urHi{C5_jP(z96!kr+=7v>0mC8i-#fAOl-ir%6%A-8}aS@B))eFU* z9@r|JnLc`TJ<`LcUHU5*`Sb-Y9e(It=!6(XVu1n2NU9A5HghCsBy`fQf z>r)q-3Qt|Ur{P;qUF2h#`jquWzClprqiYv4UmYCaJ93PiK5urB56JTIF?yWtf9m4G zKE6|f0>+DKNKX-l)?9pAgWEV(*f9C&`?~h;J8=KWb=!{K*Zs8*E-Zd<;m;Nu>c1p5 zQ%Re4J1zJBI_i#MzHfc+R8Q`h$=_?A$!1}F1TL6>rC@ZFRgWI)Bl|?y|r@Jv-o+AI*4_)TiNEt7O0-`tZ#47RuC0#XbSe0A%eVZ4izuRZu9li!K-0`ofM&GAW?B 
z{v&)<@B^8DL^Pp9>q}LXoG_YV#Is))k)Y~oCeEgHTShlbE<42+2&X1X+qQoCZs%w; zs*D6s#Z*gGCQ3=82+*ip%>IV@4}{cywa{ZouglGg)!{N#mz%RoG00blA+fPngZd{k z8fSQ!4Wy5rL7HR=z3vu-wwK+na+ijrj5*o|{Nv)rA2UP#F>zclebku!$JGAF&QybH zzLoieCr#bgdC#foFK*kn|AB_-`?j1s@bdJj%_MK4z=R5aWz$+hB!0H zr|*BDY5K9R{?^H(PjqcPdGda=Lst5QnW3mc6O;tzXmc$|BS?~r5|UV>5@c9Vjh8Z> zpHWwpkTjw;a3qC)hmjpXG-FM$rJ&6-NdsnQhS3X&7>_2L29FpkUyU}`Dyly)BULK5 zs`UgWh69F=J~hvDb9|x;O>5;dN|2rk+78-mOAuep(}-w$(vxI0BD1fueR}r`yVoDx z_$#OGSU=Lar}L3eo$Q_bn(4z1Mn`1h#D+D!RbX$Bbu)Z0)NJFMQJ~5kSAl7h=(~)8 zw1&^kXhNs{DwVZ8bg?+dG*}{1`)d)3=2oP#xfS&zsjT$t0SB7@S+e{M@2C9W!lwVk z1pG0+!&`z(1@G)wYQJa*8NzqS;V@wee{LSd-lC>-|nf(*r^9{ zjDZHUi)DccDng?nw9&~7a#y7d7Xc#${#8sr^Acf3sz)G+WlX+NB2JANL1?t>oTXER%rrXj75wVBT7+@gxzc z7{*V;t6dEpaDPv$>Yu;Ax_A9UlOxm9d#3MRzkAs2YIakhyIBg89v2^-i_Ys4|pC!w4g)el_2u>kS=X2puDRfl4&?m z*%^fXb~|WZDTsqb8$1M1NEKd=f9A1U>LnD;=u@|n)kB8_%=EtlDKh8afULI`tTq>(tLeUiz zRKX#}=vG`Yk}J}dD(qvZR2597nlP@$`%Cp$D}`I%L)+3%+O31sph&5CCj9S{Rvt-q z=}&sUNlg12$m+`U)_o_Zzr`!>c27TZXJ)d1VtC4UIO#Xqt$O%w*q^ z_fnaVD}yGMEzfxRn+@Ys8OFs##sDL=LDWl#j1r=KsY!ImE5?(tVm#?(kW9RjyhCtF z&7_Sm2T9XXYtlT!g96nF$Fxx)QAQ3Kj3PrJWH`D&i>Id_oti$f<@viOU+fwG=G`xy z!PQYbecVJAMojJtl6&4{SO!3JxXO3Z%r$-4BxHMyd(vwhg_`cPlavs4L68X=(xU|r zVB`}PgX<}lNY!2)4u=620`Vphk;2|Hg?r=<0CuWO7cr9OG@S?V!zskb{OdoGdTg<_ zOd^hE|4C90POr6lYjx@~*;lH2Zy$N;q0W;BrcWV6&DCC5HH5b_VXVBccuMw!XM!-r zz)Bx2PyZ6lPWT>b?NI}mu3o8Y5lwenZ%@E)N@enhT^lUU}7 zoe)FSGgi&FEdSs_En>NyEFX_)c7Qkb65_oSJ8>||AX!C`3Auh!!pyjT>(T8m?D@?9 zvTyg1?Ju6B2J+Q+h<4@>Z(^Zd`XBR*Q5hx(M9veE`}>)M^DgGGWJ#wOJ`IDUNV0lA z=E*>u=^uNaV6|Z)Rq8j1mRePcOEp28XV@AD@2ibMna4^fymE^oI{*Z@>HDX=gNjV5T8!t|ntd zCp;1x1sWfk`5E$=pwZUo9`PhvJ7wmGro*w3!B(*{wSbwEdQ3i%@7ZdPk-5*lUnFUC z#^?5#`|3Y{8wNFT`9<}x76$EiuWx*FKf*G9i-p}p#xx_JAg9B(u?2m7n!%w&X^>Ue z7SQX7eaYwA=}RPSJGkwEUwZ*FnL02zvS;1MXPS|DDLm<=#f^UbCgP@seN~&_C7|JZ`RugpxR#a$@lT=cfk;ocAXfXXUabN%EJI%_u|(_4;sFa z%8K^sv<(O$)&5~^$@{89UFwG{Ia}4#2Q{{k3CHM)^SF#q#SzH*vendnCHAo=q09AN zP{E(gXx|SLM2%JP{$lIo@dtk8?ydK2`RJ+L_p?nn{^AiOYpu*pHMI!2gv0Nt7;s%8 
zi7X)<pQ|-W;Yw8GMnB*c)Ws%+KT!m9nmOFx(AE$<(Ak z<0tOB`@j>AZ1~LBfuqL`oILQ2YS${Y(wEg_k^R*cG@}Z#OPQ$hk7hLBW^~eEM*B~O zN2RW(Y077pN};v04e0)mmfEkfschm5k`+JmMJ}W@>JpO}Uqun)vwbJYA)5)$YgQA_ zjMfh`8WuCdm0n$@FfS9!e39!MA=XSYeblaeQ2Q{hdNObBgwZ+*sjQ!#oZ7l|`Wqvq z=hy3LHw-VSjYXdV_8S>_{A6J2|%Jti9AhPvb zf2Nug7$Q2PYP2(zd((a=bN_WS^dM#KS9?HhtV}onsuc^Oy#_>+3F6B?_j{v7Oyi&; z@ABq}ZFm3a$mI0&r@wGt=Xbw56@}R@k?j+8sI>%u(wf`OnHUO`;)qz^_@8xG9&x6SRGe_);T1^m)!1EYixTtj~cEtl?58; zvCb!RYqjRZ_T<^v)IA&fwrA}KQ_p&%57EjAk!#j2rs>NZhJ7_YK&h4|J~E4OP$jvUz+?v*OSU(f{sY>jeTDJ8hNR45=Y~G@3*oMx~|$rs$BRV(G;=i z0<>K?!Bvo~W;q3`&2nYcX8wtAuGyS^C6!ITQnSxULrvsaW06p8{!5k3(K<~@%|~^u z;d8Q~hR=B>L8L?ovqk^k%X^#~KBwL{d`>G>!{;)2A6B)SRlA#BTC9Bvgyp7G{KC|9 zS7pnA>FH;x>(+1Gy3aW=gDIl+_pR0`Zne(`-|4gU#%1qG0k}qnKe|~ zJ&676Fk?X9iE4p?$VEY7cEQM8AdHU}H8{qRgxns?n?N79$s+Y?9c}uOIs@t~3?i2f zkx8=yy4@&yU{d!fh7n2|6H>-Ro?XYh7>3I)2^CjG)q=r}aw1&Kgbbvo zIM`gqmsT_nbu*5!Ws#>{x|PyQ50O*5|Dc@)w_T{P;lUYsF5elDF1Z9P#X%I~;v+Y; zbYAU~5w6BA;gVIkmBw_bUERKRr89VK)7dSuZJa)`ZR@rp>#G~?|H|Gi+%$ED)*$jN z`bll~B}m7yQHE$RA*dO%fzc>JY{#7C_^QUGWJ0l@Z~Tk6ZqFG7ZD(VQbT+3Ngnr#Q zVdH1O7qTDossWXhh7GBk(!ZKopKM2(t4wSUE8>z8aJU<<)+>a@?0d|Y4T?7 zGs-HM91Fzl4dICsNl{?jq_I`Lq@wKi7ckv9IGG&WY;$BrQiUh|XTf|RX* zLXCSf`@e^!7aCRsGd^6oeWZK1*)g)MMYp73MU8(mHJknLBM0{HyZ@PIp8U$>i}7HF zYDm50iMAv6OK&zUBtcB(dy%6{?<&#|O#MB)ua<;*e!X4)Q8SsJn53^jQWMwBXuQTT z2W#Da`#SerjZ5Dd+|#Nx)1Cdw2cKTJIsIS3F9$a56MtK^SnA+cSN;1Q+s*m%Gt+;% z+n-x`>4?2{)+ctjZ*3*@tbi^)uNKq5CY2)popU7o6IAEI5IMcL~d%NnUdv<9LL6{JT?Di>ms%+xzeTbD-ukG|T#rN-&~2aX*$y8Ve? 
z*!Rp6k&#@P;@I9Q_wp2<>^|atW3qa31CXxi=2~*QjhkiB z9j@;t_uO)S;=9u7udeO#xC+Fo<(7-|^Cai$CW&o&#i#1W7HjrpwcWummaECG*7Z9}mq)SXEs?Q2imzxzRq=P=#h0bG>CF<*62e>B zEUvZ12e-DcnMl3UDl-*vGg@1hJC+a99*OOoJ*VEux4GSX2|Hpgl3yN)%h{w*WQB#b zwYfcQZLXp5Hs2aby#u4EYla6(d=0V|d0$++bs5AvZJ~~~f~#y>7Sf~2wxq5V?o`_f zx1()^Tg>;6rw_b%^uU3A_a8X+^2-ON_w7ElO#=CTuStsq*6;D?rQU(@b$>CrU z#x-~)YmzugXNG39Ej6RFjbPZBo6)|U%JSA5U(|K-vp662!W4KmCWDwz2uw0{l%~n< zQYhtyUdZs(1?~XW@^vQQf>GbHRu955sXcwEKHMOiOO_s8Gxj}FsJsN^J;I6U1fh?M z$`i$9-`cnPnfveAwtd?p`=5F2`1)s<6;)qR)eKHp-_7GHqvt+5QSXM?kb1X!b9kAX z7yCvNmEF8Jv1~Z`HyscU6v0&;kJC@(qq@xfG(NdBr8_U|WVuumt{j%8>cWoCO?=5_ zFYQ{#LlIg>V7Txaj4vZ`w*t>3ox&WLnO!~+uU&KHoVWZ&UDC3+fd7O!}FGjb0`NQ#Bn^>-eG<6t{gejs&oz-~KtA?TE9Osx;y-B{uq3!p2 z{^{?RKTuq{w8hwJ%p2Q`g}|aodtee-?btHoa^nhN%Tl8Swpd_`1-4jVOPh~wvD_95 zY_W)zWszY?Etb@3Nv)RDYDuk@)M`ns7SU=Et;^IM{-*fUi2CMEwOUrIW#ug^Z&`WE z%D1}0yhY|MGH;Q2i_E)Kd5g$fM4LslE%jk-megiRZ6T@6lG^f-Z-H$V*j9)n%WbpV zHp^|Z+=As6EU;jK1xqTlsUz{H&P`8Au?34PSXRNZ3YJx{tai(4x2$%{YPYO*H>cgl zw)@z2AKUI@+kNaZj$Lg_067-qSde2ujup}s2qHj4fQSGQ(cyzZ4i-{W1UnJwFv-)A zFuUj&eIk<@KxzOH4I&yuG-%N>s}79ILR;t{5!WN@g0~2n6}&~rtl%v|_SC_a$mnBb zPo(%**;C1F2T2D>2O}Ms@W=#_2_jQSQy?p%BOu>jRo|4V)`C<8TBMo@Z;@&yyhW;+ z@D{0NmfTFx$Eum|J~o;uvnR#Jw))ss%^Z>}Le^edDr*NVBH!X{89hmsBx{G)RD@Wd zj2_+s^UKwxsr6iI=R{o-%6XQoNl;oU>jy1TW)E+XvUhmPYHPQ6**i&YTwAD2W?y2j z5$IBdrC5p5uo96{kaCdDAYDPagH%l15X~BnlEFdTo`Ty`h%Rc*kl|((a25QiYob%p zIn5zTxM7+@rKOrfpe1V#!CR!}5WHn+4#9{1+n2cu?aSPS_GRuu`!aW-eVMz^zRX={ zUq%l zf>?&^4;~qvR*x|Gl~b)-nqMR%2Sg5t9FV4fhyf7;h8Wt=>G=rK2GGZ9KElgeLo7n- zfwu_h1>Pd0kWMQEw1`&CDI%66g}_^c6asG%QV6_7NE4mb1lU4fSf>Oam%^MEBBdaT zSfmsLZ;{duyhY|^a6~LQFI5w91)80mo}Hj2X?DU}l4d8oTsXu6JxPgJZd=IJTqVf@ zHAy>{YHbqwz(Nalt{8R>aV?s2U5u5wBrmAGX>vK0668e*+5t~HKpG%KD1-(GlL$2s zqa7ga=<@i1v_t4p8b%_PtMP-kT#X;R1#0}@EkfSED-0T9Ng6+Ri_rMNTZG0BKANVz zMKWIaLj5_@k~OSd9#+sIHGc3GsquriNR1zSh}19=iQZ}$iCB`xugl{HT9U>OUSKqC=D152tTa>!+Pc_2f}KNcFuw9^`l-RY4E~A_k-qki&$L&Cx_Sy3a=o$q>O( zA;Y**Q414?|L+U z;9Y^{Pv25+(IIZQrV+daYX00+9*&SZNpD$4a~K 
zKDK4q^)AW5(>3Q=R*Td@!R2%bV##t${Z=z*$Suh2LjzV zc_4U;um*`(l2=+H7MKr#QY1+hXn#b+a;49HG-wLw%5L>2XhdLbdr3MLlPtYTsz%}OQ~Qf#(5KBQTry|&!`YP1ca zwm~EfqP9UK4Wc$dBn=`d5VfgFn|75Ym8|YS)mD)t8nw91q(&`nGqI?} zZ6+4Age?$_+H3~Zumea)^Mo64M}*V_Aq%88Ak~2MMHt!CMMR!U1Va;Hu@=E)h~P3r za2XJ~4ldV5gg=%8?MkMmoJ!LMJT^j0Y|&0#F-w#Vh`2@SIlNn>e#5&d>NPx_21B4+ z|A0qH7!gZSw@I=j*+0De3&a9F4TxB-rUAU=Y8t?YT)CM6=7bN-w=e@^uTSq-8rD10 z^=Oh%P;Lg|>NGpxU4d6?Mcm_j=rA8T)RZC1m20jHc&>o%hMW$(#mniyTdr1Xcpt0P z8s5ihn>E0i?S}YVh!~OvJ@P_wAczGKE3Eze-+L}C3r?msGN}!uHjvstY6Gcl(Blar z|2E495ey<2L;{Ee5D6d>hFlV)q>%PNI6#<0P!ll@2RR(%a4-(10q`_n$PEzsIJJX_ z<+kKw^T5tAN7kr&+$x;m^ELp07w`A!A-m+wNEJv0l9mLWE z`q);D8WD?-D&Q?bs(`l$sRCZ(RP|N`v?OT+UULm%fl>v$C50+vcBEL6JZ%*-o8$=+ zp0Ds2t)E^$W;TAT@xjHg7liW z3$lM?SQ#FBB5sz(9^TE;*u%S7-v1Mc*2x4$F@dS%JjPaOm#EapbUi*hAkEduGT>!u z5LYJyfp>K>5cn7Z*$EL>r`_u)*1~aP45|@<@CXz}1_Fs75JVt|KoChFl7ud*88sH` z7Q_NIeDD_7(x$UCFJ0?(*;G!EJZ)egu0Z1k?+WbQiMSqb28p;j&9*Un9MBa=0(Q zDniLaD2Ox=X%iL$a=Z{>A~Zt;OBBHpA;=a+37zB-qw&H>qXHsYgCNucQV$sQpsNW_ zFp$9jU8e>E-jbws_z=-ja2+iLx1y!sR#q zfJEG_7PNslk1w_-*vT+ASSI86hU?@bJ(-bb?*ISm?3Rdy;Hmuk-!I#iI*b z_@_gwH@(ClS`&m!OfO-`oarSHnKQkd^~{;KocGL`7H)vZalK~(Ew?29k9VK*yCmrp z{;K)~y^X~(u~TPc;$H|`2qqiFPKj;Lgq_LiTs{dWlDB3)`tEZQe&(ZYcTq*v= ztS4DdvN)d6fF~86RCrQRT+g2HRAqmrwH_6vvM7~BsVqumQ7Vg4S(L7#bQMk4h>Vov zRY@vJ1uK%Kk~Ed1sU%JKT9Iy*q!8Z@Tc)N&>Pu2zl6qfyed+b3(3e7A3VkW`rOE#4 zf;*L0|52kaUA}acrK>DmW$7wQS6RBs(p8qOvUyc5%d2uZpsKQ*D@$Qn3M*1rk-~}; zR-~{Zg%v5RNMS{?70Fg4J1p5@$qq|)ShB;C9hU5HS$*O87uy*Y$F1~ZSh|L#YgoEQ zq-#XFMx<**x<;gHM7l=g$B1M{Dhm3DRE!K;hMEfNsR@)th4oYjY8mP+V6jy~^@a6R z4{Ajv#tJ#cs<56mL9NQSDtb^@PY*z=^E&-vFVk)&2%^|pR!j-~E3B7enm7e~o?{Wd zAW$Oz3UW;Ir=a;$(EI_lwz8^eRM0dkXc`qXjS8AZ1x=%ZrcpuDsGw;CDixYd1x=%Z zrcpuD2sa(k3r(AXrcI$(vdG6I*Kj z6f}Pdnm+|ioI*)))Kn>GsuVO;3YsbfO_hSCNs`G%E_46@_8NPP3q(Sy0d{C}l8YHsEO+@PuDa+khwFdIGK|;CceCC*XPl zt|#Dn0J>l0Aem&vW6MjA6 z*Asp{;nx#>y>S%TwG6V6op?<+9?XZMd7!|n*r<} zZsWjFVxD~}3eQF1xhOmrb&x4)1;*PA)P@Ky3c*DoxTqDls1>*<#1@6vqL$vG5L?vJ 
zTNG45<&F?r6k?0`j8CmaO;9A!gx^_FP4X+uLtq8h}MY*u2BO0Jq$-sU|>C6QDR!%*RGmC@mxStE~PlEJlomOtYb=*-+GMC~7v8G!06c z1|>~{lBPjP)1ahjP|`FgX&RI?4N95@rNa7r^47#vFTptYYChq+443qhmh_UQKuJ@e zq$yC+6ewv5lr#lOTEt750wqm>lBPgOQ=p_NP|_4AdEByA|8YU2B~6KvrbJ0oqNKS{ z(p)HM^)3ngC4s*r@RtPsl2-4MR_~Hl?~-tho0_RvxGo9TCE>axT$hCFl5kzp>W#af z*-|J?~>qM61+=-cS-Or3EsHZd39Bfx+O_1#3iA=B-ED#_mbdV65LCIdr5FF zX)P}a=Oy91q(!_WJmXGkDixkf0&+<}E(ypb0l6d~mjvXJfLs!gO9FC9KrRWvB`xA5 z9YRXNY)P0cY0)kX>t?pqA&N#w;Ke=M^h4k+3B0(ETO~v;Zsr!{wpxO;2s#Xa)_m9rJu*o=par{Kim ziN$>6A3>!wK2%;`J9SoEod)ou}f^ z>kaLxRz2*?^Qm@wYTDR|YC}!p-$?%_X~K9IH*EvQuuepAYBeX+c-nf`Lyy$WNyl{C ztqm649=a0tXDzO=I?&t;IT`N4VZ;C;t4xFMAi^7*X-q| z+Q2(KlSg{%u*|n6_yS_F{nkVaFGr@5!T(JP54-Zs^`38mwhp)EBK!-5vfGxA&}>FA z2|`0EA}yU;NGA+ra8M!7{uK)+4UZPpmbHK_x>{3TWUQ9&_46DzH(hG^uGy&NGgtPg zqn4j+PvtY86blV(J}EcwJT%(Uky?H(>|2*>d9-2o?_OAXVe!QNwHIBqcJA2PbDn(R z_>t^G$DcWNp4#JGUrO=I5a^Yus5bDhKcQ92$Jbf7vxk*!;icAL9*>UL=%WL8fhr=% z{?_sYw*frT*<{$%wbp8HAl##f1%RqBZczubX#miMk?wP7(T9mBLYbwg^PH-h3achL zu!us~NB6m!AXZhpq?%(?%TMF!S#t>MiZQ?o7mWPWOq;U-{UEeq%>q3ep&Ww%>&U1L z=Bi0!&PT&HHntiFvHAE_YC6@xi>IR=_Dy=KhLN!V1Fwu@YhmHUp`9ld?|$&E#l4HS zf8f~rUS2$M&I?QC&>XJHs}@2XCXGG+c+_yQWnwpZ%IN$nnVGDk3g4+d$!s zO05n3jh4ZhXcos-jkLOgE5^W@Of+48HIV~2!2LzbIWn8sDu||Jke|gu5iJivUb78t zu#RCZ6gJ*t!8-o8IAqEjX)pH9076fB@(FBj8U`GZz z!=9XW#KMj+S?8vkQHqGWX11a-2GJO~U69&Vu{Smbm(9I7H@<^c$`JA{beiOKJECFy zohuH7+_j_e=lLTVh#p4O!sEg`(cgG*;mPNox_xlZzHNJMo8K{izygT%#C1OcHb94Y zaoTh91mkJ#w7y3yPt6vYXiFhnR>L*HCAOZ&3*z(b$+~zKB7EjjevoHeG-(}Vf#Rs$N9vRA<}{b^=h0#%pBn@%x6DuZSadU zV(>9dfeEp0aU!9`bC&QUCIs!=C(lg4>Bffv6<{?qD8bxVWEZfAD!E2%I`rIug+q%A zcP%_Txc{b)J^A?Ir=GmmWXBX>NOJ2@QK!jG-)f_60bbar3!MC2z_91r?u<~m_Jk7$ znU28hst+5$O>!vEHVP(!NHDS`PO=(&@OAAP=qnF2I_txt(9H-&J*wl|if+5nYlYpe zQvq8%G1(Rn=I2~M=n-cvRJsAcSu7fgX^S@=(2fe^vs!cNMsjR!o*p~JQdgQ2^TUrsT;lYCk9y)O7u7{4S z9o)A2hQ;kK9+bzKrNk>@TAL%!r`_C(&MEj>%XQv9?7<^9ysL{F8|5ST%~*=*7=;2I 
za#1wj5E5hfHG~C_5QxeEdn=58V>)b4Pg?JxA3wS@sn(5XmH=8{Qgk4dWM+#}eh=JMn1e%DMz$1z|WV{-U~XYP7n31s%G`1x?<}10WeH>~EYWJO;p&7_VSP@uEeqjv6t!rt8mqU3Y zPaViuqBW|44U%1NaEVf?O|gMZLOI(T(aIK}MqXPZ5)NyF+l;l-Cz(c`$ZO!7U>3Ah zB1@lq~$?GI(lAB2G0ExpL{G1-iD9I!7UGe`I--TaJbt)d5-+R}C4{o22{qOio17WMx z6oVhZW4;?IX;=>7R~CDw(y{8e()u`lDv`|*Z!6i*R(^ao%HtPES-;te$7+3(vuqIJ z#a&;(T^|DnA9_Y^skT3Bt$Nl9zwSpBGgdekHe1T$m;YEii{F1Y&Zx=YM+#YKBKZA6 zW{k?;3t|I_-(ukfv8y8{U2#ldl}-J3R~`F){Fmo>p9*iIRC>df=O6y}_$I1m$qbXG zGEMQ-zB1Lma+WbZ60ue$QZ<=&TQcpo)ymkuA=elHtYZO0@glx##>RLz;3HJDN+ zu$oqsj<-}gJsB0_SNCLCRAiHAK>DNPt{&rrwA)sKxA|B)POn+suTF`;`wCu+`~T!Qhpn_!ATubPA5cdav7_$#>dg&p(XWO7a@X z>m+HCn@H{eiBFOXNJdE>dH&%K;2R@o`9O9rNTQVb86@egAhC_^|C!3%>yt=u5G48(|3-ea8 zd3Hk{gS3H7S`uh&Y{LEXHd*j+Ok*8~{txnj*hcM_(jGPBq zcp(;FX24aH;nx)o-G%cazV~vAot0PVJsL0y#!WCBC>gKLS^c^F31$iGq78n7 z))?#s#*A~gQ8UAcA#FtIT$J{@bQu*Bvr)s@4fXmbd-{dxu)eTZ@6~$`tQnZX)x|Y~ zYZe#2HR~hUjB8>VgcsY5;cDP&;+nu!#kB!fh^vMxz(v71l1W?}ag9&o{o71&wdRa( zQPU<|bzE(Vw{Ufs!BwUHxu0}22S;;oGzUj>a5QI*=HO_Kj^^NK4vyyFXbz6%;3z6^ zRdH>=72>Ml3UKvt!O3#r4Po_$luPK9IZ&R|Wj{aJ>P39M`+SKZWau zxZZr==f5ADxC#=!l4nWdl?S*dJa9gi?&<@vd)wpnet2D2@7KebM%dUgUq6Zc|0GTl zCkHMkGd&41#cLcSc7$vL*9|kO1WwXpdjGmQUET`Ktk}qF3?#k`<@hK{W8}$g z!j-rk(&Nnax?Rxv=FOn-e@1Kk{Ow8ng7?XR%=6Gj=5lh0_mho32kyHk@q6AU2R37U zMoW~Wzsiu`GOh0|!anO)huxyxi0S9Sd4K=ufN z-DyJG97ugLX)lBo3`7Uad0)Ohj-nQSEws5A6Rh1jdEQ{^RDUMiI&)or%dSuLdow6(hjXA!UR2kVCeSfv zsG>>vrW*9gsfJvi?fYsE%JzK~hir?#gn&{tItl-%q#DYVP6vGa?oesyGOTAyNmr?; zO+vp-cYEym>-&7sbn25KwmY10P&L=5Oq;gaQZ+LRw#j^`?%)sV;L$4P#jXK~e?iGT zU_XsP&v_(8k`jqeQYNX83=}M|;Zveai83Y1lqgf8Tsn)tWojr>Lzx=N)KI2|3N=)yp+XH6YN*g=g%TA? 
zR47rQM1>N=lo+POFeQd5F-(bJN(@tCm=eR37^cJsB}OPQLWvPdj8I~P5+mia_&Y)k zBh)Yg4S3pu1RC&k1qn3ZF$xlBC=erfE`kIScp3tP7%31V1!AN?jNs7+5=syw1!4rx zJCIO<7{S91B$OaV@T3C?C5RC`-#{|_lm6mq2hAV%qhLJ497k0yW+BX}@@jCxJOT+c-~j{@ zNZ{!M5=h|L0}@Dh#0Z`>03k;3paBUbh!H$xKtc&(#3M%VOaTcch!H$cKtc&(#PiNF z-|#R2Yp8*k!5ei+sDT*4^8+N*K#btw0TN0OBY1Lvgc8IE9vdK`1TlhV21qDDjNpL* z5=sywcv=927{RjwB$OaV@L&K5CH%9@H#{Z48fqYB@Q45jH4r0sK7fQ8h!H#-Ktc&( z1djxO5F>aXfP@mn2%ZKYp#(94M*&DEL5$!z01`?NBX|gagc8IE?)WF6#PC_>8*cnZ z4Kahe{z<5T7{Sf{B-B8R;6{HE+9XDBdp`*zaC1KiByej#2_$f1KM5poTR#aUh!Nb( zPeKV|1b6O}P=Xl2ZTlpYAVzS{{!h*~+``WqY9MBC|2_#d5F@yGpM)BSks>jIJN8K^ zL5$%3d=g3!BSm5ax8ajef*8R)_au}cMsUMD2_=XT+-*-n31S4d+LKViKg)c>efO-P z24V&`-IGuQF@k&TL5LCDXiq|$#0c)MC!qu}f_v*pC_#+i26_@o5F@y4o`e#_2=19D zp#(948|FcX5!@(GLJ4A|IDD4*hP&ihLk+}Ck?RU>jRzq{aAQ0PH4r03Vx&ln6p0bs z{!T&(Vg&cTlRyG@zLP)#_q~%q0(ZTWKmzx?lTd;fDG?*Mxt)X(#0c(eCppV}!+q|o zp$1|GH@TBg12KX-+)1c`7{TrBB$OaVa4$OuF@ihUNhm>#;5K#=N)RKshn<8H#0YL+ zC!qu}g1gsAC_#+i)^!p}l+H5Wa34Erh#B0zPC^aD2<}}cp$1|EH?EV=CNYBB(@7{n zjNsmM5=sywxG|lC62u7ZN(UiEa8Ei3C5RE+kWNAgVgz@ilTd;f!L8^&Ip1(+I%}wb zn89u7B-B8R;GT36Y9K~%13C#Mh!NaY9K~@V#Fs#d}72WMtow#Cq{f?#3x34V#Fs#d}72WMtow#Cq{f? 
z#3x34V#Fs#d}72WMtow#Cq{f?#IKxXzWKzAPt5qljL&t&Cq{f?#3x34V#Fs#d}72W zMtow#Cq{f?#3x34V#Fs#d}72WMtow#Cq{f?#3x3|#7LPKDH9`QVx&xrl!=itF;XT* z@HYCN#BZ6HDHAhgVy4V>rA&;JiIFlfQYJ>q#7LPKDH9`QVx&xrl!=itF;XT*%EU;S z7%3AYWn!dEjFgFyGBHvnM#{uUnHVV(BY1f|2_=Y;GBHvvon^k2iJ3AnQzmB0Tvy7( zNSPQZ6C-#7J_$7tBV}TwOpKI?kuot-CPvD{NSPQZ6C-6}q)d#IiIFlfQYJ>q#7LPK zDH9`QVg#>aC!qu}QYJ>qm9xyZGBHyoX3E4&nd?fK7%3AYWn!dEjFgFyGBHvnM#{uU znHVV(BV}TwOpKI?kuot-CPvD{NSPQZ6C-6}q)d#IiIFlfQYJ6xVD#S>I7^x5=6=I}9j8uq`3Ncb4Mk>Teg&3(2BNbw#LX1?1 z5&UQX2_=Y;3Ncb4Mk>Teg&3(2BNbw#LX1?1kqR+VDV}A%Rfw4iF;gLCDqL48#7Ko0 zsSqO-Vx&ThREUuZF;XE$@Y4k##7Ko0sSqO-Vx&ThREUuZF;XE$D#S>I7^x5=6=I}9 zj8uq`3Ncb4Mk>Teg&3)nhY$X%fB2%}J$M)7`t^<}IL32K(J>{*_>L(%rs9}k$Ba0p z>X@2i0>{)H6FR05F$L#O!TD2g{uG=)1?Nw}`BQNI6r4W==TE`;Q*izioIeHUPr><9 zaQ+k;|0-MuBmX?-pXdDZoPVD4&vX8H&Oguj=Q;m8=bz{N^PGR4^UrhsdCou2`RBQK z6`emt=TFi3Q*{0moj*nAPto~Pbp8~bKSk$H(fLz!{uG@*Mdwe^`BQwSc$J)gCFfts z`B!rOm7ISi=U>VBS91Q9oPQE%Fe&C^RMjURdN1QoIe%kPsRCDasE`CKNaUs#racl{#2Yl73WXI z`BQQJRGdE*=TGIG;x+928+QH;JO75Af5XndVdvkl^KaPsH|+cycK!`J|Aw7^!_L29 z=ijjNZ`j3a#Q8Jg{26inj5vQroIfMZpAqNJi1TN}`7`4D8FBuMIDbZ*KO@ec5$Dgy zJH@N&{Hr?us?NWv^RMdst2+Ox&cCYjuj>4(I{&K9zpC@E>inxZ|EkWvs*6|6`BQWL z)SN#x=TFV~Q*-{*oIf?^PtEyLbN?)<4cf9lSk zy7Q;*{HZ&C>dv3K^QZ3osXKq_?-Z}l`4>9>Lg!!T{0p6bq4O_v{)NuJ(D@fS|3c?q z===+vf1&d)bpC}dUJd6@!}-&2{xqCF4d+k8`O|RzG@L&T=TF1=({TPYoIefcPs91s zaQ-yjAzt{289P|u=VBZa`4_B@{0r7c{srqJ|AO_Af5H05zhHgjU$8#%FIXS>7p#x` z3)V;e1?waKg7uMq!TKm(fjfT%?)(+F^H<=`Ux7P+1@8P6xbs)w&R>B$e+BOR6}a

    zXEsu3f%cCaObbUoxcKi{tDdr zD{$wpz@5JWcm4|8`73beufUzZ0(brj-1#eT=dZw>zXEsu3f%cCaObbUoxcKi{tDdr zD{$wpz@5JWcm4|8`73y*^H<=`Ux7P+1@8P6xbs)w&R>B$e+BOR6}aB$e+BOR6}azXEsu3f%cCaObbUoxcKi{tDdrD{$wpz@5JW zcm4|8`73beufUzZ0(brj-1#eT=dZw>zXEsu3f%cCaObbUoxcKi{tDdrD{$wpz@5JW zcm4|8`73beufUzZ0(brj-s$`mxbs)w&R>B$e+BOR6}aB$e+BOR6}aB$e+BOR6}aB$e+BOR6}a$-?HvC~UH@d2#7O1AUhDS=MJ+ z-^zTdL0QPMkY(Y;Dl30zpiPN3Yul_%zkzJ}4P?`AD2vAFH_$l!1{$Z|sE`bkjBIg^ z^943r`+XJ`){9EVMEIeB!FjNQ^H4W94|Rj{P&YUab%XOzH#iS6tdU?KEm9)p_9W3ZQb41RSLWKc+gLKGB6LD3kLy||Noyb~=3??i*a zJK4uOQ9XF4YT3s-sY@LIr7m>}7RNRe+)d+d0tqM@fua>Cnt}EJt4s$P zG96TAI;hKZP?hOGL#6`_nGQqKL3c8p5sdX3G)}KUl6ZGN)y*L5E zak&nPMxbozCN%B{q49O@+tVc0yKM3~Hu)Tz ze2z^%hbB|PHz-O$r!e$`uh7X?P%!ukoqUB(zCtHofu_u(sLecz+RURxTLxzoXD%o# zMC;6>Xq|a<4r7(hp?Nxo=II=irE{oG9YR^^5Xw@AP?kERGKi!O!9YjrCMfDb(Et=J zvd=>*`#egs&!aH=yvo?Y=h?yM(ZPe8wNN?mQ~YE##&Tm?lNP{@L!4Jho*Y8e7$t(GBB)@m7=l^>w2)xrXb+AN`$ zJ%Q3Jp_e7}vV>li(905ft&6jt(;yk+MK>-E^zBB*f!`m<%{ECwlJ>wrzsDwgx`h9J zjnyO}NgJf~(f1T;xHrm@f+e0MMN3MS_?DC{saP^>$%rLYOKO$`meef?Eoqn|r>TmE z&Hk|2A2$2LW`Ef151ajAvp;P1ht2-5*&jCh!)AZj><^p$VY5Hnki8MJH)8fi%-)FE z8!>w$W^csojhMX=vo~V)M$F!b*&8u?BW7>J?2Vkgzg4qeHTzYwUp4zxvtKp)RkL3; z`&F}FHTzYwUp4zxvtKp)RkL5U{?^Q1&Ft08Ud`;)%wEmx)y!Va?A6R(&Ft08Ud`;) z%wEmx)y!V)?EMYQeqiQqVD<4B)F#Cb^w{G_8 zX0LAc>SnKQ_UdM@ZuaVCuWt70X0LAc>SnKQ_UdM@ZuaVD?{8@KL$e>6{m|@(WPWEvRqvd2D_c2;d_VG(2mXm$_ z(un0`AHOuBT*2%W%wECl70h12>=n#j!R!^xUcu}Y%wECl70h12>=n#j!R!^z-e1q` zduHD=`<~hN%)V##J+tqbeb4NBX5TaWp4s=zzGwD5v+r4di)OEA_KIe&X!eR`uW0s) zX0K@Wie|59_KIe&X!eR`uW0s)X0Lem{+7&s$?TWRe#z{Y%znx2m&|_2?3c`b$?TWR ze#z{Y%znx2m&|_2`s?D=NTH+#O>^Ua=b_WZN= zw`}&yX1{Fq%Vxi9_RD6!Z1&4$zijr)X1{Fq%Vxi9_RD6!Z1&66U)z6i=c&+LF?$uW zS224PvsW>D6|+|{dlj=+F?$uWS224PvsW>Dm9zKP_Fvq`D*Lwo;$BwEnSI-TaYw5a zntj`UaUZJ{ntj`UJ==dh+kZXVe?8lOanGv+Z2!f5ua+}=w*TVpS1UAow*TTDSSvJp zw*Pvz|9ZCndba<1w*Pu(-+w*Ze?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh z+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXV 
ze?8lOJ==f1v+uv2?Z2Mwzn<;Cp6$P$?Z2Mwzn<;Cp6$P$?Z2Mwzn<;Cp6$P$?Z2Mw zzn<;Cp6$P$?Z2Mwzn<;Cp6$P$?Z2Mwzn<;Cp6$P$?Z2Mwzn<;Cp6$P$?Z2Mwzn<;C z-oM^|2L=QX+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lO zJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOJ==dh+kZXVe?8lOy+#c;OXDBjcZH|u@HdA3 z{(TL2e>Hg<@uV zkWOAw2jw*Z;p8P7pc5)4QP9DKrVv3Pnom(0FETe#6htYLS^SUQ%vu}BcG-;ys~oeWo1gf^b$&6dUw?g2@AvxkddPNY zw11$=(6phY4X%xR8?kThA3%#HuGo#>`^+Pgyd;FF!6V3mW$ifFIWUk3aczPgyq}a6 z$PFF==|duU37B>poD3l|3uCjeHVgULu-Wfl{*^7G0|VQ)@4Y>jIk+@1z-lN>rMg(O|EbgLz?t8a;l zEsYIWns{2piKk=#kY2Mj;(K^%Fl_apCGm9pVkKcdcDb0QW1>dL?%08!hQbF>rrQQ? z500Z@Y#?^KlEE-yV*lBS$0G*NQnTxoB+gZmdR$5TSteJTzt4+n#k%~S0&uar_H|)pb>cK=F-|J0=L9gA}f5S`rp9t`#>2`Y}&xdhu-~1@L zaK^vn_f!9QAX8@&@NKHCKAy15TS07X7Y~!ND!zu?-0b|$h2slLM-MJ6-nKvU(7ySj z%U54`eqt|3z1j_%RWPwxkilH?(@IirAjuS!%M_KHsKUu^uZfE8TxSZ8nNE=B8$Bl` zn^A6Vx;@cp;%&~TS>^L4k9XQsO};uEcH4O=4>v_j4_%idc)v=o3H5ZUI^FK}I_*ZA z`Z5Qlr#sgQXQo&Sf7Gfy*&J)P^6?(ZQl66OZgaBTLLP5xn2OjLbYKoeO}z0rYJ80v zCr>EpRyR4j=u|Tpmjb*(V5ZaV=HW^`#H#`p7Iz+4I5Kkk?B1>`1tL)k;TVvfj4-OEB(c*y`!i(-Fdun zid{<&M+~4+9UG$+!D6ffClozpHSFqNgctFHIVI9AY)`cN=!FKr3fpItSkTTaas9W* zB~PS(CHp9RN?j8%b3`JC0qA9HGuH#Z^{_hyNT~p`q|?zrMar!}QABJT=mf+J#?*tr zqbSebP+ig7Hvk#$hQajXWxX~Ns+d-oTgXFha!zrghWDCZKa;xWr6XGxx9?n7T7L0` zmtKD1#TTx~>|J_cVd3zR0kp&O>9niqj+b{@geKGZI$0vnA)c!!bNuPvM2&u;;O%ZZxwn@_vKXIe z0=j0WOUBxru{N@pV^N3x_H<~{Ew-U0rRUFW3ay3 zs+qiT3U5tTOIm0iUpl`0Fn)I8*@thxXzcd2H!dE3@z~F0k7ZxHceTxFb+RNeRK|a2 zy3@w{UGjJDGynFY!QVKuZZ!v2&X7Y65W%dMmh@00k{-(TBK9=XRSAAzVWNqp zI|KxQz2Aml=K3>psh2g}t7=m(XTFI>sh6`1fBbi_sm-nWEtlU`lI=!bWPT54K?ihZ zh(UvW*s69m;axd{Z^mz#5rDYIL{N9zjYd0uOSOYFkASL&b6k4x3PbG31YP`=#3!b? 
zSdfW!hxje6>S|O`T<2)TZ%N&VF~>}$8Y@YBVj_Gvch2+sfAy)O-<;3VNmvoI6Dw2<7 zb}{+gC{b(V9^?#7{GnB?&3@Rie-Un4E&lEHM9(#P>`aNrGOH4gWll>xmN`QLXK|ba z5noIZb|K1Ry|R4jr!o(6(_%-&hhun6i85MkQa{Dj5NiX95;JG=>95yul$>h!4<7sF zhcEKC&(H6C_Tj7!!D^fSdiFm=Oy1k>NXes;^2kL}qy|Ipkg_U?UVaq;6gj(;ur$c5+MKyoulysRV< zXC`(d>8;71`uv6GZxv1cRN})h8}Ei=tu_F)DZt*s?Nr&IU9VyV{h7=q1xq|jiWi>W z;KtiSZN%h1jdkJDmvG|1D`oLo&I`}4b*e#l;rag$%Ku&Lk5hk%ZU>ge|A(cSJ1uQA zXTpn(&3@gb*!}s*WbiAHzw?X?KCHYCV9950Onxc*+7~{4_|d^V_*>|6i(lCCar`2X ztI8l2c=tP+HXN0J^59rM_&@Q!*T%)@Tg0(@|33MHWF8U3J9#?ONE?&Y9>Tbzk4$_= zf=w34rx(blUL>D-k$h|^=0#FuO)$W3x?mN-kr}ZF@cS#q(RYlz+(>_qeENHIX{tZj z!<>Z!gO`%VGNMbOe?gl1N;1FqE3@;54<0;xbnnrReQa-I3p6zo{}*f1Gs({}{eEm? z{MMKRHszBTjXx#%j`7#T-?3uIyGPoh{lGs{T6kF@c z;%iphfn{m@duEU+^M9GD#@}b65vx|4=^|b{RF}inwAG)SR5ts6nhvIF{UL?uI)#XI zDS6$Ny?^u~e&X-NEn9Bbvwh!Vb0;1}`ClYai=crjNJteVq%KHcFG&A)knVIR493So z`~bvQUM0y6&~ZVsgW&}Q?$f`ZA|`|a5)@ug?T_J&sy)3zDIfnjO41KdF#P}p(+{vs z`hk+D4@YjTh1Drc2;b7=t@v3JW|bsw?c&9@%vzefb$YymkJ~J9+y(I=DtiG0@5Jl( zFlR5o`3^Ud^_pF-eaKH?E5s(YxUd_qFx{=jZn>E-u_~_Y1SzZo2S^?OX2Na*Gws0#B zP}FVd*PJ$QZgQ{y>H-$g!9siul32!aNp@*4$+7t923AFUaaL_kb`i1a6lzHMp%$Ul zR}uJ_hWAimGS%=!K@-hVzs6~r`ZZ1*yvP_I-(8w#QR>%K{p;i#{Vskn@b7Wdx$&l4 z?o#}JX=U%&f#;rj@x`y7*ca8ap-kilALnXgeDeP$Z#K1rrGAYCX3z?LodwCCP901C zV*LNh{>RgN=8e-lbl!siCx1He$ETYaZ_SK{xigzFi}*MxW-;UeJ|b@dj{nZ~g3RC9 zZg5QvD^0V*7~$)gYOjwV1(Qt$s~Z5g%^)h8s*1xYE$HJ_jcnh+A|Z)YHs6>GH?eY6 zcD2L_;w`=1K7Q3hD)1o=(_*^c3NTwSttZU8!~~&}%H2M`KbOm8&iK}DfQ@eE8kU%V zb$or*!=9N3L=qF&C{&KE!qO#7n$Hs}i3zNV;E5jae{n{+cq@+ zc>S+q<$@)iB}Ge0miU&GEvZ;CY{`fvh4oPy=_y2t3X!Hlq^c05Kp<5LWD4Qn6&%l0qR$BU$vw zG&;>ubM(NW-@B$McTb`R36EBQTyV0D=$WNicz~_)UFumFGlT4QTvkXQ)I6c z*(pVGrAV$6$@!5SA}0TQZ|`H9&P3lpR37Q`OBa0jLLAS!c-c6(!QUj=_2CQgMJ$fS zybI9&nTq)1{{ydO26>I-b&@p6O(b`K;I+))0+LaZM?eynfFv&?sWAIJk~f$gCwVus zPm%nPG>bgG)R4 zt$7DuJA}QtrJYr7UA?89si)DdQ^h-ed1g)j_0rCI*uW}*Ny?vF$j3j84;5Gn_=-i; zp}2#OJ3+X#bFSU*jW_y}Yz7lQzqAuS`NOYItZ65#2IEURlb^-)YWkm+cFtDum3eDy 
zY3JaRFkhd6O=#)hOK_YJ$l${h{)WLqE7{P7pLCjEkJCE#S_A~U{NCe>2cCUy-?4We zU)Z^LVE^5RZohEx8hF>oDFnXYV>EUe_9VckDeK76&MRP{J&7h%HC+wxb8v8GG8|Ju zfEV1V$4fg=fj4_kFYUy;HOKK437hd7z1CO{9h$}u@GyYrdDL}!UbmRw*OCyiDSU4} ziICg_6>Ww!_Cvg47{Nv(d@X>__HYtGoq>;Ih?9QUg0++QRtCuAxZr~@+QCy;>sO%- zhnKN&RN%w5+QZH)|F}T!5F~{TZsBZ$4>61(#pP+=k~+R_Xf?azra$(j#P?ynU+vVx zx(#9Ge>GbVUsT!g$!8ay`~1Phg=4?+H0J?yR&g++9h|oMIOj(rj9>Z;@nw$FSa0`h z@Nc%-0^Dr12*JTmV?3%Id>t!IaE8B205><`ID&3)P`V+|We7lYrl;{HWrf%>hb+|2j9?}27G40mv^Qjc|&52&`Q`g8rT={0fq3Gtk!@b zS;wDQ5GJsz?O_vspU?iFjQ zxN5ipTy* zu_&9u)xy=rHH~W{t`4p)t{$#Ft{GgjxHjRM!v(wO??{bP%2e$JwQy>AtdV%jCE`31Ea~&WZlNKo=nzJHil~ht_fT%Ty0#_xHjVI z;_Bh*(G~V=*l|uWF0!P4*gh%ZmdHu)}a&YIIipj2%^ZYfFO$NIIinBuIvT~ zqPULZ${v9rit9M8>o~6L9SEX`{yCpq%0fhS9mkp720>KWaS%k6T?au_*?ACp#BpW+ zLFf_3bsfi*aR8x599M<|gdTBR837P_#Bp86ab+Yx=mE!-!2qEL99P7n-$yVpa&?Uz z@x=gv&?AmBW3-GAEh9q9h|MxWvy8|rBQVQ|#xjDjj94rq6w8RjG6J!Tab8BZm(k&6 z^miHET}E%0(b;A6bs1eCkV}z}HEfes>0th`K;5qyt^oW2b0zl{y0Z$Ns&?5q#H~^tX1U#VtLXQY|q5%YeCmulP z0Rc`(fY5{7X6L6%&;7);h!IGQBp|7igd`1;F_LkTCdmep36e>YDUudRn`D|~BT0v( zOVT6hlgyCJl58TGBiRgsn1Ij&!~}#MASNIU=ck?-#P`}Pq(_Jm2t7iKK439RX^n}M7 zQu@Lp4(TM^ecL~k(mfu0Na-LCKcsY#2Ov^9$wLq+-Q+=tl#cQ+L`qkAAOcnRcqk$- zUllv!pQMxL@M7DJRXo(BHwbIw{->?)yY>ylt(AFmahsb z4^S+TuZk;A7N5A5M+;K=%EJXIedY0jlyT(&gOtAVh(StUdB`B8uRLauGLAfGkkVHk zHAv|j4;!TPjmHgA#t{dOxA$j$UZwPvM-EWMm4^=U@>Ox=xq~J0RdMCfgC+7+apmcQ zCGu5qOx=8H6SBRlwvigeCG-apXyaC5$7_BBUB~3nUIl8g&aLa&>-nb>=@v zpjbe#=prZBT6~cc7%alb2@V!xq z28%FGR+?rOW8?&09`4xE;A?S4&b&4RTc}aE!S&T`t|n={FjVvSCw0QL4FDvCY#RdP zH1Bv6R7=e~8wS(~&o&UqSuYII9E%kodIh7f0yM7x%PR=>3UIuFFs}f^D+urk@VkQ0 zt^m6$2J=vIKu6$EVs5nDmH zR)EPB1Zo9QS^*kYfW;LcaRrfBK^Rtm!4)8I1-)MZ`c}~I6?A$9xLX10R&q1-`WBuF z^baQ{mccH1y@D>UpueNJ_=owiBmQCRO>5#OL6SdD?)xB#EhN9hvK5f@-!c1RW(P=q zf!V(SnZgFOrbV&|+uWMOg&@=9QV%hE@DE6-KYb2J{A(bodr96;a$Z zk$Mv!)#y=<9_8p!jvnRcQH~zv=uwUy<>*n49_8p!jvnFk0YZ;*^e9J-xJ>L1ES}~p?o)=D)NQCq9ry!$$b-kD zj3K!1gTP~uUt-w`2s}pi$IK3p`~tIo0|JjRGfChv$}R+f$D@oPvWOulKaj#>sQwxV 
zJO+6`$qANy>D*Do3}Xoo7&CD205LOvqv+x>eHOwa#&DD|L>3+~h9K~WF~mIM$4Pp` z7@`awF@~dz;izJWLVjGOM~oqA;1Od80*@HOQN?hSF&tGV7{gJ-5WXU2ql_8W6b`eZ zMjiu=zDCRWR;1U9S1E>Gkp$)7qa8+H!6e1_qXYm@JV6NoMU>?Lfno|u7@&m6L;^G@ zuAqd1B1;M|D7GvI92i|J8S+(_RDe-JRD#ks;Zi|m34IeZ8CP_Io#CZrPBQdWXjQ1e z6R(h@uR^TC4PKZ<1$|}2LFKD(tDvKV$W_>{u()5Lz+a)r!+pY6u?J7UD*9L=8Ga_f zjB?*sf?hF)ZmDiX34*S|A6wyoCBJ|U;Xai4ZEpRxMt`FOIHCxKf(iO31s5s-o|waM zs1W0!)XrV#C3EPe+T3f&`z<+PbzfqFaRKGPFtJ_``^;G{I5?_PFNk^7sTUj|)%}7M z%4?1Z3(aeeiUi~}#)X~b<^HIoTI+>IN{!VEjums(3yv0ZfR+;Vg7XA9%@m}ZD@X`V zBpfs(oH5K{*qC6}Akn;0YJJw+QQmqDXsg?rF)Fu?qg*}LyfJz89p&o0W{=9P`w7WybZXmlWUT{kDZAGzjsFR8p-P!NAVn?&y*g&^Am>_D_kY4cF&sOTf;2z8OZCi=Q) zTJ$E-J4m66Z359zQOd#{3L~L35{e_CJQ6BELJjJNa@2uBDnatdxhvEG@*2tOBx#bH zNbUfEX>d46Q3m7@>>%5ood3eMefyp`dU(&#Jwu;~T@b%)=ri9TNq$6hf;9G)=#`=u zyqfq3+K9$3IF_8?CwunU3`C(`p)R?xWB3+?njnU7|HHV{S~b#iCwTey(^ZCIziSs5r0dK z;%^;&crE_D_xR#N$JU;I4F5fMWbONoE=DeX^OW;8z3WsWW~HlTiRHuxP9^?SrF4Gz z?U~5+#PaU=8dn1rGVwo-G&%221#3du!rKb1U^ z{-QI3OybCD<}%0aK6QGS*#57h{^C!N==|d1(S3)Pj{KV=xZ^f=(fbw_j^r+FEbV!a zjdn}@s#V&0l>==xt1hCWbSJ(ewmG@M(s+^qVF|dXE}mq>tSs(#@n^8!&}UM2vWbOK zoUcf}fhwJn_}NPn_6$aN%jzNfpK+@+0<^ zDP_eqgSXj2iJ7@O{uLPh03@)kk;nSOB=%O6Ug`1-j26FL0vDITU4k51M%)FldlDX7 z#qNo%#m#urlVRQ_{uu>>KjT{Pxv>tOx;VXn#O|?qkA>RU?^ps1S|2W(EL=d8KbfDe|wn0%M@-)l;vY(o{A+{??wodv@qZ0*|fx8Du$B|j2r zLM%*m?8?)6$eORat^4M%7TWJb%Pm~C?RjNY307fBg$ArRwdA9x2;I#En9jpb1?Bv zC7$E}h>h<*x^(oJyN|8gzIXfHZ3hqD_SDhUx@d+$!W|vJ)-Yf4XVgSTlvC-4UX6BF zv6>8k6ILi%(h`F-e$J^xgB@CBS&LE)qqU6@QXkRmKyu2~ozrRjDl~g5nnjT18g!+b zK(C&%Xj=6JHd?1_!N$o83#_^h#&F3JxoV5t1w17f65Of;sra=HD_jwLZj306CCNUu zI39hAvw3#w_1m^#8P*B!*{Br+w3C{^W;?TGpJgj~HTdq>$)Uk_C)Sg^GBg-48!#In zoB1VVGrxo^$SXssuOplKIIipX|;})!LI?aL6XdVh^A`IhGTR zJ%Dz|h$fE>rE029)mY68wbodjswI!z_5|*&%Y;5o z*{;ED>{qRFA2bd+Q5RclsoKn*(P|b0h5E!bu^%R`Np2q6c3<-R%ObIM^Krt(X$1mD874Z!VFV*d;1V ztWzNpwOpsd)MgbTVIh*MhT;eAi#>K91}0*D6fx+4O73@6_s62D`>iT7#s-N$OCGis z;?#s(YH?`WW2r%!V*+zb;4Kmc__4&7RfmLiNLZKnvg+d3Mv`J!FXk%U%>8L@whDh=d5-tOX+z0&lTK=#}bW5y9!R<)e6hNFeOg1 
zLcA7rmw~Fqqfb6{EVuTeOLJ>;=RbF7etr?(uAbSN`q?Gtj3ayu{DJy_~gX3tt&9z-nmFM`*JDmhJzpAYqoG70_RolklB@hxCYF| z*1ATC--~?{pM#hrYf8{G^aw|a({PY zKMXe`Hp%^&|1y+au1(@D<~+jeSf14O$1-Z1{6!_P44NkQ&+-;<;tB_x?D8z+hEiXK zq12ZX-r^0r_ikDE%)#yb%O77ndT{c*htJ8V6TMF#kr}S!Z|7q%L+1=lJX&)*znG4epD| z#bz#Tw#M;_jG@e>yq_CPd?5BEC})B%HI(}-lI*P@UA&$KAOHX5nWOVN9@_EM`8~%b z*S=@|$a{}n^yF$SLK`2zn?Pn;E``7!h???z1nu)27KYQ?S9f$7)%Uh z{_Rk>2~Tlp;tnNm%N*?7WhvF?f1cEhba1;>d zBNcd5fSLI3>FhT+dVg;g^~CehTDv}%r%2}520TR312WIGEy2$GsdJM-0cFVmGaeM! z1}%yDqGi#_kUe)eGVuf_rMZ<+}57&8NgH5D84`bZYAC)q6n|qVcIQy)RQz0c1PYHp z;Snf20xhRGE;xrq_6LUI*RVBdQM4rLi%3D8$ z+eWXO<*gUj-;(L~FZj)Y>Soefe_{^FOjr+*WC|dI|Bd8tm~E2Fllw)G?V>6hLUy7$ z(MFQ~F-U3{B>U?i?H1JzUO@5>B!5M+4x~S!n*RkBU+a)^dnP;U@GH$t+C7Yt$gr$Kcla_QX*=$KGssuuhGV8Mb(Lx@Q?;b2qP}R`s3oB?p(V4-#B()ErYzZD$&4jkOU5jzGdXW(a=}u( z>!YQb^L9?Q`2cvkWffIE={sqBvxQSgFRbCgGFd%u(`*yh8)8h^?6u}=5 zEqTq7*DXm~a+4)@D2dAk{+wuqqn13vBym-1uByIV2G61|l<;;){2z|RS_9Q839cGR zKvE|eBbg&fTnaKtu0=9U^8Xk1E?{<5Rl4Y2YrmmDyEqULXxE`>5J^1yT{UR}+0Uw- zy{q1ViX$e0;3<6YNDSz~08)t`m_tKC<=RyN6!@g&s#l2L zhp1@qDy`j(*W+nhz5hSv*p;$dy!F){W6b9m^Rea{YtAv}Y)L>5s1m6rf%_!DYEo+n zMhQj;Dg-tBvTc9{T|)%!&j1EV74a+6=j#BBRILtXN5)6mSC36jip)M=1l|F%z}7rK z?5_sQVtEQETme8Eh(ADa7yw~_?ymvb;?XG&I>Sz))vhZV22UqZsdW+EsC9x@30@-z z5o{v33BawA$`h<2xX&3L9%v7H`)2p=-_PIN+>7>~wKfUFop;#>LRmzr1c$KWB zl3t}PR3`0JibWv}Nul~OUZqwOGSv@~RJ_k1jV4cdqnc&f8&$uIH!4}i6QFFC@pwN- zNgYR$Zk9-6vo!E*OGfcj5;1L_nYcGPV#$;NDf!2fB&~R{K5wWzEaJubz0grh8E>d< zJ<}kImIQ~Pnvn4{$adD!{fw+p;xR90xy9pN&K4$~@N%{=X@qv`rw`H{ZzK}r`(dO2IVc(Na)NEAFf8pWiejU@=V0Mre&IlW1qRC%_O|2^oe*p<25bYBq(hcG;mYf z1)gP1o%X7lW5%nhU!PZ%9Ao_{ud21vmjo&0MnzY(C~0`wEK$qOphf`qg4Rr*SIF*v zl9f68%_oE4Za(m|MeR$YsFseQ z;W{?YWWwuco=LA`^Nbm9=IeBi5E9PgPG{3MQG5h-wE|p>WkdI^;$5>IaohpXThmSCc ziY`6e^);UV1xXc0xb1auR3Q$@5?_oQgH}!yW6);W4?insD&|4cmOKqSEtN`mFldm} z47cU&Pco+D6i@Usg3SRP3NJc|jHbcnWXY_*4;(C+Ms6yWL0l_VEJYDCvWa=aIOtIl zV$_LPsy{(r+br=kX>DvIr9)VzGhTFyPeX$9<1ve@wHd^{x~+y-!mD?C zeFmiDnM!+@WRjQ|XNK%R8ppFaWJl5%hPB8)OH_0{OU3#aY%>*7`#~RuYCDL=(X`0G 
zwhLYi&R{owj87S2DffhRN&a}$D)sW*{E?G6_lkGn$@6m!3Iw&et)iWW3Q zQrjGF70{9w-UJmbY&=bG%P*1g2DG&36a!jXyetQ_wCH>T`r?S=eFa*orVkCNGROMR zfLcm;H?;pGJsdnDtP|*01@%Rbw4-4hy2_krnAVjc<;3fdK zN-9sVir~IaZ~4#f!9YLUw1macopc4yAoQm!b}qL#G=A&OvGJ>~9=UGp>g^k0aJ?!rY)BsDy9`3567-B(IV3f1I#m;|2DL z*9(N9yaY(}Hp$h>CHR+gK`{lqJew7fpbo!mWRHNaO&UyuM!{)b#Q=91UCUd2M3iWSVeH3vwWL#2hweGo_((S1tr2kzBj-1 z9_Lkp*9bxcn+R?KaI2*91gi+{1MpuA;GRiPq4OuCehLu4&OloEkBk8DZ<4`71ivQu z8o=@20XS*G!$PhV*dcUp5c*AlkBHBegvaM44dG!S*9z16IOCtRD%T5hGhadgFf;gW7UX8Sm!|#K@|1aFY!55dE8o2_$tCyX+Ham!fNaS8d zR_>w|VQYqx_zpKsZz+Zb&dMcOu4MA~hXW*bG>Mp?E|m~E718*|!5 zdA3oYZOn2TMcPK0wo#yMlxQ18+D4hSQJ`&s=X~pZA^eSn)~^SOL?Ft|Wa!>ZUZ9a3 zAQ-tfi?}zFi+eNKAP!g{6g=RAtO1d?dowdfe2J;WyqQG|t-m%jwjK`VVapqWL_y|qG zoH+{S%t=EaBgw)xhBL>8z>^KZ_=Mx{IyQm3j6K6$CJnYsYGxVxhr3KUE+~$(3MIcJO4K{J0Xyr^+0x(Kz2efJL`k& zEP?E75V8}3**)FOjs&w44`wF>vm+tdi4U`TdiUhI^6opK^XPt}Y39QGB`)(PQLna# zfND4ayIcSfSqXqc*E=}R6t=)a5{ffUIaN9F%Vrx#S2jRoF&#TJ3Ud#}y|OCl#j@rxj-e7d2fmX44g8HeE4h(-mVjT`^|U6=ODCDHhX` zE+WaVr1_LIpOWTN(tJvqPf7EsDF2G4t7y84rmJYWil(b*x=S_PrJC+iO?RoLyHwL% zs_8D(beCzm%QW3(n(i`9cbTTUOw(Pa=~ijFRhn*e{~*DO|r5K z=UQp3xHc9sE1j5?PRvRtW~CFe(urB=@sMpSVw=u37Tq?TZ7jNNI@?(B5E@I;+s2{W z^0SRYx8-LWhi=QyHcmWb8;97YvyDTyO=la2ZkrB`bD}Ejm?HdbW5q+Zv52kQwz258 za@)p=hiqdJ+w$7RqT8mkjYYRjXB#UXvW-P-)7i$N+onTfi9EJ(=%49uSlhk(j;O_V z|D*Ha(3=2}ALG%@g5ZE91cwCi;E*5$hXfKbB!~}(1R*#ANDxN=68@AD;U~I5mv@7H zr9|KkCCJ=;RhbI`=GKFwrG$XF5Hebd4@XNOI9if$Ia-n+b0Nsw27$Q^0&^kA+!E)6 z4{2@O)y2p|WV){_6A8*B9+XK4%0xnB5+BMW1Z9#S%0xnBA|WzK5M`1e%0xnBA~6|0 z1SoCtT@cbHBp`xcJlG~eKm;MAO~i+7A_Uun1gk{|X%qTLn@AAbgoK}MLPFYv#8mhY z6cOE6XcxHQi`m>Nko%dqz=Xi0z?8tWz>L5?f&BuFzucj$8Gw!%^+@bh}$gU zHjlW?ByMv_*jy6vEyZHHz2+`l_GSOx75ML5ya#go-gO*5Ck=3z#9D$5fO`X}Hwhl0 z^GdhR`MZ^}Pty6jCoHit@ZWp=Us~zhzGG!zr)WAD6k}OK;WRjvcS~>D*~$mhXf7_tO=~MACHZF;Px{tF}7a3FBjMl*c8|j z*cLb6%8auXzeg%tKf5J0QK)n$q{c7Il0(!19`GFYo*XFgoPeO)sjZf0lx+8H=4 zlSt)+NaxahmPqFFGaoJ##gQuZ=dG`a+XVflm{gyH(Ky+Z5SdIOCPZI65w|{RW8NUFh@KsEN3Z(N{<%Y`*DuNr?V@R`cyIS(fQk()wgdag_%5 
z)k@?{GHrC>RTbW&?D(Pzt*PUkFl6HgV6cK^_HCr`5h@n?r;X|TRT{)CGq zS?cOv5|+;OFA3|5<(9ZugcIrdGKIbqt~4%mJ>klv(|s$yEGIIzaO5sXT&hrSjcuR5 z){3Uk-E|gDJtnLQY~pevy4I8ln_R0Luyj6K-cs$^`t%bUCZC!A+WfjO{|9z&`9i*f z1AK>zC0sqzalR7z_AqVkyj~n$+-Nr2!#tSVa{{>k2G&_Ha#Sza8+LF$eZA;~u)N-^ zz`I$5*>E7Tw}EK5m>}?XAus%~z^33sBeiNCcQwGow9M`QL7hM;AT18 zR6vg`J|96gg@)0<3LZh+z=V4?bWbnCaEH=x(|OVT@xf8tO8^Vazzv`S%dzKU(JOir zXs8eL5X)cji2PLJh?h-vL+ESFtJgVZa{ytZfnWwe!LI`hz-W}r+;s;(zV(4EU%Yc* z=lp!>6GsQ{KC}vzU87+n0>gJq7EqvSu{9O_hbg4&310&g?o$+8-W%@eg642-6m;lx z02nY1XL-}w0N50~=#9>?=vNE10o=Y(%e8S#W-w)o6Xu#14n8PmUHDME+Rmi zmBc1i!(f2yIrk1_%WI34jQ$E~a|^dVyZx@UcTQ|A-nRG51L55Zcg{O&aYA^>L&t$E zL%3%Pw!_~q;nD~Q<^R5fov@|${0D*uhT2Ws*H#;9;pV?qE!)na3@}oK={j!V9Kv;A zFhj+3fl!#!#ibMo<>isXIo!O6TP@(sa*uN09E)BSREMDWI=FHZ z6^M+QFzH1XzPDapyB57H+wuY%qd%$FU}ukd3H>FyUD3H zUo!B-uG^RIXw+cykKCG=o@!y08xvKyB?4>cIFfw0iFzqBDZ&R~s9z8LANg#h==BEg z!~Klqfnoo5A`M(*k{!Y(30Q{W{;{HZ;BfDOLG%j>TgFW@wR{a5@r#~|$7s0Fsn&21 zc)eCfL*r2{z=EOJA$RoJMvFXP-;4XULWA%_?L-erKEngJNu`1kJ18)=%X@)WVH{Dw zYnxJGS8EGxJ%IppS3LXI=WF>2#)T@#$lV1RtYfjd0i&pa$i^kdSdIsflbRVdZw~_$JQhDNbe}ROC6O!LvKi* zD>qt$g>1)TaaN-NSJYNiF*}7lPnf8J)iSgTJQyW8Gv15bP{JAf)HRx2!b3|oU}E10 zk3{~0augdlx4p!sMK3@<%;vDdfGqr9z##_%Vpr{jf9@YGpmH(%HR`zel#Nqt$m5Ht zvZ12;|@ysog4ZXa=^fVDvk7>AGxoWr9>xh7s|Nt@MpmX(76#9_<3cxmTLxo8vd8_L!Sx$ zGTYylS^Hp#IC^Y&t^iQDFK+xMRL_<@T@@0mKS^MUI&o!#?xlm{b8 zvFZhn;~p;5pEt_Qw+)mP{PWU{8@TpTt|h>Qg6Ig5TKH!$DlZqk$lGx61Fo0E{Ro9R zJ6jg_6Tl?Cj;#mSRN+1teTW+)FjDdE04oNLZeW#r(T@V6bI1dRbj=YIss>AypTN613+PL7PaenCi ztwwtYm#L1FVdFo1eiQwy82Tm()8N}K3;XBdFwWxlq8lcHyPCL5Zh`JgKBK^uzehm zk1B2Aj+a)`3w;cieAWj$;a4!|Kz?=~KC@nQs?i?E!Nh*5Im)Ym8iB)Da7K3?s8!h_ z5*v1icxmvhjf44_+>7qdBRzzH)nUtn^myS`iyewqDi^B-ST7+IH&$GOut?*CtBy@1 zt!xersb#Nc3wYF-1xvv0$)_#wMjm$pffMeR!a&y>`gX2et`^8zZaHYQaW_S6IJ73R zcTp`s$%`0wTV7~Q@cn3K^uBx+J?s4#)$n4n5ckL8aVxg`{OAXfWh_PAnwDgMtcN7a@;~u<{c4uhS3-7Ly8>+=zy{XN_=si2?9Iiwb@FxlZD}; zj4uAs$a^DSjl6dX*K6SI)S6TrzGy1AFMQD?z?#YM8r*77ge-XYFw&xVH%vey;|R)8 
z6P?$a*nx`W<;DSXwg3MNygR-kp;S_FN^x3oCUIfny2PE0zsg^*>g{G=Ho7*~9^mXh-z6B`t6A&^0Y-@; zwZV%9FBp01=(dfY-Fx0$;}327;B@5fNR3;QIAjFBILOhDeJ1i!Xd1tdyT^#5KSJH% z86B?FvKS8VQp6EGxY@agn<+T3LnFwc4R8f2hfBCH^wyCN6jCat$539tRv9mPU|V7k z!5QZm%Fi3Qonv_JaK{K(OX0(>IL9z5^NB#58mNOE-<0dH7_R(P;@q8EAGms5u=UWEkqx7xj|Ofj!YVV{i2l-l zJb2NRd&*9K$6Yz)oNe)Di^c`rgzNmz2Zo;Fu47{O&vHb_#3_pAY$~_!B?;Z zRLE9?uW)p1Vm`5DYa$0#gWBF>BydYBsT%HhZd5}@gMTRT4)jjwJrkpNE^%qjvCtML z9iF!se$X9B2Y+s#>V}Qho5KLw@lcJ;96kb|8I2UnRe!J-JqrqR__WY_dSQa-1@9Tk z)4a%5DefURfP@nyy1L&OP;piEmY3iG}bBp)n zV3dWnjowzp9cUOG1km)GT>|P^=dN9M-<|(&8?L{;Fu$)*Xsv2iaOq6&0t{#sFE9^D z@D4)ppy1){&)r*m)$wtI?!}8)oa=dY3%==auPk1Nc!4yau6oh0;5uqFaIub;T;z@C zqCN83Y`BlZuE>i9xEW6M1Kd|bg}c{3K3Xhr6h{A)-t8X`9Q2Q4CgxSE*08_$$8iC< zC?o!Hs2V#$KE!(2KOSC=U%&W8dc)s#U-jP|{`ta3^G8N6UijPm!~4c2HuaoWzGc08 z5%f-c<@NsdyZ%zJ_w`)6nhU>lFo%u6p=Af5aiUDrD-PmxE4)L=5_&FPLkIPH$o=j? z6iM{&T8%fGp$p|Og4de`=;IylU@#vZ39S#0OhwLiF0aeBGVdTN9FHeDsQV1|wxP2Q z6!!#1_Jt#S#|M}7M*e9ZAbbG%Dd~}jg!kEWy)1AulIeY(R|FNHru@^sCWMN-5c(=Q zQL%*YOsZA*rflYeKMOw{?T|`cebr;TC(pZk>WteTD-=dI6bmDT&@F-QIyJ(`zxD?1 ziu|jKL%^aJLpg4&KtJ`O7h@)Uos$?Js?{NApZ<4*?&qyNjZN#`_|QYUCU)PQo4Vwp1HB8kt;2SA zYH#Igf?bN<^f`ai(u z1I{mYhCdp*-Z>%&xZ{G>lk7(m3ws{P*{y*tH*!%pApA24gJn+%Tu7h}=LSOX!7w??= z$aNDV56{luQX0E@blZ+=gD;$Q!x^W9ek-`g;^8*zuNzHz>2W*~8v1SMk{~BV@`I!D zS&f%5+6!N~-UQCamRC=_Y5P29UK>Nd|8+)wd z2co@){g?SOqC#KVCv;UeA~qg5ms#Jjjy&bphN>yP{Q5IJd!536&n z;*Q1c4eEYV@gwSgrN#b~#o-ke2Od`Uxo(Bo`|}pN|7vk~+N~U#pMP>;;nsVO-oNQR zx4bxi-IwmhDFuuy7dP8@(Pu5@?PJ3$EG>J1mBvI~QAs@9-3WMw7agz#s^T7Kw=%8| z&Up99Az>dKyttuz595_47!`&GienoGs^~)aWX_|YyKn@l)-lC`uiQ5$<%RD}g#Rgc zRlSTaunvqti=p@U4?yXGSQssv_~OJKrV+VV6EC7_5x4T;Zn$Q>!|T?(@W0|K1s_Nj zV!K)n*XoI$5PA>w2C(1kewzcg{~frWv&akn@YS_<+j#^KQE;Zp+mt+8!VE=fu9QtbHzKPJ4hjE*tXp;qe`VQ5x?-Z?wE(2W0c#2PO zeC6U+J52F4CIh#*S!x$kUBdqz>(Qb{1x*HeCf?PfycyC9z5jUFAG#76G2meWyioC# zh>HIh^_IYG;in@PC@;(cVwwuCZsKUi4HAdm*&7)Jh`w(kyc#&XI&zT(IGUr_gd?(T 
z&1B%a=RN+}@$*^(=Uy@O?wuE2a{I2oSx*%QzYib7U6Rrm~ocNhJnKcH#I(ianXN9;3CZPbM6x#dL`UBiM@ntix~fk|Iv?)K}`j3@Q-2Q zknw3(o5L@GA{fHgtQXiG ze&LS@|Ao%QP)4el)bd~W?ls~hTBkJd>;C`socON`<7=sbDI83)%MuKB*saEzK&4Z| zZbKYrf%jdj&DTM-jQ1(lWXc%JFo_6N&bfDW7T>HxwNAENKDu}L^76jDPvfe^E3QBC zpPt^m_maXF0#jK2m~yV~b;E&MfT-0rnXcAx!*Dw_|8XT;-vlF_a7Dfo`Ii%}=zDRt zhz-U3LQMiVwl<^x#W^M}%c3cFg|eD)9cMXT=E_C%P-MSG!sKD2wZeT_v%)}$Yg6So z4y?-j59e5MO}CX%_mK2hdpL`g1w1|ozrL3;VSM8KqVV7&qG2R4WMM#ofACX`+`@8% zd>8$epH}d*wIBq`!t4e^Uk+ZRgugZs{V({~UgDH8hknR;0$%FMyl<&4U$gh5TW>#k za{g-z`?f!Ir1H$%7`|RGmEjx~h7CHuqvUzzpSJ=!d}v zStFqYc(DnPzKt(OVNgO1;hIyV!&jE%iq}iA&g@$G(D&^boM9e34|t7bNc zVY*viIH$!o!Af(k#|MKhaQF|92cBMBDc)s@yQ*!X&@xP!$afnv47}<;j%+bkBa?ev z%MVOp=0&TL#2*i&QX*nx;gkM8x3s``W8r38R3H;np$`E#WHpYiV370%gG=4e9NGI$l&Td>g)bLR~z=b&;d<=7Sqa~F>;{NEZ?##6Wp*Qvge-6#2>0wT3#(5#~qrgUf#$!im zBX|`Sejwn!=(`UFuL6Mjgfjv7{fenVtnzq)m+*1px#y|p!RYycO$V_fe1#C0HR7U2 z@G2#mkxz%-8~JqjAEAh&_%drZ%-Pr~CM_^ojITppk&Mrwrgu>07lCDV59`pO>4U8r z)S^~klUy^+Wl_9N2cDjK$5e3qNal-kcW&}e)5-#0*n|AF+PONW1ULb}We>I@msV>- zO-xe_K=;9Nc(y#W8QTAU2(acbG!Ri1}}(Qh6z!K!8uVNOyyxgWdcfi zJ+ura9{5(o(l8q~vIn^~d6_f5ckwZjD;5GTIrrjC!v?^NJg2tND*GaPv_D~BgW?YF zLKf3-g*KKwaPe{x3l5yzphEiTo!4*qz`|-@^x>mZrw<-DbKd6*4^G(c?8sTD4X=PD zGFe_iLq{)Sx2cE|%V617aPQ90xq%LlfSy1H|L9TJ41l-A3iw7oANf?|^St4F1QowH zsT7#*c4@5HaeXo3XHBA)qvcr~e7s}23;h#9dg`FXNRg9a;ZvL0Hs(^B6?ABsOXJFp z#}zM3O^()qu}mGkMxH7pXSAoTZZ8}-35RceZPUAVtvPVg(awRh9=+|X8!pE>LkpS% zSAF2b3@l*i5`aLBRFs~Vx3gG_#Ns6!;d>jHJIJ1(RwRVxw;gi`-z${(qRf~QV~)`? z87q2b5-^eiz=8$LFR+DKPZ+O-{6NnF_H&e;@uG6B08iJ_NY{Ejj)WZrmB( zU_7vFU=i~KCG4@lQ}LpVCsY$>e~N$f`|yHVh`|9wxxs~~`??g?y=;kKgUlfKhxS`? 
zD!RAt$l1{Y7w$dVJoDh#=jNZ9ojyQasy?>7dRI&Fu2jhl!5PQZbEeP5{I?} z6e@ADJc~p(BU@v|fi^T7WE9#U^}d0%9@@Yb%T_Cx`d=g@KBi_cDTrqo-l*Ocn3{r) zl+801G`it3DGbb2+Y6MEq*+{Uj&&ohD_LH2#mgWA?hwjop2FwS8by z_}1HlM-ObeeYuCYHdr!2??n@%yHJ1}hVnICEWzTlfkbf}Xh6&|z={Y2>J`R8 z(t`#h9ClRjuGdB}7XboaXLPr)LfH`~$LAqD32lkpqyZWpSbiRiP=Y8E@e|e-3NJjY zo$w{u^{HY8f@-AHS1nsdbl&k!XZV#udw2i@CSi+g0UDun>BtYngJK~+abzk-R!}F6A{T{i&UvhTm+_iJNbsHPL&0~l0fO&j9}2!R4Tx{v zA-u6b@hyVtLpNeh>KehCCl=)3fur|O*&y%%$Epo~3;@8~;-AY%HfcNR-y`FwhG$K>7eKNbDY zl*(U?MlW+WzK)-BI>0*o+-oe{0~*o*Mx*{+ulsj7r~7xU^Y7Z@-vv?qyATF%m@Yfc z76>SKoke>rLNrStswG~>W&??C@j8q4ScK@7Ky*vIjvWa$cfsqdbB{&LUA(tV+Kfwo(dd_GL}k8|mqQ4cSddG(it z?k9}=mH3>4j>K7aK*R(@Owun4-A{;^+qs#1_imqW=T2VzHak0gV{eahmUG7QipLbs zDn>X;Wg}PuF)V>7mSCK_x^aYH93dD-2;s9+`2Y(aVBrJIJZvx#)7wCzJsP;@dF#DL zgZD7^Zunh7F@~kA2n=N`@ngxkF>Q8sY%G~96sEuZ;Elb3FZkd7e3uyO60==G;-Jsl zI3y$v35i2O;;hpsz!C#kA^=PH9t{VUa9{}smT+_xjUkl85lUhSH645{cx=b>0>=c- z3Y?zV@%+q=v6&sSGdrf;U+s7vzcKt~@oTq}N%kOIqq=nzczO>b5R4Jb5=`%b58QC= znEvPoc07;Y7=E+(;UiYy0w6zjEu!D%aR6Sn0E9Z7+q!l4?C#w-$%+nl`GeE1JJ}t; z&nq5NJgXQvFH#Fik;4hH8$PWWSu|$RtVPJBODtG|c`Q;IIE{{Kh3OJwU1GLNFeiif0ugw?%3}Ddu&8lyDx{zp%gtYPWI9!`9n&v*ys+n)@O( zwn)t`QsA+KURb2bXQ@MDP0J%nFh(#-Fgk`wUkE>6Ym{O?}L#i8hJ|B4V_oL9AZrnK* zcr);da}ga+J+=GREgL`g#mbYTNBU>)8Qb$HmKSL?S%i58?ghJSsWZLAx!DPSPa+6` zcA9gb5%P6EUN`ddfc@`78gpcyQ({m|lwu zq@e1%KC!JTc7(z-TlfyFyK-if=Zbb`a~sqFQf|~x#fUM84`*iP9D6%9 zGM*yS$IwZuUik808aKjVermtK!Va+5o-#+1fn{EVil6C~f6EyL`Bn1|k zAm{bHmKjF&K7g)kA+b59B0jL&Y^f?5+OE>^bMFvP*> z)x6aUmeH_1Z)2vi0P`Cdb>fReEPq#{MOgR}znAfO4uyP}OMXzK2Whd-$rM*X#A);tV`sv4wg&=nCsI3!q%Cr>i0&2f%jv#4z@kOsuU+aVTr|2SHO^XK-V;)X+gR*=bRnAkMAqQQ>LWeLnu!3&uOZfRy4 zWmwIk(AgG@83wS-Gf2Zl?z^b9+{xev1A8WosBlwh(1(@r_?zf|quBs^GIgLxI7{cCAE zi>JSaUXPw5`!Zl+fyG)Z_@jl9a4jeLfiWJ!l&^-#E^cV56yo`p#^H1Tj1_WLpiMn4T7;boFjE+=z{rVKS2da!U?)!z zJ4hOse1J70XDLu1n9;+~7b7sM7@eXRFo7Qj&IzAJ?EKDRJT3*gr^2U2e-ir3=-0y6 z%QrZNj+zi7thVxS_#bA#c)vti10(%#!&d7&|l7P zdwg{4!7t2R-?)DMsRJ8EKQlrDGd!%YZ5$|Kp9#BT@b@yzqX3YfGp(UPR4ixehtb-w 
zdd`-Ln9nX@ps)?f2{lY+;*Ep7GT3WmS8FjNim?$-fb_Nb3Px}lA_DUTJO!}cL|^0_ zIVPELp+>b8Jw1F(2vi(Q&++1$$0`nAUEKOn@(29~WR;-fac7f12*W;n>2t-$e*g`F z$^Gu}`$G`AgxSW<2o346r32;yXhcknR7*Hlvs_%ui&cuP2R3w$lyAH4=tssE_OBh8 z-#+C-FDnlR4*AEq`2ixp#7daUxQD4ybZGRdX5gFwY!rZX9e1|qUZ&M$^hyj)?X}pg z|Azvs;x=-{V-1(jFA8JvhkT7tQ#EcP!Pl5bMOBlrD4(JOB12>6J zj4*r(86uJ(+f;}qF56fy2BozS6M3>kI40wA>?S|aWG6hj!k_R#8dkh`ynV6xmN;2= zOPp+kCC){DHZ3c$8w%-{<_ssBWJ&1KoEdpZ&TuZx8P26SvlL6Sewu$rk3QA@(2?-g zt-8CHk(M%~dnrfBWC=^Um-3{0DO0W7!_MWw_bomQ>guLdC!z-DAaihz0l_)C|LbBu z?~)Rx-fV>@g69efv7ZE4FDDG`BMcrF_y!^MmD(^VOwVE!so{QpaxvK?Q%y3_B-2a=kz}%)Vp0=uXu>)z zH0WY8e6F?T<1Q;2>xoS`-t*A7n%vw18+Lx0U>y-_Z_d5gX@Etw0w$_ySfXDo82J$u zBQVw;KmDM5E%uz=ytBOFimS$gdpg*X*@8tBUhZ6gi%ekjgaIes+64@YJhc#mF<(Qn zk&v%SjP0yealz7>6G*k!I)wR&M;U7|4z*qR%$NDFY`{c#P>3qnE_a7=J4+W!xB}tg_at{#o8*FyXb4Mu^Kb8m9 za){TC(NtED69aj92tY^X&^>#!)HF2yMurYK=?Qk{yyTC-CkZJib+)TKD7PQ@r_H?1X7 z>Q;=xYPb?8o%QGvEO_S8&5rZHquVz-Z#?R5SmJQji(_tSh;=yYF(#M7H^e%+AxQtn0o%JF&Mc%|bdk@ZgF$nPE@rXlkf3g{ z7UQw_bKQ92Sj@c}B)Ojm>SiG|gWTs9%YhnP;?R(84N6G22CajVBZ*|q+)&&l4y0#D zw}zHDx*THse0~ z!~^ceCmzre!*T9YJ3n#z;BAjm6R>|%duX+SFIa5FfbG=gz@<|(=-;k*T=5e#&a=-2 zuCWJn1+EF~@5N+7vpe+)cXXvYM*rwaXT5~WF19Hwh+}eL1ZFN>hH`Qn9PT6Ua(+ng zD#2?6A%aZ=Hvza+Qh9<^1oxddGtyFj{kGE*rT$nJZRQ^fjpH%)y<)2jkN#&nZ{>itfkCe)jQh8D;PfF!UsXQr_C#CYF zRGyT|lTvw7Do;w~NvS+3l_#b0q_jTL%0I3A)5<@s{L{)mt^Cuoa@?=z= zjMhh=^6yjreagR2`S&URKIPx1{QH!DpYrch{(Z{7Px<#L|32m4r~LcgqCWano_>|5 zU*+jndHPkJewC+R<>^;>`c1Pgdp0sytbh zC#&*gRi3QMlT~@LS|2&(pHu!h<)2gjIpv>I{yF8JQ~o*SpHu!h<)2gjIpv>I{yF8J zdyD$Wt2}v?C$IA4Ri3=clUI52Do!YCj3(CKs z{0qvzp!^HUzo7gJ%DMU|(h@)T8`qRLZLd5S7eQROLWeUy}cN%@zQe@Xe5lz&P2my~}=`InS`N%@zQ ze@Xe5lz&P2mz00$PuGW0{lmnpJSJx4F)=F-4qg;J!NjaQCT8U^F)NRWS$Ry%%41?y z9uu?jn3$Ev#H>6fX5}$4TOUUC52N~rQT@ZH{(*xQEq~QNjOrgo^$(-^hf)0lr(bG( z<*)jOQT@ZH{$bui|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT)jy2tA4c^Lqxy$Y z{llpKVO0Mxs(%>OKaA=hM)eP)`iD{d!>ImYRR1ule;Cz2jOrgo^$(-^hf)2*sQzKz zLjN$Te;Cz2jOrgo^$(-^hf)2*sQzJ8|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT 
z)jy2tA4c^Lqxy$Y{llpKVO0Mxs(%>OKaA=hM)eP)`iD{d!>ImY-a`K{s(%>OKaA=h zM)eP)`iD{d!>ImYRR1ule;Cz2jOrgo^$(-^hf)2*sQzJ8|1he57}YK{h+52N~r zQT@ZH{$W)AFsgqT)jy2tA4c^Lqxy$Y{llpKVctUjFsgsxfK~h4ss3S9|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT)jy2tA4c^Lqxy$Y{llpKVO0Mxs(%>OKaA=hM)eP) z`iD{d!>ImYRR1ule;Cz2jOrieE%Xng`iD{d!>ImYRR1ule;Cz2jOrgo^$(-^hf)2* zsQzJ8|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT)jy2tA4c^Lqxy$Y{llpKVO0Mx zs(%>OKaA=h<}LIOqxy$Y{llpKVO0Mxs(%>OKaA=hM)eP)`iD{d!>ImYRR1ule;Cz2 zjOrgo^$(-^hf)2*sQzJ8|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT)jy2tALcFe z52N~rQT@ZH{(-}q?enAhhf)2*sQzJ8|1he57}YK{h+52N~rQT@ZH{$W)AFsgqT z)jy2tA4c^Lqxy$Y{llpKVO0Mxs(%>OKaA=hM)eP)`iD{d!>ImY{)7JElk#R&|H!KT zkyZU8tNKS)^^dITA6eBuvZ{Y%RsYDU{*hJvBdhvHR`rjp>K|FvKeDQSWL5vjs{WBx z{UfXTM^^QZtm+?G)jzVTe`Hnv$g2L4RsAEY^B-B&KeF*cPj?Ebi?P;)!@ppg!taW@p;jKTd$%3H$#6 zA-L9HfABkjVff%m1h`*v_G6-o;8lXx2tov#2yOy!tEBP-s|fBxs@L&bKpxLH{}q`! z{|~|WmVWj*YrC%ymsmRc{lCbzoc%oZUSLija)eJoU{PR6;DEqEfn|ZK1y%$?7^E5! zh*E&A39JjeTwp_BQ=sMB78;Tu{;0q;0y_fNqNdiP4!D^Y^+oc3yYE1e-seB{j|ww3 z0#W?S7>2XxSs2_qDUgN1>GT3wm>wSsgL@wZvM{`fl8}YL4Uhs^7~E+okcHtzm4qw| zZk!az!u0r97~D50kcGi@lLA?o9v=&X3oHe)FwXB#glqk$vhhhek7Ml`Tglmz&s_QbL=3+;ehK`N_@(emf!5if;!Da1=5UJCJ2h?hdV6yl{2FV(+96d~b_8k**)<`jM_5>H3kbAL;s$&LDq-cn0wd;u*vi=PC$Axhd6VHGlw{H zkQS9DWt+y?s5r@=fIQi*aW-t6EgNUk#@V*G&64oNbPqfeDxcn?25UkF(+9 zY>hFUk9a?GV2{9^#01QNodOpV6ObqSMVuWY&Ylrx*TB`t1Wdu+5oh;^ zvwy_dLE`Koadwe7`$&R)B*8wCU>`}ak0jVf66_-h_K^hpNFvt%ALUK3&m`Dq66`aH zICEehNwAM3*hdoVBMJ7A1p7!ro_l%l?UT`}ak0jVf z66_;ba3>%S_K^hpNP>MN!9J2m#k%r#Uvczg4(u}t_L&3+2(0WAFbDRL1p7#WeI&s? 
zl3*W6u#e#G0|KUCAAuRc?Ci_r;fRX@mbo-)X+neP`n;V7!uX>~;s%sjw;cd;`9L0c2Ic1|wg?H{{Ft zayehq*KGQHt9`3i`|`euujQ-z27Osywun21@n7^6Yd+9`ouaSi8_nV$1iWg#wr>>v zqeb6n+1F|Nifvz~>?`{$qk$%P4X^eM4}xRWR~_(GhkVtXuiErgTfS=DR~@u6Kmy7z zJOC+9h~T@{@%^tIj?WkLe6SaSQSTpB$*k{_jxU1WTZa2w-zyKg2>&zn`jG2OJ@Mz` ntU~xXhyHvV58?mgAO3utM-l#mpa1zduut->KF$AUW&ZyFkheXm literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/uprops.icu b/go/mysql/icuregex/internal/icudata/uprops.icu new file mode 100644 index 0000000000000000000000000000000000000000..245db9a0584d2f5466573fb0a25b1d6cf24eca2c GIT binary patch literal 135656 zcmeF42e{Nk_wchN+1-?F*}nH8Dp*hy5mXcuQ53iqL_|eEL=jX(MO3h2L4hm2hz+dR zu>+!DL9q*01v`ShSL_YjHz%{_&ZKU`*Z2QD?(a$F%xNFn|?1c9GsjTVFeF@1fmAdEYYO-s8d!Zw3%^>d+y*!ie&0|jAWVql`D z+$Gu8e{Ari;2FWm!E1sy2k#2r7hDp2D%cWyIk+bHLGX*<_rYI-J~1LD#FW?|wiCA$ zyNKPz9mJkuAF;o9fH+hfDIP1HAWjs|7N?3Ai?hYc#B0PG#M{Mr;$m^B_^h~6TqV9M zeky(?{wV$;3Q|~#OBtz+)F`!=I!axo-ck=~Z|MMOdT^w4j5Jg_O*%uGES)G_CS4`n zC{35{l^&2DlkSjSlwOzKk(NncOFv3~NS{brC>?4HiJ^|6Z9_YS+J~A#`-Kh;?G`#J zG&Xc{=&;cFq3NN^LT85Z(lw!*L(_wIg%*Y$Cgqc%=R&W9-Uz)H`ZV+nsjm`$2?^nF zI36A;whiaPt4M#XY(})5!d=4MN!g1`eZu|22NC~~;_&cj#|UeNW5XvIerJRyho^@x z3tvNOH;3;E-xq$0NY95?gkKLYA=2vb+VK0~&%-|v`Ok1LqDE4YRbsoymXR)zhRE)b z>A}4s`$i6obdQXR97-%>$#h2K2Y1LTqNvGVEi+46XKs)dW? 
z*|HkBTE0oXL!KuuCi0{5Bk~IQB_gSjXN);WYUDlnGx?2DeJ!U4+Yj>ZL=#jLh5E8N zwx=so<$|&ynVV4~n=74^F3OIj$dO_{rI#{5p-VYf(ZmtTDsi+jK{=k#MVzEeQD!L9 zgR>0(>rCz4s@$b4QSMXn(kk&O1-9h#%B#v7Bz~3nHkqrDkCX@Guaxge)K|)HrdaH8 zK&9JSDV5eOU%sBGIx(*17 zH&m5pZdKmpmty^U%&WPF_thM^ewX6GQa>L3AL$P+-81S7>Pw|Qz41TRw->^ntfwBm zqpFdw)yd?1#LN%s@9L7Ess%OBr?dubOKnMT8?BqRi?*lMPTN;IkXR1Yj?>0zqqI}C zj?y{WRPBZEY0_-%V(ofjyG@&~JwVFGv}M|h+8f$?+DF<~nkH%_-;YLCbBq7h{#^tA z7uLWZS|sX=P7l^cB3d8$=)e3gtpU2{ZWtxs1=Y=>9Z3mvHOe=_(}PXXoua!Lwf&=q zl&t2xeRy;<(T^e1xai5zGo$B6r$;XW8JL-pS(*{DJ+qUt3$yQL z+vX0+jn6I2O_G<&g3?o&r1X@RlIeXiQ8rT7_}2JFs5hvnd0+KwAotSF(H__Q(O%JW zqK`*^je=ieY`@qV-$XJ|ZfTWh{XtUyF4j4|#@B;Pz=`pm^1X)qJ(2$9-!<@maSd!s zuH)5%5UPh06ZJ$3p%WAJefrfOy)ycG^d0gkOpl?TM1PKc9gW8Rib}CetPpDx!xmm3 zSBExXFfJVZGy5oJg)U~ z|FYQh*sR!`*xK-15;HHhIJVS?T_vuJt%^Mx`!x1-?A_SUvA>8m77xW&iOF~&zEQkG zymK7-1{rrG(nzsaTod~d`v60a{ifw$q90-CkB*OvPc)>_(X-Sd{pH6 z_-*lf;?Kkv#UG8gkoubV%kdB5-^af&yw}D3iB6K7C?wh@HcKR1w;Q=Kn%E)HGeNG3 zCi=H(p^^xR^eokUu;{L>3L-TZEdE$}8 z`-ztlt4Z{CiOX~t6zUEX)4J)^6UM!mhh1u3^7lN#yP znt4~F{*c_aRdcXDTpy_)qo1gsrk|@1)tmK8^sDqa`ZygdLx^RrK3KnlSnkyy(x1?m z>-X!A=uhj<>ksI!>iZe_%zZG|x`-d?UBoZ-@AbD@+pJ`{D)m{X`%}CxImV}ClZIJW zi3Jx6^Ql(i_n(bO_K{-eREMft*)i2G)ys$)oEl(?J~}lnb!zII)U?!0q90+hL0{F# zl_^-`*D2SfZcN>(++ox}a~t{P=HAppsYg@Kq*k=XnYYgCt=?w6bM2kfCnbOSThQ03 zpHtBKUt|iU)imu7#HP|PFB)^`Tif(z>8;Y+5)aJ06R|xapYHHEQ{5KWEe%_spD`z& z-=rK|lKCz3_sNrtYk(sh+dMiw&QV``Z`RlK*@^49Uv5h@X+5`d!FtZ;)^DmGM$Q|( zJ^l`BvuPQJku&0dk|W94Z#~Q~jtbSir_J8j`Yh9WK`nfHF#E6X8f&R?3GT^QU(8L< zF~EFX$bBS?d3CQ}%C)7trF*q&KWEui)vo)TZCm+>=D#`FTeEA{ zZ-4k~u)bh_$2O@%HPnJuvxP;mSf-;{Ssinr#!vPLhy%nDK5};+*!els-6wYoAH{Nl z#P_7Kcs^G(GF>`4$5tU8TXpLTANP02qS1%yGY5t86v(?7aPvgFQ^{h?)}WEktaGYK z!aob{8_-{n;T0^cYdEUm3;OOO$343J1;fJj52|&WgSVCHwj}O>kcCEKzbh@VJ@AL4 zFMU^m*4nebr;hE{T2{K8UVcSxUxS|oynNTc%_}1wUH-n(X&)@VrRlqYo|(~kt=o^q z!5JOAiiA8V|GxLXPI70B{A_MC&CT$0thy2tU=*wy=U1GZKOyW~m7e9o8gO3vEQt|F z&TQ-FEVfi_8Fb9x+^Un-?YtZNCOPJ>wwLw$!6WEII{R9XaZA-?t 
z7WAasSSoF6ss%Uu*5ftRtzBzdP&fCxk{aJPxAsdbw}Jw)#mQ7FmGqeUIQP}Li|)OF zTwsN*;I!|+*7fQW%^ddh@rtMIhe;1aA#7mnE7gJrNYbZVK}%@&BFFVsWLzPzBNEz=S_tS{&nePK&j zhtg?zVO`L6VH-QWz{9${=tok2c#x}{qZw+^&{GL&?ifzJHG+;lxc z4Sk)K7p|U|U*5B)*u_?DlRkb?^rr*Z}xu19FW&*|3*Jr zW`!oPS!@v%iO%7i3fiZ~Q@)RM?Sh6azuAtpa(1>h=$5AK!IIEAmUMYpf9X=uR+tv* z-pdOf?k|Xj9@x*&aXGNW<+GN~c3umuv;CW2UXbzKY+YVz<9m+l-fMa3{>{Im;T_I^ zSjfyYC}EGWM13_SWVe0}3t0A1(w74 zGYPD8IV)Ql@UP4h`s?HgI(rU7MS6lS=rJDQpoh3ls2=9!*EXy_^Jfyv59c8aJcov@ zLUB7srZG+9o3?}_Bu?<#7EDgtjb9?xM=!}(OfsWe z%6X3a4p`~65SGz7!aJXHj9E*s@bK%(yymE#t!wlI4STVE@)CPDz`K=im)6kk{QaJ_ zX+CpYkc&o zHLhEFZM$4nwiGC_JrpvbmX=UwI=dab%Gu2sz5Od`kI%>2=Cx*-`8fCA?Co5GNV@m% z`x=#%0Q`>H5>SFo!DjL-3PmKJy>g4%6l+N|rCKsg`Icf+o8}3SgZ0I^9r#4FLnG;& z-~Q!fh~W1JsOy$3?A8x=JsdmTt!Pmz@IhZz!cmmIn2wj|8lM(f9+72U53>1XwX!$)!^90j(2p4`8nMp zL9$N(aNgnlIgS0r?;A~0v(zFf<|eBc9pu=-Y3H)o_Zx?;u(>kfF zXKr5na@&2Nk4%d_HkZt9me9+rb*`Z=lupa2zypcM%YNjoUgO*YH!W^2x;^-Pmdg0P zqBZCv-QLdgP*r;teP5e1L4LRQJMKSpk21$WjrPuI{TimytehRs%gt-v-kRACHTq(E zHs5OHGPYF7H!!EIO4lFDg6~K6J~T&R&z#l^zdyjaCCf@>)Idh3c)lfwt#z3&4=x+s z6R<{?5SIf>?7LIHs1MERWXJZPC$?J284BNuNY1YrkcB^QqB1TSEz6yI zIolzYT3Ao1WX_K7bNs}@Ezg&@^-+h*WBzoZHR{WMe%hChYS>z+M{>*VZ~Y$Q?%`!& zeZr-#ro^=hFpMKYbw&_$&eyguj{s6h-kjEOs+dyV^`}*}8>lp6HQ?|9Vkfd-*tjz z(8)JB1bWaw?%hq1IhNL5X+o6i2i~2{dqE`DGeV2s{=Js<53bF*0c%ts-vsJ|W^tdp zzwhx6;!^3a-fo`mxxs?x7d|4fiS8vuviIQr!tR{)8!@n7fsL*!n6vjH+;a_i!Lzxm zvR%`5&0U&zP`WDJlpe~iOMa%OyF~FElN*JS2zvg_kG#wUKA_n^4Z)Z$iIW(V?^?`bz5%55Bm*+{*I#Ad>fT%jB$wcdoO=UF#Hq=W(@_=JQ_sOlKvY!@FmP zPe|%dKb4_|6Y4=jbu15@J(=zNa$^p9?gBOT!gI8yE+Dat;KTb-8NU^}l`JnvaJ|Xv zW&Uo-|CU4Z^Vjo0Q?BmTE7aTj0?Q0%s_bl)O016S2TFS77v^Y(65m5uvIz6_Hq^s; zHj}d?9&7dzwxZWMz(32hcBlyP&^ZdYc@Lf{Q=^Z zhx*CORk+?xuK?mVlp^sHg?=J#E53v{z7Dc(XwoF68lczEJNRY@@62jqd0+UQmd4|H zu)YI$Z!fvBIJ##v8+UFsNlx2@^}{TAwGB4R$$p(_2|49=(^-E5CEs}D7Fki6lxC%c zJVw2VJlVNLRkS9pS!>ahs9PSUZ;CcYTcS#=Db^foi7D}>cr$tKlagpkG$&dTY-278X~*0uSrTu%mv!=~URF07Y?0n%?LAfD62qGAJ)yV!l*(u99ph`B zvLsS0OYRwC@$FqfUrT#-YoxVad3&`-(l#cCdfV=Pl;rG2xQ*c(^@63_W*3+ 
zimAqz%R0HnIP918y-~?l##k(kBN*2w)H{`IoioYF4|L@{9F8Fs_5<5|ircHL>jHhs zm3)qJzG(05x$n`>b@n;wEMczlrSsX3Iji|x0&3;*;+b_hKeyil?JYWM@S9V$C7iXv z8K74j+`B=qE<67&XZ??p{TgdO5!&FtHCUH|{(4mDoStp+b89~LIxH#u-IHCz7L7At zuXTXmWW9XAhW9J7@24z+N%&0}{9&2VLZ3?kHShNDn>yq{SWe6aCA?^z|JLokcB~fI zZ}HnNmP}J_uSPrmS{)|eTqtP; z%|7N@QmBuFP#@;rwAQ#YRM4?+9OciCF$bKX)^e8G8ejHX2CQxLb}w@tK72gLw9e1n z^EzA1doS(}){=QKi}h2;IX`I2oz=0NZjyWMN-Xd%rE;%wc_7})r(Axk-7OcM|L~tm zPBMO;&=Rfyo3Fk*#qc`#jX)=N@nmdQlJUwiWTts(8{ms~{jkq7dfl0ifoB=&rC-Rv zk3J&?m!B=S_xfahZW3gKyNt|vDz6)%#@#wOvWLQ%C;aoB{k8|6y-qKBFA}rTwNvZ! z9@!mP>^)Xq!sj*8?Sos%X-%2WC!9RW>)_Me&C9&@-Mmm=sW#>`pOM z$F{pNPg&eFp=nanl%^R?vzq2K&25_3w77Xf^Q7h}%`=*3HP30D+dQv%F^!<-Yy6z< z9hlGQ*9H53rpO>WFc|7AWsWj;Ik|tSgu0xJv;dGhnWo6I5aW&|a<3Bkw;@#Te?t<*Dq_dugkmi>oQFwcTpwCT~o8B%_33=@1Y`ft~PA~xl1ZUdsqcxQuRj zt$Osg=Cdw0`eA9cTkGg&F3sl%W*eFmEmK-%w9IOm(=xYZUd!T^?V7r_bZPF=vIlu4 z@;>mH($w3Y-CKigw+w6Q0-m_W`QO#~qbc||wc$Vc%%8k?e;n=+O1GC|=5tjz{RxD9pNeNkLKFrTweW@e^??cvDqPFc$v(UYWWrUvLi|v>E8gn{2 zuy`t&pQd89I@f8d=9Zj2owX2;`wwn^XHTmZm$$Zf77Y^i->hXda}Sx`S+e$&9zUIZ zowU}^5$C#7B-L3zZ=CDqc)Hh+fA^CkGWH+eKW5MJpTGEi_I`SR?Rd}yj!VruIKMmT zyN^DV75eA(!v0rZ;&z7fJL3q}299vnS0#CI-I+`JzHMrQ`*4{cizX|hUPjPwZHE*Z&Lx07h z{uu3Dnzt&p0DB`lvb&vG(C3geO~Ad&T)MT(w@CLWy2WrQp@h#w7$U#=N;%z`%~K- zTXZY!y@q~LEnONqhj=(EwZ5GC&-=KwOZDc~A{)W&0E?7k&rFt-#(B_^AP{&=>Q? 
zR%tHGY_?-N%vw&OZDVWImrnVy7xvP81cA2UD2h0>?(}}bGk)(g9h!}{;nYjiRl7ex z9RJRTr8Ix`TdmZ?s-<;0w=X&N&aH$9`}#F&VBfI5oYp7rV{Wg~cQGHu_km`{^#eWT zJyFK@KfK3rUBP;R$$I?77$|G$9gT-ppjFsADtlS`VU2>{tIJ`HVfCz_JWrl{+oG&g zmMbfoTAEfW%gA%Mw{P06rEAl&mR%Hb9TI-mYU!>#Z*7G>XMy}q)pdCnj`?v)E{G~V3{^IJVo?_IJecR%I>l9M0k>8L_*wTuF~T3+U(HDoYr z%GY2E(d|rq`Mn2whb8V0SV9EKe2((1@{yH!SGRUpe&_dlxonlbU)}Olat_ISn#o-Z z=1eeNt3UYX1h&i4v?#k<+ec`K{Yr-{*Ln&F(4(31Tb4Vale z?*MDGbY2?%{~^x3$1LU+VVhh1ts1u$F{5*7ZHKi2`RR-4sJC+_sx|w1*Szw$*}OC2 zKXqud(^@MR4^PZ;YujCKjw#=U)x#X`-VRvIzSd0EI?ZK`q5ngw=O&0sf~GP2|nf*?(=3p`fNh*^lBfrGVJ%5 zqn6U#8>gCmnM%Jm)933?Xqwn`w%w;#8qF$itdr&mEiS)%;q&=zzpz(uuPfX!xfQ$?}b2hBBvVQu74zZN4jAT6^D3%U2pdz^#8yYmC>y6PmnUxc|8CiRj^6(m0sI z3$MvoeOY$&;rqnCC((HHfh;tVYEW-2F^c-KDAo%n3ua+nC^2u;*h|)bYU5*g*&d1h zGy@(tttIBS>YeN0?QB79IeUxW=^1xTB7N#3W^pgEzvX-4p2O>#fLD~44f|#HK?^Ox z?)Bp1z5Lv4e0=3H`^Qk_G1SxA5BGTLS-agW(%gf}yv#9t%^J_!K(^YU&cCd7-UhPO z?o>xVYb;-Piu2Zax9x2;XU6_fE$tOAo9$q+zswn(YUUWThuOy0%K4e?ZZ-F{L5`|r z|99wiECzjV%ReW*e66;6V#%|S>S!5zVt&?-u{>p-_v7_EpbG!}S!=&*i>ob)Y*_=3 z=xVnfzgJf;;}&x(%RQq9uk_Vg;(rCJ?>UVye^0R2%H^%+U(bB*&PRMfm0Ln(+JK&%@t^*Msj zEpuZ2wESjzw@6!kTzXu3e7d{dL+_+_*1PCeX0Fcc8o4^tH?mJ;P~`f|5F(F=93APV z-j=y7GcNK#=D`Y>pE))1c;@lUIgx{v!<3_x6O@-S%QMp=Gb2|<-pqWMxgoMTb9leIIo^R+!=ePZ46JLl)B^VECti}HzLkLaWM z@6>hbGx?w5mnSxjU8EhA+b?%m?%>?7xo>k1&NSFM?Q{B(AP$OicHd{=riN{TY3Q{<{9Q{;|GR|4IK- z_oV`#Odq?vZ~i2W+S=%k8Qr^0U;<)olnX^1l2f`E%7NYDWG& z^LwU^9LzE{k-w1!r%z9BC7+kxPQEaGY5MB)&hl>ZP3b$*ePlJeVLqYum!*uBxi_88 zG-Mu1Kbl@eSe%U78pviR<|gLePyfrmYvA8C@b4P3{iO zS_9vv&HR-9GuB$+3#$CA$`kC!LNljH@d2U3rvrpU`u>j*cbZcn|K`Zo1l%AbBC zb!BRX{9bC7{8?&_e3Lv!mD6|13*}@wmu^fyEI%iYNFSZvJ3S!%iu|Vhq})AI8@p#H zTyRMcc+9S!+d^)B@* zwIjK1aI@B!tc(L=Lz6S(FU7A(z7=oOfh%H*Vn4@*>O=MIrFCxoeBM#Z6?$pA#JXlbls}h+_-o2%xzp6J*e}~7+bcUY_qOt}(j~ED z;@F0vv7rsWkn5S0=?zxq#TLhw#?Zc2`Bqt`z1r|gObCU=nei62pV6kj;jd^Y*4TJ} zIz&A}y|i#&Ls-15VP)*SVDV(W^ZM8A$c8!I%lY1pKp zXZ(`btXMnZeP#TH_!$jzVkgB;i=9hG62bo8-grY}yTq1>KN@~)7#&A#Sp2AAvv{ZY z4)MDhZf$H)682PKCYJ^do_b>ip5 
z+l{X^o}750@#)6yp|=`xrdWtt9e*eONqlWQnn)STJvcEeab)7!#7&9ajjXIkAU({j zn?9j^+U$@Vmam1w^DnEb4UWkJ@2fjDGWt8TZ0-lEZCotJ5z$X$2N@ZMgtm`8-|$pJ zI3AB5QAHb@v}x1kxzds+8cY15)CleRvF3$a`d`{le zl-slOv$C=?R+VnKu zb4*jebWH_etO@k8Rn3F8{iX`I&hVEoj^?bJH1ktqJzP&R{e zJKlHIhN!D+~CqdvDzm?_wm;KKZR@y99PpJPE`Z2UR z^kV4Q&{Ls@L$8M(54}KYUy}RYUJ3m|?ty#2fpN|{ERWC8r7S1LnXdG zMmkbDitwqQawL(U4jR;>j(QAjjQ}68{p=qn{p?>$O6TbZ|8c}_^|97z>;wsXLFVnp zNvDx}I59tZ&5D2x~qc@b_`q%lTfE0i^lc}v$>-+(uP4EkpUTQ!M2xMaONZ@SAYg*@r>^MQcQ0B`&{4^)% zK977J`6}{Bgtu2JLmW(>M?N&ldz3{g$XG|awOphE_iMld1e`;;F8W6akjzsoaGn$k zcu~&o6&njAJ#+K1-$alH)d$s016JAuc#45yAV-)c<%oW|v>}-QD8X-r)XuPj4{(7* zNfRj-Xs@vB=#dNj8u^vnS^GVimZ;^pMEYmBT)JFpiokrI$YzR_G3vRRnMb0O$D-$x zJCx7YW=pfBEtG|-8*^hf$6(qv&^-WUM2HBz0zwSv5cnenbzql3$H0DpPGkZi0-g2? z^a)%il_MgUJs?A$R<0)Zp?lLiV79Z^xv_zPfq?^!J`D^s1%8h#R_;^wAtgU`3v3r~ zn;^a^&^s_nIYL>WJfSXCpH^Z1u=c2dBHtk$9yl^^xB-@+QDmsim8{%h(^x(D`VEqm zn{67a2h>IRW@%_(cwndjmY@-3s2yI#bR>y#@*f;HFfusOov?4>ugG7KN67DTVc9Rk zAO=?S;7hCCT32L|Y&RN<f#Hpj&LG_xpjdhB_v~j#` z+-sVWn4CC4AFFSd-ZsrosxTo?J*uDzrv;8OIF8K61a{4j3)Ig^B+r?Fs0{hZ&|~R_ zR7^NCQjTXecaO6Jb|NvVn@()!8TJ{0vm-6q3tEe29B}Ai$CzIds0GZjd!9y53tSLD zow;12QZ`5h((_}$-Go=PXTq$|Z&zt}amS!ufStFqiF-#Bgs=$XTZRynj1+=u~qVP%Jlfat5TN;gH z^H)omio#ccuZaAW(MN~}pCco7pkMD1-fjitVTl`Z*X8!i7lk&$C+atW&($`| zq6a{OC9|3(b0qlx9{7u!(+uAe&94FA8v^D>8OonZ+9ffx((*5o?JuJrkd^1Ch#km8 z`?n<4FVA~Zc|*Y;^aqvThC(#R^Sv5v)md%4AM-*j_2y$X6gCt@l3g)?_9(1vu!Ko_ z5CwC0qQURUe*nHx!DCFs&PJ(k3>(YPK6`Txm}}5ZNCevw+6kKk&9oD!4okC@)vS>i z^^=M|kRe5~CkSngc_O$p2HLj4jY-SxgpEm=H6*A}?kj*l);14r-rC2dkN#?a#LYbVawnS!A*r7$Q%JJYG}EW;e%SxQ9biy64=laGeY^UMr+gx?RE~r z)Vbsj63AVHz}ANLi5PlAxn;?d*KI2N932<2a$E$Mr~RrTzY~eYsp)<}VDJ3iK{Fi$ zTF26CWi@AeEX{j#5IP9Y#dZz?w7q`84g%D6EtNY5cPA1LSRv}U*k)O#hpbP>o{oWL z&!8RV2~xhWEN14TY%)D4I4IaZKO|_Tqd@CenyswnZ0EHdg^t2ss$V0m1P_L`$TTR} zQGnXPq=b3@;QmD7=}&Zg9Wv6aG4cY-NfA38g=8#gSk19myCauQj}DFwE-s7>n&~9a zI+kWD)pHCPerBtV|g!A`<4L109%pZ;mKlR)_*3mh8!G|RI^7V@l!L3Af!WYEq_ zF$Rr%HMWXlTj8O?&hpOkw!-dmdA1dR@_y9A$rE(;95n9^h6UuVq(?i;T@9eNtpN31 
zjcHd>f`&mJ)L{dx5VMoXlf`T&Obyn@coKhpa6HGU!R-Xf3BmugGBtQ%@Dz^mL_Q;U zt^<>TlUg%irfSP>!nMI`gD~$VY)}9h=2YIGzyJ$cQReG314Oed)Ndw@R;9`!LpBpqv*uFLHq~T@aZ;q!~m;<{SEHk!g|Xk;}<^T5CV{*Y`G{o)CXtWKa1# z(m#x)I;goxd&&U%({fMw0+I*lF7%iC%X@S5t_VTVEV;5nI)Gh)-<=cbaoFVgkr=2195>V4iU^3M~o$2iMa`BkiT9%8eq+LEI z>m#paa=G@vUJ0-U2I+(JFSW0=Q^@~w$CsEyd0pODd4p7w>= zpCY-RB0OGd!)kw=SkMOjHrodZW;^za<)?OxgACxq?9Y<62MWu{fKh-FKT!BNMR`5Q z*hqXw87vGI-jg;Gknq`kPk*k0U5+(H~I{H7rX>t-P9 zU}0z(`igGsAyWE^zsY~fA1PlDJ}u#krel=762k>ACW=5vX%kDtDR(5d&u*VRT%af+ zN;JfnEKU~BDRHNW=!&i!DOAfnq7z36m9mvmJ!1rn*`~tiZIm&BGhLL*IZm*1lCT?j zcA<6Z5$;K}l}tT|pE;gtP>Z#m;Y-v@)RTmhgn^NPkv4=q!hORt)CmG|NVqn3Oce6I z!fC>=aCuG>HjQjbp0nuAsBo>!(q?IA3VSGSoGk!*Ddo6Yr5;e8Cs=tW;mw)|gzAc90~gjg&K{B6+$e$ky^0 z{TO|^P!2v4Gqy4Em5VGIZ5Bzp6zD;&q=S-zIv=Y%PjhJic7_k^Bjqz=-iY6s~Ofyz*4({|FfQb(y1(KwZ$ zah(4^31p;@6*NI5AqTR1c{qQs%0LrMK|8yk^-OK+sS9XUAEjXVo> zws33cd?We{GM^ecH8h@-mxV42og2D|NY^_sGxQ(g0o)oI9U55&vju?9P`YO3ZWeA98sogCoZN#v&%zAksGK>9JJ%>Ya|OPJ zGDM?XO*u!ID?sdA0ZKY&CAC<4hWVi2ZqXLizRrCYjN z`axk!4cMUslOGf)U}1KsZEb+|&Ki|=)^^sqIXJqbHT-xl5$dPI9f`z9#w3C`T0db0z0))b>x+MV9% zCAlTJo%2g_9|}KYPtltb-DBNj2Sf)%VHyza%W-Mq;si_=8FWc)llnwJ_Kogk&^HRw zK~?d&V8tB&xlnC=Sk#+CqISL#7>7j}-w4~Mz7Y^AFOGUMD(c1e!V%GW_+FS8J=WkS z0m`2QgldnCPAu^g@dU6$eYtW-en=j29vuZR`|;5eEgWokq5UV}H7>TVtuu zl&GC66IUjF6)4Qw90j0G=gfi<#y~kaYUhel)^nogL@zG!yFkG@mKU{pTpUFozT780 zFuh;;>fF!Sr8#6o{?PnLV`^V`%-}DfUhazm3kbIn7DVqU@npg815!FbZ-R{KQ9VJX zK|Th*SMEl}n-G0G4@J+=`MOm;RUfB%agzFJ^o{7t(T}3_{>{Ep|AO?2u;&TCn@j{0U#ew*`5s(iR5G3(pi_ z0&S&$|IqYasl8H@^zm_ITC6fIh+PnyPDuI8Tpp{O88OO@i5nAHA8<+R@N~88l`Z({ z=eihhRN9$F-_i8eo&3$G*j(;3~%K_f2bekO90!R2I z?!=#QUt&w26TijD0m4Ek!GNDUpoZnvJ}WdjWXYi$G2p8u?0{ZQWDHJ9)AONP=M9Nk z*&xx?*VVUCqE>eB0qqmz*vaR`7K!TF*@tYF*vf@neUxnym9e|encEVS1(x!*#4U+E zeS7-uN$}i9YA^x!5%%>7m=1lSg&)PY~2hX89)#t<+dC)FTd2?OTi!*%t7I+2}_9{&D^PHJ;V`X8IZ~btg zUOhi0f7HLz&-JfSt{+re1p^1Sp@dbP~-@jR=0ahdNjpO^iDw730F z-P``GenGl&pf^_ZRJ@g|$@qwr>(W~Gx-{@cc3N&)?g_$0g>J=e#aTWx)QiqB_UbhG 
z(|eZsU1wyxE)7|xCGHbuGKhf;MVjf461K00@bXW7vQyb&79;?V~((MYO6y|tuXG@iCj zqgYQ^-7K*h^TjrKja|mRV+Jh2lb7w0RKwzHla5XuZLD#(=yS*@ri|lL$XP@>kwEJw zr%q1Y>Ru*>6c9+ZPyj`6$~IZb{wfNtc2djHKlpG9nRhn=AISsd+x0 zXHs@}`|~L?_xj9iOupl6tpAm4t8YTs+?ci`y3Gmj{pX#O6Dw11q+o(MTsE_ha;c?&k;JLIDPx%lhE7hR~EuH+pvMl{X z`e`y&?@Jb*^_h9F^c)Sh^3`;uT$rb{_;@ZZIK%sR$tPzUvTvt1%D&+HByHveAJt)L zwz8Tv5~FyJOksoUCuD@g{uw*G--t{-eC8XQK}KgzXq8bnHgk*v%=)G86f*W{?R@P!x6J*BynRvz`Ob&H z(^0M^T%Vbl`O#-*R>sbczL~_E@uScC87sV%wLrDEXR75FAJ4s+^899c#?ibCg=$~O zcyT`&2l3g={UzWBO{ljJK=*9s;miY>M>8)H9w$6y@M30i)}3>*l(VvDWsM(5>CdD! z&x8I~GL_N#xCZ*rhSL1?j58rW#o5ZV=!Ye_3oyP3B#UlTqc6M0mniH|dOc#j`5cD~D4(frQ*mbrqAI#n(I$;bV1{}eJh=alTF z1S)}yQgaqOfQz$sF3Mg+#<&75%3foFT->jdd$N^~_k&m7kKCC>7LXCS7!CdfS(qqH zLXEj}w%nJuw7~aysjt!o9DN51zLX67=!9b^2{iZp2;pJ(=$YB@ArZ|VQRx1C1<>nr3VZ2 zENtedw8_~)TQOJ6b+@sF->xgC>6Y8n#1?+O-YwUGw6Y6f+gvBY7JfV14OGM(4cVOJ0^^^z{SFNCwc+`Lf(|L|#EyW$366&R@#eK^v4i8?v-0jh#BBviQ?c0(&>EFt zetX`TTl2tGHg3&dpT9PLQ~r?Bb1=P+Zf@3L{$YMsv(vw`B8o+Ue8y;i}@Gx zuMk!fj`s7sm9LCr{m#6buZ9ouUmE_$`;o8nJm2L{^qcuDU!EWHPW+Om{Fz5i=AI>c zK%p8=_ggulVCF3US$?yoTNvo%K*c$n{r zg%bwp&DFr(h`;p1q)02NF@LcMzjAW7L3jY=U zWRcE!J?NlL=Vpt!wyqEU;){-=*FFX;5yZb50k(>{(Jl@ zqtyd(^>Uk14cirY7WfzV?^J5%KL36GUz3%x*zZJ{%R!zOiyrbnA*pWo%I!g-jP0$C4nV@0}?ywJLxY4%p8z__~!!esVf4t0vT3@Uk)E0 zv9m1jU~ExrR~?bkha?V3s4+FRT7$X0&6h0wpmkomlfJY51o4999BX;%=h)z$x?N}1 z?5W?KyF0fkb|`t;r}tCwtk!3gXOxe%kG0prpJ|(=0QS_q+N67f!0`!WUa)?SO)%~^ z^3zu8nLiqQAXxA80LcM799$9+<6=Bardo1B98UNgFf7^gzNmzHEUostrMsp1(r3XX z(u0IW61f>&d@+a|q%4(w46c&aNV2$8`X%^F@Uvi042pM4aZwW&N*@GQ6W(g&-IDZ5 zutCflKPf*a{Tci%xJ-IldO~_t`ZD;E^pfNw8CFOujPhfW5eSLj1-}lyseC8@D1Tkz zWrgyF64ey#N9`BwHRTsF2cY(gwo>US^7IgQ6NiL)ic2H!MShWi_m%gRKQ+q4#OZ_~ z`VXl#=?w{8vh#EMiu;PA#LuO@#80Gs#UsSMh&+sFfMEnGeNXJ8#IMNwkdg-Udl3eZ zxNi*WUgA$id4PDV_^Sl=gG%v76M2|;ScxDR93`QJqCYF^2tO#_mRP6!Tw*QJ1f#iF zF&2%Vl>q*dewU6HPi*CP=`X_X(utxNa^`fAQu-On^wZACBIBIaC!0~}EO82f%9F*j z4CyRV0-z0t!XcEcu^2VE^t?1BbdCrU4`ji7nCC2#mFi^0${&g|iBPi$e`UHzY$l+@ 
z>Y%$)yuz@72lYZ<@Tc-c#1AGU8@ia(E*39tjiDBrfwn|t+A_}hmYFX~_A*uKJGKiR zAUBH_iK$SE^a(SYOUwjz@aIdknM=B)klk5NwU7re`&et%99Dbzx>Gw$cd607pt^I7 zs43TojN8OpM3|W78nF=KYuAVygf|GI=0@>GlkXg{eR!i*D|*#hYKo+iLlxo9*5?G) zCT$$Dvp_sft&FMKmZ8eHPh9MXTQqvR*-^iB$eV}6heY&F zAv{?Ed_dm8@CC~wqMNQW$@V0%f=_4SkA5`DdES+@4ijq;TB8}rO;RRQ<3&#B)M*4& zeOaWe6#Iqt3@sOzi#r)}kai7$ypl-Ih%>dBT3Su3dNi**FFr5s9D0pR&l}d2#+-UW z97NC(C0eICXz%%w$8vFR(gIA#W?^U0ZZ21n*nXim$b6^JuA!Br@4FKTdcBjO?-wGM z-<6zdrD$m4(PyG>ik0z>_^}SWBbw=u{zCjg{DMGjKakpI`e*vj;?Lrz;#xAn{GHZ` zMT3@M&kVM;MjZ13Y5x#kr&(Ao$is4BpWjZ>n2$utXni#CdtdyP^cH&fskouqMs=I| zhwRWL;(KD2Ss%=LTKibH-GJYEW4`H<4tbiP8Fjc&}j7s`!VEMk8WT)mycjWg4_p?>z4jzFQp--YaaK_6qMyw3SZ~&wKAh>Xdy;dTO=K(Ti^%b+lJl z+n4ll0Lew!Fw!n^K=`2W4!UoiZF9u8pg-tF-0OBf=dc&RnKZjt$qt zg2Hx@avUFKT&q#KMG$7IU4on^hU>$+6eox4=lN*m^hi)9hU@3_@aZHAonUUqYKI8s z`BCbll+&+7yv}*As$AQsQTK1ze^gh$R$COAUE&g> zyu+AZ9=Rbhx0O30mqe~3698itQHjp)j@aStD8pz+5DnAKk*keqVPs+Ci8^^YQXk7A z_3=`qcAkqo7kRY~Rzz0R(K_vSJ?%s6s?zuZIL<(&Wvy|e<Eij9y5 zl^99NPh!9z`Cxg3JVgE~_A3E2f5i?{?2MJi%7>Pqndy9_47mo8_9&2nx7Q+{VH&Tg z((#+jQTAH-Je7{xOWokoAEM)x=w$?0+VDFT(;=C{kY zlhL8BHvuC@x%pSBXX7uuAiq%MSWYa~Dt%mYCVDlT&Ev+^aaMPh*q|T{N`2tSH?@|P zk9Oni*;A>GFLj_}5!p?tpH4+PZllK7^?hAOn6dl1Yha9-t5qCp#(hj0H;?0$dgzp% zs4zCsHqlO2s%K+uV{LpHSSz(jm5+Ym?Afd!=O`B`mm1J9FD_Olw{n(ZhWf$0-1r!k z*r5lkWtO|z{21xpCN3+K?q20AFy6-;#XNR{^Jo)RtvSYdWC$;(fed$e-SkJPfSa|v{>W17b&mJSKS6(N9D6)P@)gDwVXx=1kvT>#rSLVs8!EUuz$AW+RK9 z<+Mwz6}xQ+X%}F6z%6*g6i3)as*_(O&qNa_5ER?A485 zV)wpKtBoU zyrokP)q!{Q5rlX3c4^9{X<%)ZvM`57b$T?n&pXGx8B9hY0mhI~NG3Q;Fg6M3D`jXY zAI(a~I2l8-pB+1sOu(6j1T{L1j01BLL&$tcoa)Yu4IwgcXnX{*%}JaWJ1=%hd~Dop z8tY+$m}wP`OXB7?JGMS3KQ;*+N2K;STuc8c_B-J(L!vU&spc=^dON-54jxW?LNd_n z@-&XlKPSGw$G&&q=)}>9QHdiG-^9L&WlN8Tq-S^YlZ(iA6UzML2l^s%#s`{32-Y;T2qnbkVS>9~4$-m-Q4ljPD8)I#U|DyX~_)B0Me<1 zN%8aRV3Lu8=ArVr@d2rQ36S*+2M$Ovt}2Z)Q)|gajHyhzH4fZv%(bTzX6z zrg4OQvj?SMRGkm%0LSIAmfyu0pT&_cz?ALDc!9{2gzaXTzydC59jPqp>-Wyh=W z_SmX4-pp<;u4I%CMoyo=XC4$RiOZq)$tqNq~`Kyl6+P?F_mlyOqjLMCxYfH}#D5 
z%8Rz9(ug&+mWL;6Wq1;(bW9mHztfWSadz_T$7@+QkQdZEzg!<}AniXT7PCBtH3CvAS%D=9^lzd%x z0`xDD^U?Xqk^0?aqTHQa*m`aXXQ&H}bJaDaGuB09f--3xKnoX<^B539)=GiM(j(-3m*8t#!IAholZkZDq8+vaniT z?T}+d5+YaYuU7mNgkN1e;e6UU^-T>Y=hnm1>#}3>Q1;CNyBX67`LTIAZY7Lexmf|n zrm3}jn2Zneu>NSxu+e3xRPCrQR{zRIEphkyP5(vTIA1%#RHgXHz@5w&Whik%YpB(a zTF@8mdD~Dv9-&$ zfim_9dp12i-RL3M$={_3KGHXuBb1t*o^4oZpJ8ra9;(6o?j>#6M>0U$G~>%z#67MO z3#`G*)6jeHrpsZSm(OF3aIVulY%NnSY}MQfcrj12&ASF$fdcDmWp-GK0{bRxv+3ze zTG!&r>_M&G7-cPC?K5DTLG5y~ed!hk%>_B;0X8~O9rZ^$)-Esgh4zTwk3UVxWk3%o z`3d6CMss}1)u|`5Gs)UsC>zSYM$l3KwNe@8=V!cW{aMoKr^w#giK>&YN!5qJtF&eY=6 zBXzQv4ZPLMHgcvUdWq^JbsBTbZZ_ik-yl6WkQ!A%t z0Pjytv-QFZFx3DpSCMgEy!cOs=Xx?8%w^mNFEc%3z4D^%)=V|rl(ACnC^1(57d*## zDQ|Tehol;>e(`OOrdS)qZcB8En6QCRWSO9mro_5pLqGOkwEH^*eE3K zS-3x(Fir~sqrLF(F*8U8 z^uuyS4#t0BErK*X7c2GQ(gGhJ<9e`pp0>iz8=Q4j`2}W zf{{UvGe!o1(Ls=MQRVDzqi3;PVV1wjy`6h6_hIfQ z!e#kSa-Wn&p};KFmx+8O{GRjY|H@IK`BbZ{kZ9ENWhFVOPwiJ^X(YanXvx3xDwDmR z23+1+?z#Bo?9QWiRIY{bFzUf=Jd8^9ALr}ivwVGgm#-B%4hG#zx|4Cc_%XVmPBr*f z6B!=AM2_q*htCc4?pxvbFK&KpB#7Fia-$X8Q@DqWX8@CxvkHLQcm#pp$q7zQ zaxGIC*1EZpX>(IeAk~-VEf@I^3Mo zcR+c#Z`2kjll9z@leaPCwlE*nnUi-kWMejJCr<9p$vbm$Pfp&IlXvIjKAhZ_llyUU ze@@z zoO~T8U(d-ma`H`_d^0EC!pXOC@?1{7jgxOjnH=}-;N&||ChO-ePQIIy@8RV6oV69%B24vaq`EU{0S$2%E_N`^5>lV1t)*W$zO5uT2B6&lfUKU?>YHL zPX39Lf9B+0IQdsjUWYPS-rrCr%lkVg|G~+Ba@ zA5F_HZDkTHzEtf*-dbBVl-Bj7wNHQsu&oA#h*W8+_)<%kQ8BbynxP%Mjr zUH7e9y6$$nZg;!C^E#u%ink}mST0PA zvFxb$qKYr6c&_40E8bc0Wfjj?Y@^DhRlDWtim$EsO%?xW#b2rTk1Dox#pcF}FD`72 zO}J9eupxrs(V%C$3(wt{d2j?9kaUiG28q$B+Je!xyUtKxUEZv$?R2Mu&1TTUq{~Oh zx3Vq!TK?l^O8O1-3(wlrny^E;*li}cb!#1PH}uVQ#B3{O+pN>iI<%AZi=B?!AF6C- z?wbum-nDMWdYP>&9$H!JK2V1&ee!i7Kh<^MgWG|8gX@pj#5iKuOd`r)x8JyZ%G=>) zJlORcv|Z~uh_m-qc-GddEn=S8{iJ-zI4@+iP3*Bs?6FGh@k!i>dzxRD2hZBbDewpW zd%tAaPwe&+yUoOIbIqvQB8HhU-pqD>uxwH;9}>GwGUMDQBc5Y+U$;4J9qiAxou!$> zMwXwM(N|`~*11ES0omup$3-3Y;mR92uNtR8rY?5WO@M1%j{lp9CD>(W}?0t<-l)lL7i>$uLvKbt961$xZ8;c#X?2u)L zEIYwr=iJjuTe8}c)t0RB2FE;-WizplH@LE|`Rz}ZzR0pqmYu|ICphdR_VLbaDs~d@ 
zL;jOxlWgH9S@wg&=BCZX4q0u<(ogLE1V`I5PcLoBYD-qXWZ6mVc7nr>O~DRhyVRWL z%zQo=JmWbm-dC~a!u4bQ4vcNpFS%*gzM{D!?D$$U@hKHItCXK{X6cu#e#x>)md)U> zleh!>WZ4gS*dIF^_T2}}UEx{Vc#h;l{Z8WE&}HIh*E&y~7dqVU#N6+)95wuW=x9$# z%zYut7ej}6i2Cnx@~qi5u-{AEeQ)9R$tL7yWO;*_H?Q4I9p_HPJR8ss@@)(DGH}o1 zCdT~wIYIVqhI=dZU49_T%@)+J%NH|u#Hibl52!y3US})hID16fd=8-x(Dyx8oa`WH zc36vpCFIVX7OMh`aSeLQH^`rnfybnQobkE-Q={bGi1}V)7r5NI>wR3}4Pr>L-1$T~ z#tZ$B7hdmU&+=fL9I_o}+SwlcGCrA?8WVSHsc9~R4mxl@oqIiBJMei&YKy-Vb1uZR z4|d-;Uz8(n-5BK+BY%dl?^6!$Ah!!6uUwxxIBU5bFwbLRKO5LZ;m@#P7Bcs2%9%EC z-V^gomE~xAG20>^;2Ri!p1{qC@=3d-(h*Zf%s6D77p&+nHO?3*?1Sg3ehW{!Jo}LO zfE79B^wO&B&WbN1ch`6v=99c;SiU^T!B?D=CtaR>*j4Ei?po&6igAW7@`~HTE|<-& zT}hkB4fqxuG0(hQ$?+_Z<-ODv`slaFCkroEyjpR4c+DdERX&{NoX&e-_|_f5TvHC6 z2IsfLor;IRQ`X1@dMu+H&u}TKi!z>+08= zj&rg}mQ68jifL2qHgCBeZ4qw%)9FlfX>-903|W_JN4&umo%#Gy^J2)yP?N_cXFk_F zz_S}|VvbOIGb86S`?FqWqF1#IUc$2;+9LiNx*PGV7tH51G54=5M@?8$>4>Q#j_1Lo zQ}p@lMxEn)-kIu#4%-eTeXQ1DaEgOCp5q|rx%#xw;q%&Q)~tnGbC=l9REd{np24$H zv0vDqcZxi*`;$1Hi9&~UZyPwpFf$@4j{Q+OZK8N<3#Etcv%`aD5UxVVNVjH`zL%FvdlRmfs4-M4T z*Y!ehC-bw|i=W5rK#sLz7|TYnk1_EY@Mg)^j3Ex+7y75VJTc@GS3|z$>9vf*wa)Ot zV?qwOT^DV~J?3J@Gs|H=Gv)$}JrUPW-=^GaLTVcvI==2YzOLD}9Uganb@VmnePyoY zXq)B8rOe1Bu=hLW{eqF(-7&8}iBFLCs81cQMbPQ7ZS-~CiFU@u9%+-Eu`#Tdv9ST| zn=Kc{dX#+3*VJa{Smc!rHc%h(-S+B=?>QSf=*z5U$F54eYLC5UH_o4|^Nq8|Z4@yn zyg<48KWcYT4jvnA@3beNZ?o;1x6GW^ka6xBMZP_D)iqj2u#*{fz_5>X5%S=W!zSdg z4;?US#hv!0XbbmN6GJCB&O*J$b%IwXos<2-3(XU5hxz{@EBrj-0J+xY6E6dIao;}H zYaV;~HH-;j)jR<N_qFX!}(Xs!7yZWA~9+K18&w^Yr zIbZB&c(U}#H!4=tIn;S`(vd&ZArGQ0d9mUH6)$J@`>4`(p>c-G$jQ(NPC7AO>hq5D z`=f2Q<^6ofkxLkRV!!*vePGXH;?Ebj^XVZRu#a!Cet?Ysd8oPN2P^)eir)%W3~zZ` z#c%Dn9sYC6+X`>IW|VzB+5JwF`0L#@9lPGPLK*$;hYuYwo&meK?MVBtn9pTdjy*cd z!9zvghQZjQ;~5RCcOOHQ9PgyEeY|6u!w7iCMOMFwYdb_pB1e;@OLhA zF_vC1?euN>RLGS}WaST8`9@|Odeo2k4URa39mug>X2Pb$d!^a+urE7g^}F?q(l1%L zN>*QF-K!_|d-dR)GueOS&z#ABvg{OQo-4L}+p;D3aIhh>t(g9ZWrsTO$8cAlcrcq7 z{@`vr^P?w79L0k_A%kqSFEJN*bet#hw@q5wzb$KOW!%o#o5zI 
z=YSPnXq+4O2hSsvtF85uZv*Rr)BK)317@BaY{-fgS+OGb?&iA;vc_9@V1<`xC*lxt z#U$*j-cp}Apnly?#5Ux3R|r09*2m~~`%fBK`5e5m#_xnf2jfL-ow+V-k(_*RdGO<* zKT`CO&aD5mqgau3#!y@C7nJwz-pIS?ZH)hn_lvuCx9|7xekY%J6Y3lM_r7?io!2|= zv=fhD_m>XrIKv0rOHbn7IOhZIhg~Q7nmZ|XaW`9ZuBdod=I+JcliBYZY2WV~Q4hUs zyek>*sE#cf?vC=hEBuU2ykr#m9gGWSU9q1BCcZc1JjcEi-1^v8&77?X{j0z@hjefr z9(pC1F~`~X`&}Mq;|u;#k&|OCnhX9hhq;N_@i$O#IttG9z=KCBV=~YYr)EYvU29s*Fi2m<@JfDtuUVB zex$UuH#qCPm2-N>;mkFNJ6P&?411ZI%q{m>>S)hx>b7shSm3AMEgQKV9>O_?3_tnq z!{uc7w&XtW-4On8-{Rfb#G6JhP4hDGqt5XxgMDykjF;bF?DW3ePsC7iey5Rju&-r# z;aT6aB<~yck<56{m>FwoXS$1a?lEp#eckyNOzB-7bhJjOpWoBDZ!=n_J28!1r&AY% zeSY7v-R-;oT&FudWP0}mZh0=nn%&t;bF?GrLlKNWV~)g(xpRthX2kho>W4pjm{;5cz|5phJai z7_4(LYDLy5%z8q(zbip~7^EB{t2UB#{v>M+X2!h%Svey)zlp-RSN-zaq0G2Y$&BxF za=&DqSMlvlmXq1_3t4S({*j#LqTCj>4vc*& zzDXE#QhnIra((YNNA|km?{f#eq_Zo_4K=~_UB_!e;z{gN@|k;<81?Fnumj(GznFVF zbhO^d@}I0R2FHF%R@{PD_&n$wIrf;%?^nNm_Pdq-FjgZ!Rs321V#c%8diX;**lYMy zr9qgWI66e7EF6CS-D4M+?KsQETo(yQ_k-d7gCPlTS3|6_ko$Q9y8-va>1glf8&(p zi8tWx0?%s;rkp2JpU-qzAM4cZL|$bc;N4=_^t{D$f%;nVZ(SC8`}$3CKLd#I?(B_K+nd43pUela?RSbmZ<_Vqtc z9jpbM{hXEOowo3<17OV^S#8Ox55X&}56vCWS6^%I1WUj0zzQ!1vtN8~?E2(cTlWiz zF?R=x9kTo!yDRBT+m&~pjA!k8?txtXkTn+aQp@upS$-BCL|gUc^0c?w`(RW1;P?I_ zm~Go8IO-?a+oCoGuad9&CE8*AagOo!ZT&9?a~{Qf$5hGf{p>69GxNZT9Ph{=$8$7v z2KJHWYAM)sztUOQb_m`8%zj(HABN4y6EgBd_sOv7xgq_b zT8=!4d(PZ1-j9f}PBSAnwt1XKaQxfqXM;QztmWJmX8@P`TK2Pnc(vQ?1*diQ2Ff{K zShKmW!d$;Tr%@Ab=KO*W__~&Sp~2Z8_2uV;tdD&^`-Anq$5e~R$%pqrt~HCjrr60m z@cl?~^qYC$dz0kwAv5=^J)PvU%ZK1U)}8B&d&K-Zmir)QJ|MPYKaVBmzL|Bn9`_)# z@gGFvez%GFg3T%CWJk=lSzhbR+O59=ebpbb@`S8WPHjgXD!$$DtHlTBWRonLWYuTs z(}%*3*~k&JMYw%jWQ~`sHBMIi3-?2xa_miRNB+P4*T6SMtjHQKS@8*u_&BFAUi)#h z)!dQg4_S7|8l!V+`?gP%wq*HFR=;G;d&t9wS3F$WlGQdi>I_+Sf=k=vgMAWq)Yd*# zxN+9n{E1J4<^PX;rtnYxdeJ9K$3C0nt6lqzif#b zpf5a=-UpUG&J6OcXYYLktlS_g|H<;f899dWN}qQ6Y#X^ZW7~c++3(w1{wbJt4mM=j zDJ=O?%e|Fy+HXT&YmIWvOJd(!gE21{BiZYWIQ+S@xnnu`JTT`Nd!HOSyY2SRK`!6O z8l!WJcMv)nujH&_qhBa($!bfM{;R&2;&9l=>WeIW=hRooW4)7=&t%n&kYl{aH|I2# 
zEl1IpVn|MB50|??h!5q8)z3ekd^^~X>6=(~Tpqr)p(8ug_j(JPvF4X@jgj1Kd9Dt5jL|ubapP}mEWRGaz8-)2OOPv`&MBUhYc77_%SDGQ`Oo}L zk`K%|wWVA-Klen^=^0t_cmHmYlO=!cG04T8m7?>`C&5~4WUV!_a)Yc`ku@%|Vig?m zN$k%Z&M8)uqagUt1z-Jp#h=1=RPt9g_Eh+}25TJOt}~wec6}EaHN@XBC0?zV-*US? zboec|7(b7*JM8#-6T2<71wU2E#r#HH%x~1ir+QoG2KA?lZ`8%7Rt)(hzISEJ`Mv8d zaQHKCS0)~x$9}Xo?MHYg0@i)PZAE9}V_?i3?L196?^I39d(^`7lZ9>KD{eF1Yv23m zAm_kgXFppawhxe7<~dn;^6^Ul;fjC%^JuFaD7@Hwt&*=+{I_3(zRq3vPLFNo5tmL+TNC3-TcHFyt{W>z++M^ljmvfn|rRagk-y zIr$Uv*uTi?D>&>+pL^)-|Ge~Ne*mV<)vg^+j0iu~k(_mateSwhAr4Uo$f^lRj`w1t zuJ7~i-@S|R-egx|$geyZw@2~4ist2c_&^=Re0&~tKt3RU3eQje3+dzi?B>K{X!{4L zE&6>fG5XD1%iE*&he-$W%#eSzl4ov@PW%z%syWA=zJ|VGz0c!!Q0&9l;q#K;+cV%{poxStbLZ8 z-YL60)wDmheefJ{gT6E5Lx{;c{scN&3yJ-EU9$Wkv+W?{I_FTXwGbR@!5LZ2wQ$>? z7W-t`B&%(3v?Z&~J10B8{AXn>*Z+CpH~a-y<0Z>xaM&d4Ir_u@viL(*ObQPyIMzE^ zHpx1#hCJ*i_S){8e170xp)bXXthu=LU%Nc!;v@g2_(Rq_lGTbK{6W%ijY`=}LJzwp58 z#{Q@q!6WpI_ksJz$sq^t@b6eqH?nVqc}JK1#67u~_vCH|>wVyN=2?z5u!~x<8#0u=h#6+O_8r_pIVdeuBa{S54E3C&nc4x!{b!o;9{#(d3M`YIF-uS%nE1qQ8aZY{R{SCC099ym8*0X!Q36>qQ`XZ|)B=+|>WYvd| zM|~ixu72!W#U@!c$@0?~zHz^xTz!#cKjdLw`mFi){`+E|Ec@geTgHd1bxoH2;IN4w z%i0eQ@E@!-XT}G2-7jYCr`P?5q|>(}{}HTy$r?-H#n4gA-}Rr0K3V#OUohv0$$%XG z^zE1ab7@OfTe9qb;=e#g^GH@4$kGpveTS@=1V>DqQ_kG~SFouVlGT@U(%JG~p~L+V z>(^QOWY@>~CF`!^1Apy0h&d9RESv9tp~%USzwU1!*SN@%+kY$i6*q-%ZGPZ8(9s-` z)vq)BXYC4k#DpySWaYE;nR|!+d+C=f{ZIZ6$i?sZ+oIzfef5lT>3r(%prdh-`nAq28aLlRN(*LdhtN3Q$E&R~`F8+|^4_W?@6~o|&8(BUi zb{~SnhY$XJ=_@$ek{Od}E8MRbXX)ogl`}l^i+ScR@~>7pncJhyUoJ|gXG8zf+20S| zYlk0gb8a+$c_{OmZywL{LRK8e8Y7+s+BDZVV`Rp;BJ*%B@w64jShhnRbK2nd;fQgk zy%(&$@V$DL<2>Lz8wkZ4$j8X>G}DyO?wboK9jYk3NMDf?6`bf=XUHD**CIuP9|eQD3>RjAN{|{hrSiQqvBUK!~c+UrtPMt$@Bj44p?i1 zET11GEBL8aP30qg%4uNSO>n?M96ldu}DgTl4heD_4zw<52aZYymjOQ)BdCNLDyB#I(O?E|0#J$^M?se@S+|?kTrNet3F?01K+#Y>!-MT}%<82-s z2jdymfB(RP6NQK0Kk&B5+kyY`fFlu~Ua;bZ`hO(l5?QfA{%1K^aYN0Qoh5&MbmqFi zv*D2#JD>Zq9BbrA-V5HJ^aqCL5!bsnz-NN$b zVu!pf)&*I1$n;?l`s79OcrDkwQ(t{0PJKb2IWT=X`jyV-&H$^{ku@%|d?2&0o)sQc 
zycjGWs8ibqyALi;KH&YkeBk@{Be{M)4LSQ-?c%O7bdWQ+S4?uRjVIyu=;3vc#~HGj z*a+5rQrazjX}8&m_y zPCGoSh-b|nEPde|vYhgktbXwh&iiG}$FqO(&Ce&fFWdpbX2dEOF+|NoNTt8p#hxDh zVt+@?$!(E`@(21;V9oF6&jc&q$XZ8alkua{`b09&yewqbHEBe zS@A;i+q1>am(K$$=gG=XGUsK{$nrTjeE!n;XsdZ7YaWB+3_#YLlJ!h&+vq292|x37 z;mLSV$rqd7*-`o;v#+I)Yh6&TzK|P=3C;u0S3IourA4a4Ow*?IkS>-X7U0s z^8r6Um>53~XxmfU)%oil9ppKB?}bmMdGX)<@}$9MKf~Z1?s3zZ6X%5Ew29ia^$f7) z0{M0v`YGQs<7Wr(mapY`75S#N%snx3?~`oH*dhiE;UvWIH(GKRo3*315`;e|pfPQ)ws+Zk z?9=$0$Ng{c?<{qC_}imx7ymC9{u?4U;n#hiY3JgXY0TI?_F;SI|MNGs-1WDz#Y@oU zUi*kWWJl~#`@DU@zFf60dUJL%%~5+N+I_wq#g8p_q45s1eLH-5xE;q2EpD)nwHu(( zw|m+?ez0&8Z2R@i7Mq9W7P}K^Fx76i3-LtA_}eM8^Z)CKjW%hU?SjJXgmXLLa{M_3U60;7Ir@)r{GZ@R zBBp1)_;1ipiua4!@NXWlQR_qFui?QKySCkBuk!iAzki2cb$qE^*Wyq7ueI$xh{Rp( F{{u2gN96zj literal 0 HcmV?d00001 diff --git a/go/mysql/icuregex/internal/icudata/word.brk b/go/mysql/icuregex/internal/icudata/word.brk new file mode 100644 index 0000000000000000000000000000000000000000..80460c6012812b2ac0702a64ffd82695b7c7ace3 GIT binary patch literal 22232 zcmeHPdvILUc|W^%ckgOfTD>ewtC!a=d8PGB*v3>ILqUyW@Dm)H48nF6X^~aSl98nv z*Es1+pi`P@OKa#K&9nn;rg$iA$b%-+4wFoQU^g{!#*=1@At5Chm=+p_Ev9WINx$zq z=iKwS_wKIj;UAiFrF+i#zTfve-#K^pTH{C9D=QW-Y*BX0%;8)DMzIKEx#8)fCuSzb z56$NC!~MDI)(&2uD^88&CX2JBnW5a?sfp3)vC_)ity80`i7`EUs5EnIb#D8_Xld$L zDc6?+;E`jw>3ef?$4W!FL$k9-ht{k)e*F0AIWD|K_wiRv-*==oY z3Fa57snu>*{c#DD#SJ%Nk_V_TUXGO7cYy zg_TWZU6y5TmPlHC;5@L#R6azVM(sLQM;0j8Y4e?Q-f6N zy;SScg5EDm!CAzAfNaIW7({zrYMsm0Gxs{-hZl?74N-eHc(F(XTawy9`vuRdz*>(& z71HX5?uIDr2HH`fyCDj@!PR$zkq@%m$F8US(ii53s?(ra4_sT@ind?G3FAXVVMn>N zqrwot+(>&4wjG^PwhnF2g~bwuUFp)U3_}1Do-nX);RjlT`F2Cp+zk;t8AX$oY}cvb zDb=Z2g;pg8)|j**dY47*Dm1K+j`gWuqdHwrLYk+t3ZvDl5bA>}s88sytgw6()`eCs zbX}s5Q_8x`sigyAF+FVsTYO*eXubW#r{Q;eeml`q2G&MCh zwS)`$IO>@6u_U2puRs9&(4JdTgXohC9-8PWJRMVfFZ&Rv10SCJ$6{l&ES0XTgH^VY zV3e`SI&3Sj4}+UO3QTAX>k_ZY4>HR0(TFLj)^PO$Fpx2s(-4FuBr0g##LOq6ES?@( z`6*{Zdjy^czs^pwe`ICj>WYj+4oChb@^S>}_0clxj_!y)82u-x(Tz@-9dgb%uQ?w$ 
z8)L^~UyJ=)tTVnZ{%HJSyg9Ku@rQ}?iL|@Teb{};jVC{r{A}|1WTfuqy3f>oqwbe= zH>N(FdM@?z)aLZ5^w-nxr#IBk)qkb_oqA<|QNzB5M;o4Rc&j0w*_wIJ@C#ADmpPkx zE0b?L)Oe=x)yC$g!KV9~o@n|}Q&02$=F`m=n}5-KW6P1bCH|7DTkELC;Q= z;Ou;LY4@^UT_08kZdi8TGJe|r>aKS0mzKTI`=e!xmlv1cw|u=HFQjZ}q+1S6HzywLW!w>S*c_xc)r#52;sDKmJgzSo&MAU%t%_uh_wMFkCOW zqj-OmXo~(?%v{rxE6h2{?*xr_J?VS_&^UT3R%SUqqs%Aj#@`>p@9Tya(%Vad%IAo~OY|!SU1{I$5#{8SPPBi0 zy3F=QF#GZBJw51tHvdNa9Q%{}!|q?^pUFS2`M@O*S!Vkp z-^^biulal=@_IfvMy~nY{1;-ZpW8>uNTi{k<8ih-k^|4V{+m)g{R90&gq1$(J>qwP zr#21ggYKEwZTt6x4%%180M*nwcbbI87nqP7ib?xT( z97u1EoJ&9HpvO1C>~!>O8drl~A3!yg*-!g_0roQkP%X{912Q&=G5a!GJz!Gg>`C}8 z!7tQ#Etjf))Te90-J;L+anOF1^4r;*7qj%T)?V7U?5nQYG;n#W+U>CBx(uF9kBhx` zyyXW_RX4KBu=+Yi#cNU({fc`Rd{Y~V+&Iu0m6)9aAMZXeaBbJU732?54U|GEL3(lj zz*o8}UvG4C>_Y>8T)`T~aZeRnDj%5dqi`-=iajy##Jtw*nSqhWb71G3X3NRQXy(_; zswuPcVlTYNzCZAjftST>Ny9Acz21iR2EH4(SAGZT;JfT4;NdUoI1lxMHB@G}tMT`j z0}TbdaP>zbiDtaNm2OXVBtM8g>eWJZzt5$63WY*A4rAHLuC#0l#UNr=#H{WheJU&F z=(QL>lO%OpAy^k?@2oe=%OQJC>A(%wj1 zAU)^AxqQBT0gcYXeYs}g9F5P@dyOD}6wS0`&jwwEyt+|3t3Jcpqbu`x7jgwY{{~-I zLg>M0oBBVYdTnIT{I}t>d!d)dVdt*ad63>9+PlkwHFhO8wDa?V-))r6^N#@?+Z&sU zJr;X5_PyBqv4;4n_>TCI_#edoHhw<-PCS{&C)Ol(B|hcID-V=uWBy0lOrp#_pZFqR zzMc3F>w25!^{$Bynyqw)J=CZ-lE^9d_uZ%5Zvy>w;<02~5_ucwzBf6Sd@T8F@Y=_!N3A*A`2|zPdS!@;Pg+Tl>>G={NQl*x%D!NAU5#8aQ31*721($7`{8 zJmF2ef$Jo3Ot_SjcIv&>kRnhfv?06E31w$UJWlt@wc4a8lt&^MUtq- z4^gYg!VyIX88u&HBemcy#0xFOgLX0vFo6sZ*SZA~Elj{vi8;dM08kAirN;#G0J!HS z$C}9)=ao3{nSNvqwS+f>AU6Xw=Ofm zlaT_#DBv)bWBfmEg=y!SbV>*NeNV?8PL#PD76d%^;RVs84zY5?F-E|i3&kqJi&=!=YAWEDkB<0vIt29{PCM`|s`@hl@C7X~R|97m`G8hIxz zsmO7sWF!V)$~ZE)=`0{xJK(7uk!A<&Xr>x?YvXUgl$?Zb+}iuu7Z{$ zdDzL46T1!AGV?ezSKYJAl^8WA_4?LWF?b~jSqMaMKr5MqG*Bi`Ej4aVUwQOLwT^kV zY)40yH_{=b1u3hSx3W=T)Y0L+`s9PRL}-b&<;;=Yc&ckjo-hN9;O{Y(q4b8ggHZh4kF3WuWJ`%2+O~-gnq};W`k4 zBkX911ZbQ@EDn?<#JO3ejiUpzrxC@;sj6S&g$`+n)P9lYYbahYOtEmZi8E(Xil}l@ 
zsSg$pm7ub!w(PdFjko!=&1FJmUz=3W%DLg9Z!vq_-5=h&sP)@nS~C>{49;l9X&UG{j=@=N#{(kXlhAeb!A9{j)o2ye>ONu`($Lmmkh-sR1K&S`E{`zD20leiHd9uu)0f$Xm7A;ndQ&Yp$stcFnZhOAN zphm|mn3s~w0yV5SvJ_O9>ONhid#D|Qq?^}K*pkL4S94dc&sUl|C8VHMMReF{L!nY8x22K#|VSR)G~N)2VwAj;f8L>#96< zk=@zW6^iPTjiC^J%TD6ef|$XA`%20=jo6dq)uKfN7Jb(s(Gl1 zLxO7-^ML$J<9DS?EkCE}C_d&2fQ#QFv@hPC;(&qFqGeg2B!_zCh978X1t1C-wCa;k zTVTzOn_d`buX+`qIOaaBWZE#Q)*eJW_pyeRex3yLg@rMLcOS;QhKctco3Qv=JM^Cv z2%!Z;{?l@3TaEMOBB@?1pb(cylD($YHs>!mRwXpVFN7wQnjrI05Y zEe(yWlM-xrUrs;}y_f2^0kQ;=pi-T&JZSC@j>#svtNr^uk-1MGtL$)h(H4xzC5k|d zNE6yr#>fLwt|y!0Jq<<@NTG_ucvJ2e@4`(>^rF5)h9#DS)4FJ)!&%B2_ZO>)kH}{L$)o@kMTAvf7Qv*G zS?F;c2mU`EqbU}C0n?k}zzFxHBydOY4isUU+@>kGLKDp=BJf}_U=+j~Z=&dc1mcn; zd0O%CG7_p$IX4L-Y1=L}Zbe8?zR)oGA=P7#Q;ba9Xig3yg>&IfAPy%PAxJ<;KsFHQ z-PZ!+^_)IKTk!qPYd9CvcB}PxAU=>dm}v@{Wc&=9{C{Au9~1MlqJIR4_7=QY+AOpIJJ3X2<}7sH!T~!WAn(} zM^D~4Ju`OjmYGuV@aEw=P62)U7Ms3(3(|L%j!#ZZm29RUBjRKC&z7dfYyhX8T7}jD zbl07?*$5z^_#LJ3iRr20hcinBJBQ<1-|I9ohioGRJ~ zNk`1q(mgY!;|I5wW+7oV&|o6^=3ty4BVy0Q)c9oS;O)2!wmO}M*x#6*yC)2*^ANjZ zVr=K!5ioBn^%w#b>>;2yL+J`ugnTemnwc220fLMOzO=y(={&^Vxu-O~=V)=%mky7i zG(zR>o*3mZay^1FJw8)BdT3%4p<5?risQ7bLvn~^ed}0BqR4Y?j#9(ZQ?oPElf?Qs zDk~i+9UPvVI|l16w*iNcjpEe!+~l4UlaobQA`qG@j;>w1>h8I*jm34O?a0x|(*3Bk zbU*kzv>kSc*i6&JIw*MP+BWbOet&af^9XJRF9NSZ0Rxpki@RXv#PDs~0O!e|gyxF@ z1>7(cCp}!mC+VhEj3oimLp)0 zFQV9~cm(XKogD%k3|S z>$wh5R0dHiA)TI=dBm(fU>+g;99R*usUlY@@V4v&YV$u}oG-Q53eSe2ssc1) for quick FCC boundary-after tests. 
+ deltaTccc0 = 0 + deltaTccc1 = 2 + deltaTcccGt1 = 4 + deltaTcccMask = 6 + deltaShift = 3 + + maxDelta = 0x40 +) + +const ( + jamoLBase rune = 0x1100 /* "lead" jamo */ + jamoLEnd rune = 0x1112 + jamoVBase rune = 0x1161 /* "vowel" jamo */ + jamoVEnd rune = 0x1175 + jamoTBase rune = 0x11a7 /* "trail" jamo */ + jamoTEnd rune = 0x11c2 + + hangulBase rune = 0xac00 + hangulEnd rune = 0xd7a3 + + jamoLCount rune = 19 + jamoVCount rune = 21 + jamoTCount rune = 28 + + hangulCount = jamoLCount * jamoVCount * jamoTCount + hangulLimit = hangulBase + hangulCount +) + +const ( + mappingHasCccLcccWord = 0x80 + mappingHasRawMapping = 0x40 + // unused bit 0x20, + mappingLengthMask = 0x1f +) + +/** + * Constants for normalization modes. + * @deprecated ICU 56 Use unorm2.h instead. + */ +type Mode int32 + +const ( + /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNone Mode = 1 + /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfd Mode = 2 + /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfkd Mode = 3 + /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfc Mode = 4 + /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */ + NormDefault Mode = NormNfc + /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */ + NormNfkc Mode = 5 + /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */ + NormFcd Mode = 6 +) + +/** + * Result values for normalization quick check functions. + * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms + * @stable ICU 2.0 + */ +type CheckResult int + +const ( + /** + * The input string is not in the normalization form. + * @stable ICU 2.0 + */ + No CheckResult = iota + /** + * The input string is in the normalization form. 
+ * @stable ICU 2.0 + */ + Yes + /** + * The input string may or may not be in the normalization form. + * This value is only returned for composition forms like NFC and FCC, + * when a backward-combining character is found for which the surrounding text + * would have to be analyzed further. + * @stable ICU 2.0 + */ + Maybe +) diff --git a/go/mysql/icuregex/internal/normalizer/normalizer.go b/go/mysql/icuregex/internal/normalizer/normalizer.go new file mode 100644 index 00000000000..c13a4878deb --- /dev/null +++ b/go/mysql/icuregex/internal/normalizer/normalizer.go @@ -0,0 +1,482 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package normalizer + +import ( + "errors" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/uset" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +type Normalizer struct { + minDecompNoCP rune + minCompNoMaybeCP rune + minLcccCP rune + + // Norm16 value thresholds for quick check combinations and types of extra data. 
+ minYesNo uint16 + minYesNoMappingsOnly uint16 + minNoNo uint16 + minNoNoCompBoundaryBefore uint16 + minNoNoCompNoMaybeCC uint16 + minNoNoEmpty uint16 + limitNoNo uint16 + centerNoNoDelta uint16 + minMaybeYes uint16 + + normTrie *utrie.UcpTrie + + maybeYesCompositions []uint16 + extraData []uint16 // mappings and/or compositions for yesYes, yesNo & noNo characters + smallFCD []uint8 // [0x100] one bit per 32 BMP code points, set if any FCD!=0 +} + +var nfc *Normalizer +var nfkc *Normalizer + +var normalizerOnce sync.Once + +func loadNormalizer() { + normalizerOnce.Do(func() { + nfc = &Normalizer{} + if err := nfc.load(icudata.Nfc); err != nil { + panic(err) + } + + nfkc = &Normalizer{} + if err := nfkc.load(icudata.Nfkc); err != nil { + panic(err) + } + }) +} + +const ixNormTrieOffset = 0 +const ixExtraDataOffset = 1 +const ixSmallFcdOffset = 2 +const ixReserved3Offset = 3 +const ixTotalSize = 7 + +const ixMinDecompNoCp = 8 +const ixMinCompNoMaybeCp = 9 + +/** Mappings & compositions in [minYesNo..minYesNoMappingsOnly[. */ +const ixMinYesNo = 10 + +/** Mappings are comp-normalized. */ +const ixMinNoNo = 11 +const ixLimitNoNo = 12 +const ixMinMaybeYes = 13 + +/** Mappings only in [minYesNoMappingsOnly..minNoNo[. */ +const ixMinYesNoMappingsOnly = 14 + +/** Mappings are not comp-normalized but have a comp boundary before. */ +const ixMinNoNoCompBoundaryBefore = 15 + +/** Mappings do not have a comp boundary before. */ +const ixMinNoNoCompNoMaybeCc = 16 + +/** Mappings to the empty string. 
*/ +const ixMinNoNoEmpty = 17 + +const ixMinLcccCp = 18 +const ixCount = 20 + +func (n *Normalizer) load(data []byte) error { + bytes := udata.NewBytes(data) + + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.Size >= 20 && + info.IsBigEndian == 0 && + info.CharsetFamily == 0 && + info.DataFormat[0] == 0x4e && /* dataFormat="Nrm2" */ + info.DataFormat[1] == 0x72 && + info.DataFormat[2] == 0x6d && + info.DataFormat[3] == 0x32 && + info.FormatVersion[0] == 4 + }) + if err != nil { + return err + } + + indexesLength := int32(bytes.Uint32()) / 4 + if indexesLength <= ixMinLcccCp { + return errors.New("normalizer2 data: not enough indexes") + } + indexes := make([]int32, indexesLength) + indexes[0] = indexesLength * 4 + for i := int32(1); i < indexesLength; i++ { + indexes[i] = bytes.Int32() + } + + n.minDecompNoCP = indexes[ixMinDecompNoCp] + n.minCompNoMaybeCP = indexes[ixMinCompNoMaybeCp] + n.minLcccCP = indexes[ixMinLcccCp] + + n.minYesNo = uint16(indexes[ixMinYesNo]) + n.minYesNoMappingsOnly = uint16(indexes[ixMinYesNoMappingsOnly]) + n.minNoNo = uint16(indexes[ixMinNoNo]) + n.minNoNoCompBoundaryBefore = uint16(indexes[ixMinNoNoCompBoundaryBefore]) + n.minNoNoCompNoMaybeCC = uint16(indexes[ixMinNoNoCompNoMaybeCc]) + n.minNoNoEmpty = uint16(indexes[ixMinNoNoEmpty]) + n.limitNoNo = uint16(indexes[ixLimitNoNo]) + n.minMaybeYes = uint16(indexes[ixMinMaybeYes]) + + n.centerNoNoDelta = uint16(indexes[ixMinMaybeYes]>>deltaShift) - maxDelta - 1 + + offset := indexes[ixNormTrieOffset] + nextOffset := indexes[ixExtraDataOffset] + triePosition := bytes.Position() + + n.normTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + + trieLength := bytes.Position() - triePosition + if trieLength > nextOffset-offset { + return errors.New("normalizer2 data: not enough bytes for normTrie") + } + bytes.Skip((nextOffset - offset) - trieLength) // skip padding after trie bytes + + // Read the composition and mapping data.
+ offset = nextOffset + nextOffset = indexes[ixSmallFcdOffset] + numChars := (nextOffset - offset) / 2 + if numChars != 0 { + n.maybeYesCompositions = bytes.Uint16Slice(numChars) + n.extraData = n.maybeYesCompositions[((minNormalMaybeYes - n.minMaybeYes) >> offsetShift):] + } + + // smallFCD: new in formatVersion 2 + n.smallFCD = bytes.Uint8Slice(0x100) + return nil +} + +func Nfc() *Normalizer { + loadNormalizer() + return nfc +} + +func Nfkc() *Normalizer { + loadNormalizer() + return nfkc +} + +func (n *Normalizer) AddPropertyStarts(u *uset.UnicodeSet) { + var start, end rune + var value uint32 + for { + end, value = n.normTrie.GetRange(start, utrie.UcpMapRangeFixedLeadSurrogates, inert, nil) // walk the receiver's own trie, not the package-level nfc instance + if end < 0 { + break + } + u.AddRune(start) + if start != end && n.isAlgorithmicNoNo(uint16(value)) && (value&deltaTcccMask) > deltaTccc1 { + // Range of code points with same-norm16-value algorithmic decompositions. + // They might have different non-zero FCD16 values. + prevFCD16 := n.GetFCD16(start) + for { + start++ + if start > end { + break + } + fcd16 := n.GetFCD16(start) + if fcd16 != prevFCD16 { + u.AddRune(start) + prevFCD16 = fcd16 + } + } + } + start = end + 1 + } + + // add Hangul LV syllables and LV+1 because of skippables + for c := hangulBase; c < hangulLimit; c += jamoTCount { + u.AddRune(c) + u.AddRune(c + 1) + } + u.AddRune(hangulLimit) +} + +func (n *Normalizer) isAlgorithmicNoNo(norm16 uint16) bool { + return n.limitNoNo <= norm16 && norm16 < n.minMaybeYes +} + +func (n *Normalizer) GetFCD16(c rune) uint16 { + if c < n.minDecompNoCP { + return 0 + } else if c <= 0xffff { + if !n.singleLeadMightHaveNonZeroFCD16(c) { + return 0 + } + } + return n.getFCD16FromNormData(c) +} + +func (n *Normalizer) singleLeadMightHaveNonZeroFCD16(lead rune) bool { + // 0<=lead<=0xffff + bits := n.smallFCD[lead>>8] + if bits == 0 { + return false + } + return ((bits >> ((lead >> 5) & 7)) & 1) != 0 +} + +func (n *Normalizer) getFCD16FromNormData(c rune) uint16 { + norm16
:= n.getNorm16(c) + if norm16 >= n.limitNoNo { + if norm16 >= minNormalMaybeYes { + // combining mark + norm16 = uint16(n.getCCFromNormalYesOrMaybe(norm16)) + return norm16 | (norm16 << 8) + } else if norm16 >= n.minMaybeYes { + return 0 + } else { // isDecompNoAlgorithmic(norm16) + deltaTrailCC := norm16 & deltaTcccMask + if deltaTrailCC <= deltaTccc1 { + return deltaTrailCC >> offsetShift + } + // Maps to an isCompYesAndZeroCC. + c = n.mapAlgorithmic(c, norm16) + norm16 = n.getRawNorm16(c) + } + } + + if norm16 <= n.minYesNo || n.isHangulLVT(norm16) { + // no decomposition or Hangul syllable, all zeros + return 0 + } + // c decomposes, get everything from the variable-length extra data + mapping := n.getMapping(norm16) + firstUnit := mapping[1] + if firstUnit&mappingHasCccLcccWord != 0 { + return firstUnit>>8 | mapping[0]&0xff00 // tccc from firstUnit, lccc from the preceding word + } + return firstUnit >> 8 // tccc only; lccc is zero +} + +func (n *Normalizer) getMapping(norm16 uint16) []uint16 { + return n.extraData[(norm16>>offsetShift)-1:] +} + +func (n *Normalizer) getNorm16(c rune) uint16 { + if utf16.IsLead(c) { + return inert + } + return n.getRawNorm16(c) +} + +func (n *Normalizer) getRawNorm16(c rune) uint16 { + return uint16(n.normTrie.Get(c)) +} + +func (n *Normalizer) getCCFromNormalYesOrMaybe(norm16 uint16) uint8 { + return uint8(norm16 >> offsetShift) +} + +func (n *Normalizer) mapAlgorithmic(c rune, norm16 uint16) rune { + return c + rune(norm16>>deltaShift) - rune(n.centerNoNoDelta) +} + +func (n *Normalizer) isHangulLV(norm16 uint16) bool { + return norm16 == n.minYesNo +} + +func (n *Normalizer) isHangulLVT(norm16 uint16) bool { + return norm16 == n.hangulLVT() +} + +func (n *Normalizer) hangulLVT() uint16 { + return n.minYesNoMappingsOnly | hasCompBoundaryAfter +} + +func (n *Normalizer) getComposeQuickCheck(c rune) CheckResult { + return n.getCompQuickCheck(n.getNorm16(c)) +} + +func (n *Normalizer) getDecomposeQuickCheck(c rune) CheckResult { + if n.isDecompYes(n.getNorm16(c)) { + return Yes + } + return No +} + +func QuickCheck(c rune,
mode Mode) CheckResult { + if mode <= NormNone || NormFcd <= mode { + return Yes + } + switch mode { + case NormNfc: + return Nfc().getComposeQuickCheck(c) + case NormNfd: + return Nfc().getDecomposeQuickCheck(c) + case NormNfkc: + return Nfkc().getComposeQuickCheck(c) + case NormNfkd: + return Nfkc().getDecomposeQuickCheck(c) + default: + return Maybe + } +} + +func IsInert(c rune, mode Mode) bool { + switch mode { + case NormNfc: + return Nfc().isCompInert(c) + case NormNfd: + return Nfc().isDecompInert(c) + case NormNfkc: + return Nfkc().isCompInert(c) + case NormNfkd: + return Nfkc().isDecompInert(c) + default: + return true + } +} + +func (n *Normalizer) isDecompYes(norm16 uint16) bool { + return norm16 < n.minYesNo || n.minMaybeYes <= norm16 +} + +func (n *Normalizer) getCompQuickCheck(norm16 uint16) CheckResult { + if norm16 < n.minNoNo || minYesYesWithCC <= norm16 { + return Yes + } else if n.minMaybeYes <= norm16 { + return Maybe + } else { + return No + } +} + +func (n *Normalizer) isMaybeOrNonZeroCC(norm16 uint16) bool { + return norm16 >= n.minMaybeYes +} + +func (n *Normalizer) isDecompNoAlgorithmic(norm16 uint16) bool { + return norm16 >= n.limitNoNo +} + +func (n *Normalizer) IsCompNo(c rune) bool { + norm16 := n.getNorm16(c) + return n.minNoNo <= norm16 && norm16 < n.minMaybeYes +} + +func (n *Normalizer) Decompose(c rune) []rune { + norm16 := n.getNorm16(c) + if c < n.minDecompNoCP || n.isMaybeOrNonZeroCC(norm16) { + // c does not decompose + return nil + } + var decomp []rune + + if n.isDecompNoAlgorithmic(norm16) { + // Maps to an isCompYesAndZeroCC. + c = n.mapAlgorithmic(c, norm16) + decomp = append(decomp, c) + // The mapping might decompose further. 
+ norm16 = n.getRawNorm16(c) + } + if norm16 < n.minYesNo { + return decomp + } else if n.isHangulLV(norm16) || n.isHangulLVT(norm16) { + // Hangul syllable: decompose algorithmically + // (this replaces any algorithmic mapping collected above) + parts := hangulDecompose(c) + decomp = decomp[:0] + for _, part := range parts { + decomp = append(decomp, rune(part)) + } + return decomp + } + // c decomposes, get everything from the variable-length extra data + mapping := n.getMapping(norm16) + length := mapping[1] & mappingLengthMask + mapping = mapping[2 : 2+length] + decomp = decomp[:0] // the full mapping replaces any algorithmic mapping collected above + for len(mapping) > 0 { + c, mapping = utf16.NextUnsafe(mapping) + decomp = append(decomp, c) + } + + return decomp +} + +func hangulDecompose(c rune) []uint16 { + c -= hangulBase + c2 := c % jamoTCount + c /= jamoTCount + var buffer []uint16 + buffer = append(buffer, uint16(jamoLBase+c/jamoVCount)) + buffer = append(buffer, uint16(jamoVBase+c%jamoVCount)) + if c2 != 0 { + buffer = append(buffer, uint16(jamoTBase+c2)) + } + return buffer +} + +func (n *Normalizer) isCompInert(c rune) bool { + norm16 := n.getNorm16(c) + return n.isCompYesAndZeroCC(norm16) && (norm16&hasCompBoundaryAfter) != 0 +} + +func (n *Normalizer) isDecompInert(c rune) bool { + return n.isDecompYesAndZeroCC(n.getNorm16(c)) +} + +func (n *Normalizer) isCompYesAndZeroCC(norm16 uint16) bool { + return norm16 < n.minNoNo +} + +func (n *Normalizer) isDecompYesAndZeroCC(norm16 uint16) bool { + return norm16 < n.minYesNo || + norm16 == jamoVt || + (n.minMaybeYes <= norm16 && norm16 <= minNormalMaybeYes) +} + +func (n *Normalizer) CombiningClass(c rune) uint8 { + return n.getCC(n.getNorm16(c)) +} + +func (n *Normalizer) getCC(norm16 uint16) uint8 { + if norm16 >= minNormalMaybeYes { + return n.getCCFromNormalYesOrMaybe(norm16) + } + if norm16 < n.minNoNo || n.limitNoNo <= norm16 { + return 0 + } + return n.getCCFromNoNo(norm16) + +} + +func (n *Normalizer) getCCFromNoNo(norm16 uint16) uint8 { + mapping := n.getMapping(norm16) + if mapping[1]&mappingHasCccLcccWord != 0 { + return uint8(mapping[0])
+ } + return 0 +} diff --git a/go/mysql/icuregex/internal/pattern/unescape.go b/go/mysql/icuregex/internal/pattern/unescape.go new file mode 100644 index 00000000000..e4a554ff612 --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/unescape.go @@ -0,0 +1,314 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package pattern + +import ( + "strings" + "unicode/utf8" + + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +/* Convert one octal digit to a numeric value 0..7, or -1 on failure */ +func _digit8(c rune) rune { + if c >= 0x0030 && c <= 0x0037 { + return (c - 0x0030) + } + return -1 +} + +/* Convert one hex digit to a numeric value 0..F, or -1 on failure */ +func _digit16(c rune) rune { + if c >= 0x0030 && c <= 0x0039 { + return (c - 0x0030) + } + if c >= 0x0041 && c <= 0x0046 { + return (c - (0x0041 - 10)) + } + if c >= 0x0061 && c <= 0x0066 { + return (c - (0x0061 - 10)) + } + return -1 +} + +var unscapeMap = []byte{ + /*" 0x22, 0x22 */ + /*' 0x27, 0x27 */ + /*? 
0x3F, 0x3F */ + /*\ 0x5C, 0x5C */ + /*a*/ 0x61, 0x07, + /*b*/ 0x62, 0x08, + /*e*/ 0x65, 0x1b, + /*f*/ 0x66, 0x0c, + /*n*/ 0x6E, 0x0a, + /*r*/ 0x72, 0x0d, + /*t*/ 0x74, 0x09, + /*v*/ 0x76, 0x0b, +} + +func Unescape(str string) (string, bool) { + var idx int + if idx = strings.IndexByte(str, '\\'); idx < 0 { + return str, true + } + + var result strings.Builder + result.WriteString(str[:idx]) + str = str[idx:] + + for len(str) > 0 { + if str[0] == '\\' { + var r rune + r, str = UnescapeAt(str[1:]) + if r < 0 { + return "", false + } + result.WriteRune(r) + } else { + result.WriteByte(str[0]) + str = str[1:] + } + } + return result.String(), true +} + +func UnescapeAt(str string) (rune, string) { + c, w := utf8.DecodeRuneInString(str) + str = str[w:] + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + + var minDig, maxDig, n int + var braces bool + var bitsPerDigit = 4 + var result rune + + switch c { + case 'u': + minDig = 4 + maxDig = 4 + case 'U': + minDig = 8 + maxDig = 8 + case 'x': + minDig = 1 + if len(str) > 0 && str[0] == '{' { + str = str[1:] + braces = true + maxDig = 8 + } else { + maxDig = 2 + } + default: + if dig := _digit8(c); dig >= 0 { + minDig = 1 + maxDig = 4 + n = 1 + bitsPerDigit = 3 + result = dig + } + } + + if minDig != 0 { + for n < maxDig && len(str) > 0 { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && w == 1 { + return -1, str + } + + var dig rune + if bitsPerDigit == 3 { + dig = _digit8(c) + } else { + dig = _digit16(c) + } + if dig < 0 { + break + } + result = (result << bitsPerDigit) | dig + str = str[w:] + n++ + } + if n < minDig { + return -1, str + } + if braces { + if c != '}' { + return -1, str + } + str = str[1:] + } + if result < 0 || result > utf8.MaxRune { + return -1, str + } + if len(str) > 0 && utf16.IsLead(result) { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + if c == '\\' { + var str2 string + c, str2 = 
UnescapeAt(str[1:]) + if utf16.IsTrail(c) { + result = utf16.DecodeRune(result, c) + str = str2 + } + } + } + return result, str + } + + if c < utf8.RuneSelf { + for i := 0; i < len(unscapeMap); i += 2 { + if byte(c) == unscapeMap[i] { + return rune(unscapeMap[i+1]), str + } + if byte(c) < unscapeMap[i] { + break + } + } + } + + if c == 'c' && len(str) > 0 { + c, w = utf8.DecodeRuneInString(str) + if c == utf8.RuneError && (w == 0 || w == 1) { + return -1, str + } + return 0x1f & c, str[w:] + } + + return c, str +} + +func UnescapeAtRunes(str []rune) (rune, []rune) { + if len(str) == 0 { + return -1, str + } + + c := str[0] + str = str[1:] + if c == utf8.RuneError { + return -1, str + } + + var minDig, maxDig, n int + var braces bool + var bitsPerDigit = 4 + var result rune + + switch c { + case 'u': + minDig = 4 + maxDig = 4 + case 'U': + minDig = 8 + maxDig = 8 + case 'x': + minDig = 1 + if len(str) > 0 && str[0] == '{' { + str = str[1:] + braces = true + maxDig = 8 + } else { + maxDig = 2 + } + default: + if dig := _digit8(c); dig >= 0 { + minDig = 1 + maxDig = 4 + n = 1 + bitsPerDigit = 3 + result = dig + } + } + + if minDig != 0 { + for n < maxDig && len(str) > 0 { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + + var dig rune + if bitsPerDigit == 3 { + dig = _digit8(c) + } else { + dig = _digit16(c) + } + if dig < 0 { + break + } + result = (result << bitsPerDigit) | dig + str = str[1:] + n++ + } + if n < minDig { + return -1, str + } + if braces { + if c != '}' { + return -1, str + } + str = str[1:] + } + if result < 0 || result > utf8.MaxRune { + return -1, str + } + if len(str) > 0 && utf16.IsLead(result) { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + if c == '\\' { + var str2 []rune + c, str2 = UnescapeAtRunes(str[1:]) + if utf16.IsTrail(c) { + result = utf16.DecodeRune(result, c) + str = str2 + } + } + } + return result, str + } + + if c < utf8.RuneSelf { + for i := 0; i < len(unscapeMap); i += 2 { + if byte(c) == 
unscapeMap[i] { + return rune(unscapeMap[i+1]), str + } + if byte(c) < unscapeMap[i] { + break + } + } + } + + if c == 'c' && len(str) > 0 { + c = str[0] + if c == utf8.RuneError { + return -1, str + } + return 0x1f & c, str[1:] + } + + return c, str +} diff --git a/go/mysql/icuregex/internal/pattern/unescape_test.go b/go/mysql/icuregex/internal/pattern/unescape_test.go new file mode 100644 index 00000000000..0bb76c2bfdb --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/unescape_test.go @@ -0,0 +1,48 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pattern + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnescapeAt(t *testing.T) { + r, str := UnescapeAt("ud800\\ud800\\udc00") + assert.Equal(t, rune(0xd800), r) + assert.Equal(t, "\\ud800\\udc00", str) + + r, str = UnescapeAt(str[1:]) + assert.Equal(t, rune(0x00010000), r) + assert.Equal(t, "", str) +} + +func TestUnescapeAtRunes(t *testing.T) { + r, str := UnescapeAtRunes([]rune("ud800\\ud800\\udc00")) + assert.Equal(t, rune(0xd800), r) + assert.Equal(t, []rune("\\ud800\\udc00"), str) + + r, str = UnescapeAtRunes(str[1:]) + assert.Equal(t, rune(0x00010000), r) + assert.Equal(t, []rune(""), str) +} diff --git a/go/mysql/icuregex/internal/pattern/utils.go b/go/mysql/icuregex/internal/pattern/utils.go new file mode 100644 index 00000000000..4dcf55e9f42 --- /dev/null +++ b/go/mysql/icuregex/internal/pattern/utils.go @@ -0,0 +1,111 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package pattern + +import ( + "strings" + "unicode/utf8" +) + +var patternPropsLatin1 = [256]uint8{ + // WS: 9..D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // WS: 20 Syntax: 21..2F + 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + // Syntax: 3A..40 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 5B..5E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // Syntax: 60 + 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: 7B..7E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, + // WS: 85 + 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: A1..A7, A9, AB, AC, AE + 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, + // Syntax: B0, B1, B6, BB, BF + 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: D7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // Syntax: F7 + 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, +} + +func IsWhitespace(c rune) bool { + if c < 0 { + return false + } else if c <= 0xff { + return (patternPropsLatin1[c]>>2)&1 != 0 + } else if 0x200e <= c && c <= 0x2029 { + return c <= 0x200f || 0x2028 <= c + } else { + return false + } +} + +func SkipWhitespace(str string) string { + for { + r, w := utf8.DecodeRuneInString(str) + if r == utf8.RuneError && (w == 0 || w == 1) { + return str[w:] + } + if !IsWhitespace(r) { + return str + } + str = str[w:] + } +} + +func IsUnprintable(c rune) bool { + return !(c >= 0x20 && c <= 0x7E) +} + +// "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" +var digits = [...]byte{ + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, + 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, + 85, 86, 87, 88, 89, 90, +} + +func EscapeUnprintable(w *strings.Builder, c rune) { + w.WriteByte('\\') + if (c & ^0xFFFF) 
!= 0 { + w.WriteByte('U') + w.WriteByte(digits[0xF&(c>>28)]) + w.WriteByte(digits[0xF&(c>>24)]) + w.WriteByte(digits[0xF&(c>>20)]) + w.WriteByte(digits[0xF&(c>>16)]) + } else { + w.WriteByte('u') + } + w.WriteByte(digits[0xF&(c>>12)]) + w.WriteByte(digits[0xF&(c>>8)]) + w.WriteByte(digits[0xF&(c>>4)]) + w.WriteByte(digits[0xF&c]) +} diff --git a/go/mysql/icuregex/internal/ubidi/ubidi.go b/go/mysql/icuregex/internal/ubidi/ubidi.go new file mode 100644 index 00000000000..195e2b1a6dd --- /dev/null +++ b/go/mysql/icuregex/internal/ubidi/ubidi.go @@ -0,0 +1,461 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package ubidi + +import ( + "errors" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +const ( + ixIndexTop = iota + ixLength + ixTrieSize + ixMirrorLength + + ixJgStart + ixJgLimit + ixJgStart2 /* new in format version 2.2, ICU 54 */ + ixJgLimit2 + + maxValuesIndex + ixTop +) + +var ubidi struct { + indexes []int32 + trie *utrie.UTrie2 + mirrors []uint32 + jg []uint8 + jg2 []uint8 +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x42 && + info.DataFormat[1] == 0x69 && + info.DataFormat[2] == 0x44 && + info.DataFormat[3] == 0x69 && + info.FormatVersion[0] == 2 + }) + if err != nil { + return err + } + + count := int32(bytes.Uint32()) + if count < ixTop { + return errors.New("indexes[0] too small in ubidi.icu") + } + + ubidi.indexes = make([]int32, count) + ubidi.indexes[0] = count + + for i := int32(1); i < count; i++ { + ubidi.indexes[i] = int32(bytes.Uint32()) + } + + ubidi.trie, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength := ubidi.indexes[ixTrieSize] + trieLength := ubidi.trie.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ubidi.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + + if n := ubidi.indexes[ixMirrorLength]; n > 0 { + ubidi.mirrors = bytes.Uint32Slice(n) + } + if n := ubidi.indexes[ixJgLimit] - ubidi.indexes[ixJgStart]; n > 0 { + ubidi.jg = bytes.Uint8Slice(n) + } + if n := ubidi.indexes[ixJgLimit2] - ubidi.indexes[ixJgStart2]; n > 0 { + ubidi.jg2 = bytes.Uint8Slice(n) + } + + return nil +} + +func init() { + b := udata.NewBytes(icudata.UBidi) + if err := readData(b); err != nil { + panic(err) + } +} + +const ( + /* UBIDI_CLASS_SHIFT=0, */ /* bidi class: 5 bits (4..0) */ + jtShift = 5 /* joining type: 3 bits (7..5)
*/ + + bptShift = 8 /* Bidi_Paired_Bracket_Type(bpt): 2 bits (9..8) */ + + joinControlShift = 10 + bidiControlShift = 11 + + isMirroredShift = 12 /* 'is mirrored' */ +) + +/** + * Bidi Paired Bracket Type constants. + * + * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE + * @stable ICU 52 + */ +type UPairedBracketType int32 + +/* + * Note: UBidiPairedBracketType constants are parsed by preparseucd.py. + * It matches lines like + * U_BPT_ + */ +const ( + /** Not a paired bracket. @stable ICU 52 */ + BptNone UPairedBracketType = iota + /** Open paired bracket. @stable ICU 52 */ + BptOpen + /** Close paired bracket. @stable ICU 52 */ + BptClose +) + +const classMask = 0x0000001f +const jtMask = 0x000000e0 +const bptMask = 0x00000300 + +/** + * Joining Type constants. + * + * @see UCHAR_JOINING_TYPE + * @stable ICU 2.2 + */ +type JoiningType int32 + +/* + * Note: UJoiningType constants are parsed by preparseucd.py. + * It matches lines like + * U_JT_ + */ +const ( + JtNonJoining JoiningType = iota /*[U]*/ + JtJoinCausing /*[C]*/ + JtDualJoining /*[D]*/ + JtLeftJoining /*[L]*/ + JtRightJoining /*[R]*/ + JtTransparent /*[T]*/ +) + +/** + * Joining Group constants. + * + * @see UCHAR_JOINING_GROUP + * @stable ICU 2.2 + */ +type JoiningGroup int32 + +/* + * Note: UJoiningGroup constants are parsed by preparseucd.py. 
+ * It matches lines like + * U_JG_ + */ +const ( + JgNoJoiningGroup JoiningGroup = iota + JgAin + JgAlaph + JgAlef + JgBeh + JgBeth + JgDal + JgDalathRish + JgE + JgFeh + JgFinalSemkath + JgGaf + JgGamal + JgHah + JgTehMarbutaGoal /**< @stable ICU 4.6 */ + JgHe + JgHeh + JgHehGoal + JgHeth + JgKaf + JgKaph + JgKnottedHeh + JgLam + JgLamadh + JgMeem + JgMim + JgNoon + JgNun + JgPe + JgQaf + JgQaph + JgReh + JgReversedPe + JgSad + JgSadhe + JgSeen + JgSemkath + JgShin + JgSwashKaf + JgSyriacWaw + JgTah + JgTaw + JgTehMarbuta + JgTeth + JgWaw + JgYeh + JgYehBarree + JgYehWithTail + JgYudh + JgYudhHe + JgZain + JgFe /**< @stable ICU 2.6 */ + JgKhaph /**< @stable ICU 2.6 */ + JgZhain /**< @stable ICU 2.6 */ + JgBurushashkiYehBarree /**< @stable ICU 4.0 */ + JgFarsiYeh /**< @stable ICU 4.4 */ + JgNya /**< @stable ICU 4.4 */ + JgRohingyaYeh /**< @stable ICU 49 */ + JgManichaeanAleph /**< @stable ICU 54 */ + JgManichaeanAyin /**< @stable ICU 54 */ + JgManichaeanBeth /**< @stable ICU 54 */ + JgManichaeanDaleth /**< @stable ICU 54 */ + JgManichaeanDhamedh /**< @stable ICU 54 */ + JgManichaeanFive /**< @stable ICU 54 */ + JgManichaeanGimel /**< @stable ICU 54 */ + JgManichaeanHeth /**< @stable ICU 54 */ + JgManichaeanHundred /**< @stable ICU 54 */ + JgManichaeanKaph /**< @stable ICU 54 */ + JgManichaeanLamedh /**< @stable ICU 54 */ + JgManichaeanMem /**< @stable ICU 54 */ + JgManichaeanNun /**< @stable ICU 54 */ + JgManichaeanOne /**< @stable ICU 54 */ + JgManichaeanPe /**< @stable ICU 54 */ + JgManichaeanQoph /**< @stable ICU 54 */ + JgManichaeanResh /**< @stable ICU 54 */ + JgManichaeanSadhe /**< @stable ICU 54 */ + JgManichaeanSamekh /**< @stable ICU 54 */ + JgManichaeanTaw /**< @stable ICU 54 */ + JgManichaeanTen /**< @stable ICU 54 */ + JgManichaeanTeth /**< @stable ICU 54 */ + JgManichaeanThamedh /**< @stable ICU 54 */ + JgManichaeanTwenty /**< @stable ICU 54 */ + JgManichaeanWaw /**< @stable ICU 54 */ + JgManichaeanYodh /**< @stable ICU 54 */ + JgManichaeanZayin /**< 
@stable ICU 54 */ + JgStraightWaw /**< @stable ICU 54 */ + JgAfricanFeh /**< @stable ICU 58 */ + JgAfricanNoon /**< @stable ICU 58 */ + JgAfricanQaf /**< @stable ICU 58 */ + + JgMalayalamBha /**< @stable ICU 60 */ + JgMalayalamJa /**< @stable ICU 60 */ + JgMalayalamLla /**< @stable ICU 60 */ + JgMalayalamLlla /**< @stable ICU 60 */ + JgMalayalamNga /**< @stable ICU 60 */ + JgMalayalamNna /**< @stable ICU 60 */ + JgMalayalamNnna /**< @stable ICU 60 */ + JgMalayalamNya /**< @stable ICU 60 */ + JgMalayalamRa /**< @stable ICU 60 */ + JgMalayalamSsa /**< @stable ICU 60 */ + JgMalayalamTta /**< @stable ICU 60 */ + + JgHanafiRohingyaKinnaYa /**< @stable ICU 62 */ + JgHanafiRohingyaPa /**< @stable ICU 62 */ + + JgThinYeh /**< @stable ICU 70 */ + JgVerticalTail /**< @stable ICU 70 */ +) + +/** + * This specifies the language directional property of a character set. + * @stable ICU 2.0 + */ +type CharDirection int32 + +/* + * Note: UCharDirection constants and their API comments are parsed by preparseucd.py. + * It matches pairs of lines like + * / ** comment... 
* /
+ *     U_<[A-Z_]+> = <integer>,
+ */
+
+// Bidi class values. Class returns one of these for a code point.
+const (
+	/** L @stable ICU 2.0 */
+	LeftToRight CharDirection = 0
+	/** R @stable ICU 2.0 */
+	RightToLeft CharDirection = 1
+	/** EN @stable ICU 2.0 */
+	EuropeanNumber CharDirection = 2
+	/** ES @stable ICU 2.0 */
+	EuropeanNumberSeparator CharDirection = 3
+	/** ET @stable ICU 2.0 */
+	EuropeanNumberTerminator CharDirection = 4
+	/** AN @stable ICU 2.0 */
+	ArabicNumber CharDirection = 5
+	/** CS @stable ICU 2.0 */
+	CommonNumberSeparator CharDirection = 6
+	/** B @stable ICU 2.0 */
+	BlockSeparator CharDirection = 7
+	/** S @stable ICU 2.0 */
+	SegmentSeparator CharDirection = 8
+	/** WS @stable ICU 2.0 */
+	WhiteSpaceNeutral CharDirection = 9
+	/** ON @stable ICU 2.0 */
+	OtherNeutral CharDirection = 10
+	/** LRE @stable ICU 2.0 */
+	LeftToRightEmbedding CharDirection = 11
+	/** LRO @stable ICU 2.0 */
+	LeftToRightOverride CharDirection = 12
+	/** AL @stable ICU 2.0 */
+	RightToLeftArabic CharDirection = 13
+	/** RLE @stable ICU 2.0 */
+	RightToLeftEmbedding CharDirection = 14
+	/** RLO @stable ICU 2.0 */
+	RightToLeftOverride CharDirection = 15
+	/** PDF @stable ICU 2.0 */
+	PopDirectionalFormat CharDirection = 16
+	/** NSM @stable ICU 2.0 */
+	DirNonSpacingMark CharDirection = 17
+	/** BN @stable ICU 2.0 */
+	BoundaryNeutral CharDirection = 18
+	/** FSI @stable ICU 52 */
+	StrongIsolate CharDirection = 19
+	/** LRI @stable ICU 52 */
+	LeftToRightIsolate CharDirection = 20
+	/** RLI @stable ICU 52 */
+	RightToLeftIsolate CharDirection = 21
+	/** PDI @stable ICU 52 */
+	PopDirectionalIsolate CharDirection = 22
+)
+
+// propertySet is the set-building sink that AddPropertyStarts writes to.
+type propertySet interface {
+	AddRune(ch rune)
+	AddRuneRange(from rune, to rune)
+}
+
+// AddPropertyStarts adds to sa every code point at which a bidi property
+// value may change: the start of each same-value trie range, each code point
+// in the mirroring table (together with its successor), and the points where
+// the value changes inside the two Joining_Group arrays.
+func AddPropertyStarts(sa propertySet) {
+	/* add the start code point of each same-value range of the trie */
+	ubidi.trie.Enum(nil, func(start, _ rune, _ uint32) bool {
+		sa.AddRune(start)
+		return true
+	})
+
+	/* add the code points from the bidi mirroring table */
+	length := ubidi.indexes[ixMirrorLength]
+	for i := int32(0); i < length; i++ {
+		c := mirrorCodePoint(rune(ubidi.mirrors[i]))
+		sa.AddRuneRange(c, c+1)
+	}
+
+	/* add the code points from the Joining_Group array where the value changes */
+	start := ubidi.indexes[ixJgStart]
+	limit := ubidi.indexes[ixJgLimit]
+	jgArray := ubidi.jg[:]
+	// The outer loop runs twice: once for jg, then once for jg2.
+	for {
+		prev := uint8(0)
+		for start < limit {
+			jg := jgArray[0]
+			jgArray = jgArray[1:]
+			if jg != prev {
+				sa.AddRune(start)
+				prev = jg
+			}
+			start++
+		}
+		if prev != 0 {
+			/* add the limit code point if the last value was not 0 (it is now start==limit) */
+			sa.AddRune(limit)
+		}
+		if limit == ubidi.indexes[ixJgLimit] {
+			/* switch to the second Joining_Group range */
+			start = ubidi.indexes[ixJgStart2]
+			limit = ubidi.indexes[ixJgLimit2]
+			jgArray = ubidi.jg2[:]
+		} else {
+			break
+		}
+	}
+
+	/* add code points with hardcoded properties, plus the ones following them */
+
+	/* (none right now) */
+}
+
+// HasFlag reports whether bit number shift of props is set.
+func HasFlag(props uint16, shift int) bool {
+	return ((props >> shift) & 1) != 0
+}
+
+// mirrorCodePoint keeps the low 21 bits of a mirroring-table entry, which
+// hold the code point.
+func mirrorCodePoint(m rune) rune {
+	return m & 0x1fffff
+}
+
+// IsJoinControl reports whether the joinControlShift bit is set in the trie
+// value for c.
+func IsJoinControl(c rune) bool {
+	props := ubidi.trie.Get16(c)
+	return HasFlag(props, joinControlShift)
+}
+
+// JoinType returns the JoiningType stored in the jtMask bits of the trie
+// value for c.
+func JoinType(c rune) JoiningType {
+	props := ubidi.trie.Get16(c)
+	return JoiningType((props & jtMask) >> jtShift)
+}
+
+// JoinGroup returns the JoiningGroup for c. It consults the first
+// Joining_Group range, then the second, and returns JgNoJoiningGroup when c
+// lies in neither.
+func JoinGroup(c rune) JoiningGroup {
+	start := ubidi.indexes[ixJgStart]
+	limit := ubidi.indexes[ixJgLimit]
+	if start <= c && c < limit {
+		return JoiningGroup(ubidi.jg[c-start])
+	}
+	start = ubidi.indexes[ixJgStart2]
+	limit = ubidi.indexes[ixJgLimit2]
+	if start <= c && c < limit {
+		return JoiningGroup(ubidi.jg2[c-start])
+	}
+	return JgNoJoiningGroup
+}
+
+// IsMirrored reports whether the isMirroredShift bit is set in the trie
+// value for c.
+func IsMirrored(c rune) bool {
+	props := ubidi.trie.Get16(c)
+	return HasFlag(props, isMirroredShift)
+}
+
+// IsBidiControl reports whether the bidiControlShift bit is set in the trie
+// value for c.
+func IsBidiControl(c rune) bool {
+	props := ubidi.trie.Get16(c)
+	return HasFlag(props, bidiControlShift)
+}
+
+// PairedBracketType returns the UPairedBracketType stored in the bptMask
+// bits of the trie value for c.
+func PairedBracketType(c rune) UPairedBracketType {
+	props := ubidi.trie.Get16(c)
+	return UPairedBracketType((props & bptMask) >> bptShift)
+}
+
+// Class returns the CharDirection stored in the classMask bits of the trie
+// value for c.
+func Class(c rune) CharDirection {
+	props := ubidi.trie.Get16(c)
+	return CharDirection(props & classMask)
+}
diff --git a/go/mysql/icuregex/internal/ucase/fold.go b/go/mysql/icuregex/internal/ucase/fold.go
new file mode 100644
index 00000000000..88d4f026c65
--- /dev/null
+++ b/go/mysql/icuregex/internal/ucase/fold.go
@@ -0,0 +1,243 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package ucase
+
+import (
+	"math/bits"
+
+	"vitess.io/vitess/go/mysql/icuregex/internal/utf16"
+)
+
+// FoldRunes returns a new slice holding the full case folding of every rune
+// in str. A rune whose folding is a multi-unit string contributes each code
+// point decoded from that string.
+func FoldRunes(str []rune) []rune {
+	out := make([]rune, 0, len(str))
+	for _, c := range str {
+		r, exp := FullFolding(c)
+		if exp == nil {
+			out = append(out, r)
+			continue
+		}
+
+		// exp holds UTF-16 code units; decode them one code point at a time.
+		for len(exp) > 0 {
+			r, exp = utf16.NextUnsafe(exp)
+			out = append(out, r)
+		}
+	}
+	return out
+}
+
+/*
+  - Case folding is similar to lowercasing.
+  - The result may be a simple mapping, i.e., a single code point, or
+  - a full mapping, i.e., a string.
+  - If the case folding for a code point is the same as its simple (1:1) lowercase mapping,
+  - then only the lowercase mapping is stored.
+ * + - Some special cases are hardcoded because their conditions cannot be + - parsed and processed from CaseFolding.txt. + * + - Unicode 3.2 CaseFolding.txt specifies for its status field: + +# C: common case folding, common mappings shared by both simple and full mappings. +# F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. +# S: simple case folding, mappings to single characters where different from F. +# T: special case for uppercase I and dotted uppercase I +# - For non-Turkic languages, this mapping is normally not used. +# - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. +# +# Usage: +# A. To do a simple case folding, use the mappings with status C + S. +# B. To do a full case folding, use the mappings with status C + F. +# +# The mappings with status T can be used or omitted depending on the desired case-folding +# behavior. (The default option is to exclude them.) + + - Unicode 3.2 has 'T' mappings as follows: + +0049; T; 0131; # LATIN CAPITAL LETTER I +0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + - while the default mappings for these code points are: + +0049; C; 0069; # LATIN CAPITAL LETTER I +0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE + + - U+0130 has no simple case folding (simple-case-folds to itself). 
+*/
+// Fold returns the simple (single code point) case folding of c with the
+// default, non-Turkic mappings. It returns c itself when c has no simple
+// case folding.
+func Fold(c rune) rune {
+	props := ucase.trie.Get16(c)
+	if !hasException(props) {
+		if isUpperOrTitle(props) {
+			c += getDelta(props)
+		}
+	} else {
+		// pe[0] is the exception word (flags); the slots follow it.
+		pe := getExceptions(props)
+		excWord := pe[0]
+		pe = pe[1:]
+		if (excWord & excConditionalFold) != 0 {
+			/* special case folding mappings, hardcoded */
+			/* default mappings */
+			if c == 0x49 {
+				/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
+				return 0x69
+			} else if c == 0x130 {
+				/* no simple case folding for U+0130 */
+				return c
+			}
+		}
+		if (excWord & excNoSimpleCaseFolding) != 0 {
+			return c
+		}
+		if hasSlot(excWord, excDelta) && isUpperOrTitle(props) {
+			var delta int32
+			delta, _ = getSlotValue(excWord, excDelta, pe)
+			if excWord&excDeltaIsNegative == 0 {
+				return c + delta
+			}
+			return c - delta
+		}
+
+		// Prefer an explicit fold slot and fall back to the lowercase slot.
+		var idx int32
+		if hasSlot(excWord, excFold) {
+			idx = excFold
+		} else if hasSlot(excWord, excLower) {
+			idx = excLower
+		} else {
+			return c
+		}
+		c, _ = getSlotValue(excWord, idx, pe)
+	}
+	return c
+}
+
+// FullFolding returns the full case folding of c with the default,
+// non-Turkic mappings. When the folding is a single code point it is returned
+// with a nil slice. When the folding is a string the rune is -1 and the slice
+// holds the string's UTF-16 code units.
+func FullFolding(c rune) (rune, []uint16) {
+	result := c
+	props := ucase.trie.Get16(c)
+
+	if !hasException(props) {
+		if isUpperOrTitle(props) {
+			result = c + getDelta(props)
+		}
+		return result, nil
+	}
+
+	pe := getExceptions(props)
+	excWord := pe[0]
+	pe = pe[1:]
+	var idx int32
+
+	if excWord&excConditionalFold != 0 {
+		/* use hardcoded conditions and mappings */
+		/* default mappings */
+		if c == 0x49 {
+			/* 0049; C; 0069; # LATIN CAPITAL LETTER I */
+			return 0x69, nil
+		} else if c == 0x130 {
+			/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
+			return -1, []uint16{0x69, 0x307}
+		}
+	} else if hasSlot(excWord, excFullMappings) {
+		// pe is deliberately shadowed: the outer pe must keep pointing at the
+		// first slot for the lookups below.
+		full, pe := getSlotValue(excWord, excFullMappings, pe)
+
+		/* start of full case mapping strings */
+		pe = pe[1:]
+
+		/* skip the lowercase result string */
+		pe = pe[full&fullLower:]
+		full = (full >> 4) & 0xf
+
+		if full != 0 {
+			/* set the output pointer to the result string */
+			return -1, pe[:full]
+		}
+	}
+
+	if excWord&excNoSimpleCaseFolding != 0 {
+		return result, nil
+	}
+	if hasSlot(excWord, excDelta) && isUpperOrTitle(props) {
+		delta, _ := getSlotValue(excWord, excDelta, pe)
+		if excWord&excDeltaIsNegative == 0 {
+			return c + delta, nil
+		}
+		return c - delta, nil
+	}
+	if hasSlot(excWord, excFold) {
+		idx = excFold
+	} else if hasSlot(excWord, excLower) {
+		idx = excLower
+	} else {
+		return c, nil
+	}
+	result, _ = getSlotValue(excWord, idx, pe)
+	return result, nil
+}
+
+// Exception slot indexes. Bit idx of the exception word says whether slot idx
+// is present (see hasSlot).
+const (
+	excLower = iota
+	excFold
+	excUpper
+	excTitle
+	excDelta
+	exc5 /* reserved */
+	excClosure
+	excFullMappings
+)
+
+// Flag bits of the exception word above the slot-presence bits.
+const (
+	/* complex/conditional mappings */
+	excConditionalSpecial  = 0x4000
+	excConditionalFold     = 0x8000
+	excNoSimpleCaseFolding = 0x200
+	excDeltaIsNegative     = 0x400
+	excSensitive           = 0x800
+
+	excDoubleSlots = 0x100
+)
+
+// isUpperOrTitle reports whether bit 1 (value 2) of props is set.
+func isUpperOrTitle(props uint16) bool {
+	return props&2 != 0
+}
+
+// getDelta returns the signed delta held in the bits of props above bit 6.
+// The int16 conversion makes the right shift sign-extending.
+func getDelta(props uint16) rune {
+	return rune(int16(props) >> 7)
+}
+
+// getExceptions returns the exceptions data starting at index props>>4. The
+// first element is the exception word.
+func getExceptions(props uint16) []uint16 {
+	return ucase.exceptions[props>>4:]
+}
+
+// hasSlot reports whether bit idx of flags is set.
+func hasSlot(flags uint16, idx int32) bool {
+	return (flags & (1 << idx)) != 0
+}
+
+// slotOffset returns how many slots precede slot idx, i.e. the number of set
+// bits below bit idx in the low 8 bits of flags.
+func slotOffset(flags uint16, idx int32) int {
+	return bits.OnesCount8(uint8(flags & ((1 << idx) - 1)))
+}
+
+// getSlotValue reads slot idx from pExc16, which must begin at the first
+// slot. Without excDoubleSlots each slot is one uint16. With it each slot is
+// two uint16 values combined as high<<16 | low. The returned slice starts at
+// the last uint16 of the slot that was read.
+func getSlotValue(excWord uint16, idx int32, pExc16 []uint16) (int32, []uint16) {
+	if excWord&excDoubleSlots == 0 {
+		pExc16 = pExc16[slotOffset(excWord, idx):]
+		return int32(pExc16[0]), pExc16
+	}
+	pExc16 = pExc16[2*slotOffset(excWord, idx):]
+	return (int32(pExc16[0]) << 16) | int32(pExc16[1]), pExc16[1:]
+}
diff --git a/go/mysql/icuregex/internal/ucase/ucase.go b/go/mysql/icuregex/internal/ucase/ucase.go
new file mode 100644
index 00000000000..9fb8407ea66
--- /dev/null
+++ b/go/mysql/icuregex/internal/ucase/ucase.go
@@ -0,0 +1,425 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package ucase
+
+import (
+	"errors"
+
+	"vitess.io/vitess/go/mysql/icuregex/internal/icudata"
+	"vitess.io/vitess/go/mysql/icuregex/internal/udata"
+	"vitess.io/vitess/go/mysql/icuregex/internal/utf16"
+	"vitess.io/vitess/go/mysql/icuregex/internal/utrie"
+)
+
+// ucase holds the case-mapping data that init loads through readData.
+var ucase struct {
+	trie       *utrie.UTrie2
+	exceptions []uint16
+	unfold     []uint16
+}
+
+// Positions in the indexes array at the start of the data; ixTop is the
+// minimum number of entries readData accepts.
+const (
+	ixIndexTop      = 0
+	ixLength        = 1
+	ixTrieSize      = 2
+	ixExcLength     = 3
+	ixUnfoldLength  = 4
+	ixMaxFullLength = 15
+	ixTop           = 16
+)
+
+// readData parses case-mapping data from bytes into the package-level ucase
+// value. It returns an error when the header is rejected, when the index
+// count is below ixTop, when the trie cannot be read, or when the trie is
+// longer than the size recorded in the indexes.
+func readData(bytes *udata.Bytes) error {
+	// Accept only data format 0x63 0x41 0x53 0x45 ("cASE"), format version 4.
+	err := bytes.ReadHeader(func(info *udata.DataInfo) bool {
+		return info.DataFormat[0] == 0x63 &&
+			info.DataFormat[1] == 0x41 &&
+			info.DataFormat[2] == 0x53 &&
+			info.DataFormat[3] == 0x45 &&
+			info.FormatVersion[0] == 4
+	})
+	if err != nil {
+		return err
+	}
+
+	// The first index is the number of indexes, itself included.
+	count := int32(bytes.Uint32())
+	if count < ixTop {
+		return errors.New("indexes[0] too small in ucase.icu")
+	}
+
+	indexes := make([]int32, count)
+	indexes[0] = count
+
+	for i := int32(1); i < count; i++ {
+		indexes[i] = int32(bytes.Uint32())
+	}
+
+	ucase.trie, err = utrie.UTrie2FromBytes(bytes)
+	if err != nil {
+		return err
+	}
+
+	expectedTrieLength := indexes[ixTrieSize]
+	trieLength := ucase.trie.SerializedLength()
+
+	if trieLength > expectedTrieLength {
+		return errors.New("ucase.icu: not enough bytes for the trie")
+	}
+
+	// Skip whatever follows the trie inside its recorded size.
+	bytes.Skip(expectedTrieLength - trieLength)
+
+	if n := indexes[ixExcLength]; n > 0 {
+		ucase.exceptions = bytes.Uint16Slice(n)
+	}
+	if n := indexes[ixUnfoldLength]; n > 0 {
+		ucase.unfold = bytes.Uint16Slice(n)
+	}
+
+	return nil
+}
+
+// init loads the embedded icudata.UCase data and panics if it cannot be
+// parsed.
+func init() {
+	b := udata.NewBytes(icudata.UCase)
+	if err := readData(b); err != nil {
+		panic(err)
+	}
+}
+
+// propertySet is the set-building sink that AddPropertyStarts and
+// AddCaseClosure write to.
+type propertySet interface {
+	AddRune(ch rune)
+}
+
+// AddPropertyStarts adds to sa the start code point of each same-value range
+// of the case-properties trie.
+func AddPropertyStarts(sa propertySet) {
+	/* add the start code point of each same-value range of the trie */
+	ucase.trie.Enum(nil, func(start, _ rune, _ uint32) bool {
+		sa.AddRune(start)
+		return true
+	})
+
+	/* add code points with hardcoded properties, plus the ones following them */
+
+	/* (none right now, see comment below) */
+
+	/*
+	 * Omit code points with hardcoded specialcasing properties
+	 * because we do not build property UnicodeSets for them right now.
+	 */
+}
+
+const (
+	fullMappingsMaxLength = (4 * 0xf)
+	closureMaxLength      = 0xf
+
+	fullLower   = 0xf
+	fullFolding = 0xf0
+	fullUpper   = 0xf00
+	fullTitle   = 0xf000
+)
+
+// AddCaseClosure adds to sa the code points that are case-equivalent to c:
+// its simple case mappings and the code points of its closure string. Full
+// case mapping strings are skipped, and c itself is not added. The letters
+// i, I, U+0130 and U+0131 are handled by hardcoded rules instead of the data.
+func AddCaseClosure(c rune, sa propertySet) {
+	/*
+	 * Hardcode the case closure of i and its relatives and ignore the
+	 * data file data for these characters.
+	 * The Turkic dotless i and dotted I with their case mapping conditions
+	 * and case folding option make the related characters behave specially.
+	 * This code matches their closure behavior to their case folding behavior.
+	 */
+
+	switch c {
+	case 0x49:
+		/* regular i and I are in one equivalence class */
+		sa.AddRune(0x69)
+		return
+	case 0x69:
+		sa.AddRune(0x49)
+		return
+	case 0x130:
+		/* dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>) */
+		// the Regex engine calls removeAllStrings() on all UnicodeSets, so we don't need to insert them
+		// sa->addString(sa->set, iDot, 2);
+		return
+	case 0x131:
+		/* dotless i is in a class by itself */
+		return
+	default:
+		/* otherwise use the data file data */
+		break
+	}
+
+	props := ucase.trie.Get16(c)
+	if !hasException(props) {
+		if getPropsType(props) != None {
+			/* add the one simple case mapping, no matter what type it is */
+			delta := getDelta(props)
+			if delta != 0 {
+				sa.AddRune(c + delta)
+			}
+		}
+	} else {
+		/*
+		 * c has exceptions, so there may be multiple simple and/or
+		 * full case mappings. Add them all.
+		 */
+		pe := getExceptions(props)
+		excWord := pe[0]
+		pe = pe[1:]
+		var idx int32
+		var closure []uint16
+
+		/* add all simple case mappings */
+		for idx = excLower; idx <= excTitle; idx++ {
+			if hasSlot(excWord, idx) {
+				// Use a separate local so that c still holds the input
+				// code point when the delta slot is applied below.
+				mapping, _ := getSlotValue(excWord, idx, pe)
+				sa.AddRune(mapping)
+			}
+		}
+		if hasSlot(excWord, excDelta) {
+			// The delta is relative to the input code point c.
+			delta, _ := getSlotValue(excWord, excDelta, pe)
+			if excWord&excDeltaIsNegative == 0 {
+				sa.AddRune(c + delta)
+			} else {
+				sa.AddRune(c - delta)
+			}
+		}
+
+		/* get the closure string pointer & length */
+		if hasSlot(excWord, excClosure) {
+			closureLength, pe1 := getSlotValue(excWord, excClosure, pe)
+			closureLength &= closureMaxLength  /* higher bits are reserved */
+			closure = pe1[1 : 1+closureLength] /* behind this slot, unless there are full case mappings */
+		}
+
+		/* add the full case folding */
+		if hasSlot(excWord, excFullMappings) {
+			fullLength, pe1 := getSlotValue(excWord, excFullMappings, pe)
+
+			/* start of full case mapping strings */
+			pe1 = pe1[1:]
+
+			fullLength &= 0xffff /* bits 16 and higher are reserved */
+
+			/* skip the lowercase result string */
+			pe1 = pe1[fullLength&fullLower:]
+			fullLength >>= 4
+
+			/* skip adding the case folding strings */
+			length := fullLength & 0xf
+			pe1 = pe1[length:]
+
+			/* skip the uppercase and titlecase strings */
+			fullLength >>= 4
+			pe1 = pe1[fullLength&0xf:]
+			fullLength >>= 4
+			pe1 = pe1[fullLength:]
+
+			// The closure string sits behind the full mapping strings; keep
+			// the length computed above.
+			closure = pe1[:len(closure)]
+		}
+
+		/* add each code point in the closure string */
+		for len(closure) > 0 {
+			c, closure = utf16.NextUnsafe(closure)
+			sa.AddRune(c)
+		}
+	}
+}
+
+const dotMask = 0x60
+
+/* NOTE(review): the extracted patch text is damaged in the const block below. Everything between "(0" and "> excDotShift" is missing, including the end of this block and the start of the function that follows. The hasException, sensitive and excDotShift declarations used in this file are not visible in this part of the patch either. Restore this region from the original commit. */
+const (
+	noDot       = 0    /* normal characters with cc=0 */
+	softDotted  = 0x20 /* soft-dotted characters with cc=0 */
+	above       = 0x40 /* "above" accents with cc=230 */
+	otherAccent = 0x60 /* other accent character (0> excDotShift) & dotMask)
+}
+
+// IsCaseSensitive reports whether the case-sensitive flag is set for c: the
+// sensitive bit of its trie value, or the excSensitive bit of its exception
+// word when c has exceptions.
+func IsCaseSensitive(c rune) bool {
+	props := ucase.trie.Get16(c)
+	if !hasException(props) {
+		return (props & sensitive) != 0
+	}
+	pe := getExceptions(props)
+	return (pe[0] & excSensitive) != 0
+}
+
+// ToFullLower returns the lowercase mapping of c. The result is ^c (a
+// negative value) when c maps to itself, the length of the mapping string
+// when c has a full (string) lowercase mapping, and the mapped code point
+// otherwise. The mapping string itself is not returned.
+func ToFullLower(c rune) rune {
+	// The sign of the result has meaning, input must be non-negative so that it can be returned as is.
+	result := c
+	props := ucase.trie.Get16(c)
+	if !hasException(props) {
+		if isUpperOrTitle(props) {
+			result = c + getDelta(props)
+		}
+	} else {
+		pe := getExceptions(props)
+		excWord := pe[0]
+		pe = pe[1:]
+
+		if excWord&excConditionalSpecial != 0 {
+			/* use hardcoded conditions and mappings */
+			if c == 0x130 {
+				return 2
+			}
+			/* no known conditional special case mapping, use a normal mapping */
+		} else if hasSlot(excWord, excFullMappings) {
+			full, _ := getSlotValue(excWord, excFullMappings, pe)
+			full = full & fullLower
+			if full != 0 {
+				/* return the string length */
+				return full
+			}
+		}
+
+		if hasSlot(excWord, excDelta) && isUpperOrTitle(props) {
+			delta, _ := getSlotValue(excWord, excDelta, pe)
+			if (excWord & excDeltaIsNegative) == 0 {
+				return c + delta
+			}
+			return c - delta
+		}
+		if hasSlot(excWord, excLower) {
+			result, _ = getSlotValue(excWord, excLower, pe)
+		}
+	}
+
+	if result == c {
+		return ^result
+	}
+	return result
+}
+
+// ToFullUpper returns the uppercase mapping of c, using the same result
+// convention as ToFullLower.
+func ToFullUpper(c rune) rune {
+	return toUpperOrTitle(c, true)
+}
+
+// ToFullTitle returns the titlecase mapping of c, using the same result
+// convention as ToFullLower.
+func ToFullTitle(c rune) rune {
+	return toUpperOrTitle(c, false)
+}
+
+// toUpperOrTitle implements ToFullUpper (upperNotTitle true) and ToFullTitle
+// (upperNotTitle false). The result is ^c when c maps to itself, the length
+// of the mapping string when c has a full (string) mapping, and the mapped
+// code point otherwise.
+func toUpperOrTitle(c rune, upperNotTitle bool) rune {
+	result := c
+	props := ucase.trie.Get16(c)
+	if !hasException(props) {
+		if getPropsType(props) == Lower {
+			result = c + getDelta(props)
+		}
+	} else {
+		pe := getExceptions(props)
+		excWord := pe[0]
+		pe = pe[1:]
+
+		if excWord&excConditionalSpecial != 0 {
+			if c == 0x0587 {
+				return 2
+			}
+			/* no known conditional special case mapping, use a normal mapping */
+		} else if hasSlot(excWord, excFullMappings) {
+			full, _ := getSlotValue(excWord, excFullMappings, pe)
+
+			/* skip the lowercase and case-folding result strings */
+			full >>= 8
+
+			if upperNotTitle {
+				full &= 0xf
+			} else {
+				/* skip the uppercase result string */
+				full = (full >> 4) & 0xf
+			}
+
+			if full != 0 {
+				/* return the string length */
+				return full
+			}
+		}
+
+		if hasSlot(excWord, excDelta) && getPropsType(props) == Lower {
+			delta, _ := getSlotValue(excWord, excDelta, pe)
+			if (excWord & excDeltaIsNegative) == 0 {
+				return c + delta
+			}
+			return c - delta
+		}
+		var idx int32
+		if !upperNotTitle && hasSlot(excWord, excTitle) {
+			idx = excTitle
+		} else if hasSlot(excWord, excUpper) {
+			/* here, titlecase is same as uppercase */
+			idx = excUpper
+		} else {
+			return ^c
+		}
+		result, _ = getSlotValue(excWord, idx, pe)
+	}
+
+	if result == c {
+		return ^result
+	}
+	return result
+}
+
+// GetTypeOrIgnorable returns the low three bits of the trie value for c.
+func GetTypeOrIgnorable(c rune) int32 {
+	props := ucase.trie.Get16(c)
+	return int32(props & 7)
+}
+
+// Type is the case type stored in the low two bits of a trie value.
+type Type int32
+
+const (
+	None Type = iota
+	Lower
+	Upper
+	Title
+)
+
+const typeMask = 3
+
+// GetType returns the case Type of c.
+func GetType(c rune) Type {
+	props := ucase.trie.Get16(c)
+	return getPropsType(props)
+}
+
+// getPropsType extracts the Type from the typeMask bits of props.
+func getPropsType(props uint16) Type {
+	return Type(props & typeMask)
+}
diff --git a/go/mysql/icuregex/internal/uchar/constants.go b/go/mysql/icuregex/internal/uchar/constants.go
new file mode 100644
index 00000000000..1ab96751b5c
--- /dev/null
+++ b/go/mysql/icuregex/internal/uchar/constants.go
@@ -0,0 +1,240 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package uchar
+
+import "golang.org/x/exp/constraints"
+
+// uMask returns a uint32 with only bit number x set.
+func uMask[T constraints.Integer](x T) uint32 {
+	return 1 << x
+}
+
+// Category is a Unicode general category value.
+type Category int8
+
+const (
+	/*
+	 * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+	 * It matches pairs of lines like
+	 *     / ** comment... * /
+	 *     U_<[A-Z_]+> = <integer>,
+	 */
+
+	/** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+	Unassigned Category = 0
+	/** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+	// iota is 1 on this line, so the value is 0 and the constants below count up from 1.
+	GeneralOtherTypes Category = iota - 1
+	/** Lu @stable ICU 2.0 */
+	UppercaseLetter
+	/** Ll @stable ICU 2.0 */
+	LowercaseLetter
+	/** Lt @stable ICU 2.0 */
+	TitlecaseLetter
+	/** Lm @stable ICU 2.0 */
+	ModifierLetter
+	/** Lo @stable ICU 2.0 */
+	OtherLetter
+	/** Mn @stable ICU 2.0 */
+	// NOTE(review): "Mask" here and in CombiningSpacingMask below looks like a typo for "Mark" (Mn and Mc are mark categories); the names are exported, so confirm callers before renaming.
+	NonSpacingMask
+	/** Me @stable ICU 2.0 */
+	EnclosingMark
+	/** Mc @stable ICU 2.0 */
+	CombiningSpacingMask
+	/** Nd @stable ICU 2.0 */
+	DecimalDigitNumber
+	/** Nl @stable ICU 2.0 */
+	LetterNumber
+	/** No @stable ICU 2.0 */
+	OtherNumber
+	/** Zs @stable ICU 2.0 */
+	SpaceSeparator
+	/** Zl @stable ICU 2.0 */
+	LineSeparator
+	/** Zp @stable ICU 2.0 */
+	ParagraphSeparator
+	/** Cc @stable ICU 2.0 */
+	ControlChar
+	/** Cf @stable ICU 2.0 */
+	FormatChar
+	/** Co @stable ICU 2.0 */
+	PrivateUseChar
+	/** Cs @stable ICU 2.0 */
+	Surrogate
+	/** Pd @stable ICU 2.0 */
+	DashPunctuation
+	/** Ps @stable ICU 2.0 */
+	StartPunctuation
+	/** Pe @stable ICU 2.0 */
+	EndPunctuation
+	/** Pc @stable ICU 2.0 */
+	ConnectorPunctuation
+	/** Po @stable ICU 2.0 */
+	OtherPunctuation
+	/** Sm @stable ICU 2.0 */
+	MathSymbol
+	/** Sc @stable ICU 2.0 */
+	CurrencySymbol
+	/** Sk @stable ICU 2.0 */
+	ModifierSymbol
+	/** So @stable ICU 2.0 */
+	OtherSymbol
+	/** Pi @stable ICU 2.0 */
+	InitialPunctuation
+	/** Pf @stable ICU 2.0 */
+	FinalPunctuation
+	/**
+	 * One higher than the last enum UCharCategory constant.
+	 * This numeric value is stable (will not change), see
+	 * http://www.unicode.org/policies/stability_policy.html#Property_Value
+	 *
+	 * @stable ICU 2.0
+	 */
+	CharCategoryCount
+)
+
+// Single-bit masks, one per Category value (bit number = category value),
+// followed by combined masks for groups of categories.
+var (
+	GcCnMask = uMask(GeneralOtherTypes)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcLuMask = uMask(UppercaseLetter)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcLlMask = uMask(LowercaseLetter)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcLtMask = uMask(TitlecaseLetter)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcLmMask = uMask(ModifierLetter)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcLoMask = uMask(OtherLetter)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcMnMask = uMask(NonSpacingMask)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcMeMask = uMask(EnclosingMark)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcMcMask = uMask(CombiningSpacingMask)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcNdMask = uMask(DecimalDigitNumber)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcNlMask = uMask(LetterNumber)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcNoMask = uMask(OtherNumber)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcZsMask = uMask(SpaceSeparator)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcZlMask = uMask(LineSeparator)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcZpMask = uMask(ParagraphSeparator)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcCcMask = uMask(ControlChar)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcCfMask = uMask(FormatChar)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcCoMask = uMask(PrivateUseChar)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcCsMask = uMask(Surrogate)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcPdMask = uMask(DashPunctuation)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcPsMask = uMask(StartPunctuation)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcPeMask = uMask(EndPunctuation)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcPcMask = uMask(ConnectorPunctuation)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcPoMask = uMask(OtherPunctuation)
+
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcSmMask = uMask(MathSymbol)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcScMask = uMask(CurrencySymbol)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcSkMask = uMask(ModifierSymbol)
+	/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+	GcSoMask = uMask(OtherSymbol)
+
+	/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+	GcLMask = (GcLuMask | GcLlMask | GcLtMask | GcLmMask | GcLoMask)
+
+	/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+	GcLcMask = (GcLuMask | GcLlMask | GcLtMask)
+
+	/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+	GcMMask = (GcMnMask | GcMeMask | GcMcMask)
+
+	/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+	GcNMask = (GcNdMask | GcNlMask | GcNoMask)
+
+	/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+	GcZMask = (GcZsMask | GcZlMask | GcZpMask)
+)
+
+const upropsAgeShift = 24
+const maxVersionLength = 4
+const versionDelimiter = '.'
+
+// UVersionInfo holds a version number as maxVersionLength numeric fields.
+type UVersionInfo [maxVersionLength]uint8
+
+// Start values of the ranges used to encode numeric type and value (ntv).
+const (
+	/** No numeric value. */
+	UPropsNtvNone = 0
+	/** Decimal digits: nv=0..9 */
+	UPropsNtvDecimalStart = 1
+	/** Other digits: nv=0..9 */
+	UPropsNtvDigitStart = 11
+	/** Small integers: nv=0..154 */
+	UPropsNtvNumericStart = 21
+	/** Fractions: ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16 */
+	UPropsNtvFractionStart = 0xb0
+	/**
+	 * Large integers:
+	 * ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
+	 * (only one significant decimal digit)
+	 */
+	UPropsNtvLargeStart = 0x1e0
+	/**
+	 * Sexagesimal numbers:
+	 * ((ntv>>2)-0xbf) * 60^((ntv&3)+1) = (1..9)*(60^1..60^4)
+	 */
+	UPropsNtvBase60Start = 0x300
+	/**
+	 * Fraction-20 values:
+	 * frac20 = ntv-0x324 = 0..0x17 -> 1|3|5|7 / 20|40|80|160|320|640
+	 * numerator: num = 2*(frac20&3)+1
+	 * denominator: den = 20<<(frac20>>2)
+	 */
+	UPropsNtvFraction20Start = UPropsNtvBase60Start + 36 // 0x300+9*4=0x324
+	/**
+	 * Fraction-32 values:
+	 * frac32 = ntv-0x34c = 0..15 -> 1|3|5|7 / 32|64|128|256
+	 * numerator: num = 2*(frac32&3)+1
+	 * denominator: den = 32<<(frac32>>2)
+	 */
+	UPropsNtvFraction32Start = UPropsNtvFraction20Start + 24 // 0x324+6*4=0x34c
+	/** No numeric value (yet). */
+	UPropsNtvReservedStart = UPropsNtvFraction32Start + 16 // 0x34c+4*4=0x35c
+
+	UPropsNtvMaxSmallInt = UPropsNtvFractionStart - UPropsNtvNumericStart - 1
+)
+
+const noNumericValue = -123456789.0
diff --git a/go/mysql/icuregex/internal/uchar/uchar.go b/go/mysql/icuregex/internal/uchar/uchar.go
new file mode 100644
index 00000000000..a2c758ea1c0
--- /dev/null
+++ b/go/mysql/icuregex/internal/uchar/uchar.go
@@ -0,0 +1,405 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uchar + +import ( + "errors" + "strconv" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +var uprops struct { + trie *utrie.UTrie2 + trie2 *utrie.UTrie2 + vectorsColumns int32 + vectors []uint32 + scriptExtensions []uint16 +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x55 && + info.DataFormat[1] == 0x50 && + info.DataFormat[2] == 0x72 && + info.DataFormat[3] == 0x6f && + info.FormatVersion[0] == 7 + }) + if err != nil { + return err + } + + propertyOffset := bytes.Int32() + /* exceptionOffset = */ bytes.Int32() + /* caseOffset = */ bytes.Int32() + additionalOffset := bytes.Int32() + additionalVectorsOffset := bytes.Int32() + uprops.vectorsColumns = bytes.Int32() + scriptExtensionsOffset := bytes.Int32() + reservedOffset7 := bytes.Int32() + /* reservedOffset8 = */ bytes.Int32() + /* dataTopOffset = */ bytes.Int32() + _ = bytes.Int32() + _ = bytes.Int32() + bytes.Skip((16 - 12) << 2) + + uprops.trie, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength := (propertyOffset - 16) * 4 + trieLength := uprops.trie.SerializedLength() + + if trieLength > expectedTrieLength { + return 
errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + bytes.Skip((additionalOffset - propertyOffset) * 4) + + if uprops.vectorsColumns > 0 { + uprops.trie2, err = utrie.UTrie2FromBytes(bytes) + if err != nil { + return err + } + + expectedTrieLength = (additionalVectorsOffset - additionalOffset) * 4 + trieLength = uprops.trie2.SerializedLength() + + if trieLength > expectedTrieLength { + return errors.New("ucase.icu: not enough bytes for the trie") + } + + bytes.Skip(expectedTrieLength - trieLength) + uprops.vectors = bytes.Uint32Slice(scriptExtensionsOffset - additionalVectorsOffset) + } + + if n := (reservedOffset7 - scriptExtensionsOffset) * 2; n > 0 { + uprops.scriptExtensions = bytes.Uint16Slice(n) + } + + return nil +} + +func init() { + b := udata.NewBytes(icudata.UProps) + if err := readData(b); err != nil { + panic(err) + } +} + +type PropertySet interface { + AddRune(ch rune) +} + +func VecAddPropertyStarts(sa PropertySet) { + uprops.trie2.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) +} + +const ( + tab = 0x0009 + lf = 0x000a + ff = 0x000c + cr = 0x000d + nbsp = 0x00a0 + cgj = 0x034f + figuresp = 0x2007 + hairsp = 0x200a + zwnj = 0x200c + zwj = 0x200d + rlm = 0x200f + nnbsp = 0x202f + zwnbsp = 0xfef +) + +func AddPropertyStarts(sa PropertySet) { + /* add the start code point of each same-value range of the main trie */ + uprops.trie.Enum(nil, func(start, _ rune, _ uint32) bool { + sa.AddRune(start) + return true + }) + + /* add code points with hardcoded properties, plus the ones following them */ + + /* add for u_isblank() */ + sa.AddRune(tab) + sa.AddRune(tab + 1) + + /* add for IS_THAT_CONTROL_SPACE() */ + sa.AddRune(cr + 1) /* range TAB..CR */ + sa.AddRune(0x1c) + sa.AddRune(0x1f + 1) + sa.AddRune(0x85) // NEXT LINE (NEL) + sa.AddRune(0x85 + 1) + + /* add for u_isIDIgnorable() what was not added above */ + sa.AddRune(0x7f) /* range DEL..NBSP-1, NBSP added 
below */ + sa.AddRune(hairsp) + sa.AddRune(rlm + 1) + sa.AddRune(0x206a) // INHIBIT SYMMETRIC SWAPPING + sa.AddRune(0x206f + 1) // NOMINAL DIGIT SHAPES + sa.AddRune(zwnbsp) + sa.AddRune(zwnbsp + 1) + + /* add no-break spaces for u_isWhitespace() what was not added above */ + sa.AddRune(nbsp) + sa.AddRune(nbsp + 1) + sa.AddRune(figuresp) + sa.AddRune(figuresp + 1) + sa.AddRune(nnbsp) + sa.AddRune(nnbsp + 1) + + /* add for u_digit() */ + sa.AddRune('a') + sa.AddRune('z' + 1) + sa.AddRune('A') + sa.AddRune('Z' + 1) + // fullwidth + sa.AddRune('a') + sa.AddRune('z' + 1) + sa.AddRune('A') + sa.AddRune('Z' + 1) + + /* add for u_isxdigit() */ + sa.AddRune('f' + 1) + sa.AddRune('F' + 1) + // fullwidth + sa.AddRune('f' + 1) + sa.AddRune('F' + 1) + + /* add for UCHAR_DEFAULT_IGNORABLE_CODE_POINT what was not added above */ + sa.AddRune(0x2060) /* range 2060..206f */ + sa.AddRune(0xfff0) + sa.AddRune(0xfffb + 1) + sa.AddRune(0xe0000) + sa.AddRune(0xe0fff + 1) + + /* add for UCHAR_GRAPHEME_BASE and others */ + sa.AddRune(cgj) + sa.AddRune(cgj + 1) +} + +func CharType(c rune) Category { + props := uprops.trie.Get16(c) + return getCategory(props) +} + +func GetProperties(c rune) uint16 { + return uprops.trie.Get16(c) +} + +func getCategory(props uint16) Category { + return Category(props & 0x1f) +} + +func GetUnicodeProperties(c rune, column int) uint32 { + if column >= int(uprops.vectorsColumns) { + return 0 + } + vecIndex := uprops.trie2.Get16(c) + return uprops.vectors[int(vecIndex)+column] +} + +func ScriptExtension(idx uint32) uint16 { + return uprops.scriptExtensions[idx] +} + +func ScriptExtensions(idx uint32) []uint16 { + return uprops.scriptExtensions[idx:] +} + +func IsDigit(c rune) bool { + return CharType(c) == DecimalDigitNumber +} + +func IsPOSIXPrint(c rune) bool { + return CharType(c) == SpaceSeparator || IsGraphPOSIX(c) +} + +func IsGraphPOSIX(c rune) bool { + props := uprops.trie.Get16(c) + /* \p{space}\p{gc=Control} == \p{gc=Z}\p{Control} */ + /* comparing ==0 
returns FALSE for the categories mentioned */ + return uMask(getCategory(props))&(GcCcMask|GcCsMask|GcCnMask|GcZMask) == 0 +} + +func IsXDigit(c rune) bool { + /* check ASCII and Fullwidth ASCII a-fA-F */ + if (c <= 0x66 && c >= 0x41 && (c <= 0x46 || c >= 0x61)) || + (c >= 0xff21 && c <= 0xff46 && (c <= 0xff26 || c >= 0xff41)) { + return true + } + return IsDigit(c) +} + +func IsBlank(c rune) bool { + if c <= 0x9f { + return c == 9 || c == 0x20 /* TAB or SPACE */ + } + /* Zs */ + return CharType(c) == SpaceSeparator +} + +func CharAge(c rune) UVersionInfo { + version := GetUnicodeProperties(c, 0) >> upropsAgeShift + return UVersionInfo{uint8(version >> 4), uint8(version & 0xf), 0, 0} +} + +func VersionFromString(str string) (version UVersionInfo) { + part := 0 + for len(str) > 0 && part < maxVersionLength { + if str[0] == versionDelimiter { + str = str[1:] + } + str, version[part] = parseInt(str) + part++ + } + return +} + +// parseInt is simplified but aims to mimic strtoul usage +// as it is used for ICU version parsing. 
+func parseInt(str string) (string, uint8) { + if str == "" { + return str, 0 + } + + start := 0 + end := 0 +whitespace: + for i := 0; i < len(str); i++ { + switch str[i] { + case ' ', '\f', '\n', '\r', '\t', '\v': + start++ + continue + default: + break whitespace + } + } + str = str[start:] + + for i := 0; i < len(str); i++ { + if str[i] < '0' || str[i] > '9' { + end = i + break + } + end++ + } + + val, err := strconv.ParseUint(str[start:end], 10, 8) + if err != nil { + return str[end:], 0 + } + return str[end:], uint8(val) +} + +const upropsNumericTypeValueShift = 6 + +func NumericTypeValue(c rune) uint16 { + props := uprops.trie.Get16(c) + return props >> upropsNumericTypeValueShift +} + +func NumericValue(c rune) float64 { + ntv := int32(NumericTypeValue(c)) + + if ntv == UPropsNtvNone { + return noNumericValue + } else if ntv < UPropsNtvDigitStart { + /* decimal digit */ + return float64(ntv - UPropsNtvDecimalStart) + } else if ntv < UPropsNtvNumericStart { + /* other digit */ + return float64(ntv - UPropsNtvDigitStart) + } else if ntv < UPropsNtvFractionStart { + /* small integer */ + return float64(ntv - UPropsNtvNumericStart) + } else if ntv < UPropsNtvLargeStart { + /* fraction */ + numerator := (ntv >> 4) - 12 + denominator := (ntv & 0xf) + 1 + return float64(numerator) / float64(denominator) + } else if ntv < UPropsNtvBase60Start { + /* large, single-significant-digit integer */ + mant := (ntv >> 5) - 14 + exp := (ntv & 0x1f) + 2 + numValue := float64(mant) + + /* multiply by 10^exp without math.h */ + for exp >= 4 { + numValue *= 10000. 
+ exp -= 4 + } + switch exp { + case 3: + numValue *= 1000.0 + case 2: + numValue *= 100.0 + case 1: + numValue *= 10.0 + case 0: + default: + } + + return numValue + } else if ntv < UPropsNtvFraction20Start { + /* sexagesimal (base 60) integer */ + numValue := (ntv >> 2) - 0xbf + exp := (ntv & 3) + 1 + + switch exp { + case 4: + numValue *= 60 * 60 * 60 * 60 + case 3: + numValue *= 60 * 60 * 60 + case 2: + numValue *= 60 * 60 + case 1: + numValue *= 60 + case 0: + default: + } + + return float64(numValue) + } else if ntv < UPropsNtvFraction32Start { + // fraction-20 e.g. 3/80 + frac20 := ntv - UPropsNtvFraction20Start // 0..0x17 + numerator := 2*(frac20&3) + 1 + denominator := 20 << (frac20 >> 2) + return float64(numerator) / float64(denominator) + } else if ntv < UPropsNtvReservedStart { + // fraction-32 e.g. 3/64 + frac32 := ntv - UPropsNtvFraction32Start // 0..15 + numerator := 2*(frac32&3) + 1 + denominator := 32 << (frac32 >> 2) + return float64(numerator) / float64(denominator) + } else { + /* reserved */ + return noNumericValue + } +} diff --git a/go/mysql/icuregex/internal/udata/udata.go b/go/mysql/icuregex/internal/udata/udata.go new file mode 100644 index 00000000000..f20f8be1efa --- /dev/null +++ b/go/mysql/icuregex/internal/udata/udata.go @@ -0,0 +1,155 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// DataInfo is the fixed-size descriptor that follows the size and magic bytes
// at the start of an ICU data item.
type DataInfo struct {
	// Size is sizeof(UDataInfo).
	Size uint16

	// ReservedWord is unused, set to 0.
	ReservedWord uint16

	/* platform data properties */

	// IsBigEndian is 0 for little-endian data, 1 for big-endian.
	IsBigEndian uint8

	// CharsetFamily: see U_CHARSET_FAMILY values in utypes.h.
	CharsetFamily uint8

	// SizeofUChar is sizeof(UChar), one of { 1, 2, 4 }.
	SizeofUChar uint8

	// ReservedByte is unused, set to 0.
	ReservedByte uint8

	// DataFormat is the data format identifier.
	DataFormat [4]uint8

	// FormatVersion: [0] major [1] minor [2] milli [3] micro.
	FormatVersion [4]uint8

	// DataVersion: [0] major [1] minor [2] milli [3] micro.
	DataVersion [4]uint8
}

// Bytes is a forward-only cursor over a data item. Every read method consumes
// the bytes it returns and panics (slice bounds) when fewer bytes are left.
type Bytes struct {
	buf  []byte // unread remainder
	orig []byte // the complete input, used by Position
	enc  binary.ByteOrder
}

// NewBytes returns a little-endian cursor positioned at the start of b.
func NewBytes(b []byte) *Bytes {
	return &Bytes{buf: b, orig: b, enc: binary.LittleEndian}
}

// Layout of the item header: a uint16 header size, two magic bytes, and then
// the 20 bytes of DataInfo.
const (
	headerMagic1   = 0xda
	headerMagic2   = 0x27
	headerInfoAt   = 4
	headerFixedLen = headerInfoAt + 20
)

// ReadHeader decodes and validates the item header and advances the cursor
// past it (by the header size stored in the data).
//
// It returns an error when the buffer is too short to hold a header, the magic
// bytes are wrong, the data is big-endian, isValid rejects the DataInfo, or the
// stored header size exceeds the buffer. The cursor is left untouched on error.
func (b *Bytes) ReadHeader(isValid func(info *DataInfo) bool) error {
	// Decode field by field with bounds checks instead of overlaying a struct
	// on the buffer: that keeps short input from being read out of bounds and
	// does not depend on the byte order of the host.
	if len(b.buf) < headerFixedLen {
		return errors.New("not enough bytes for the data header")
	}
	hdr := b.buf[:headerFixedLen]

	if hdr[2] != headerMagic1 || hdr[3] != headerMagic2 {
		return errors.New("invalid magic number")
	}

	info := DataInfo{
		Size:          b.enc.Uint16(hdr[headerInfoAt:]),
		ReservedWord:  b.enc.Uint16(hdr[headerInfoAt+2:]),
		IsBigEndian:   hdr[headerInfoAt+4],
		CharsetFamily: hdr[headerInfoAt+5],
		SizeofUChar:   hdr[headerInfoAt+6],
		ReservedByte:  hdr[headerInfoAt+7],
	}
	copy(info.DataFormat[:], hdr[headerInfoAt+8:])
	copy(info.FormatVersion[:], hdr[headerInfoAt+12:])
	copy(info.DataVersion[:], hdr[headerInfoAt+16:])

	if info.IsBigEndian != 0 {
		return errors.New("unsupported: BigEndian data source")
	}

	if !isValid(&info) {
		return errors.New("failed to validate data header")
	}

	headerSize := int(b.enc.Uint16(hdr))
	if headerSize > len(b.buf) {
		return errors.New("header size exceeds the available data")
	}

	b.buf = b.buf[headerSize:]
	return nil
}

// Uint8 consumes and returns one byte.
func (b *Bytes) Uint8() uint8 {
	u := b.buf[0]
	b.buf = b.buf[1:]
	return u
}

// Uint16 consumes and returns a 16-bit value.
func (b *Bytes) Uint16() uint16 {
	u := b.enc.Uint16(b.buf)
	b.buf = b.buf[2:]
	return u
}

// Uint16Slice consumes 2*size bytes and returns them as a []uint16 that
// aliases the input (no copy, host byte order).
func (b *Bytes) Uint16Slice(size int32) []uint16 {
	// Slice the remainder first so that an out-of-range size panics before an
	// over-long unsafe slice is ever constructed.
	rest := b.buf[2*size:]
	s := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b.buf))), size)
	b.buf = rest
	return s
}

// Uint32Slice consumes 4*size bytes and returns them as a []uint32 that
// aliases the input (no copy, host byte order).
func (b *Bytes) Uint32Slice(size int32) []uint32 {
	rest := b.buf[4*size:]
	s := unsafe.Slice((*uint32)(unsafe.Pointer(unsafe.SliceData(b.buf))), size)
	b.buf = rest
	return s
}

// Uint32 consumes and returns a 32-bit value.
func (b *Bytes) Uint32() uint32 {
	u := b.enc.Uint32(b.buf)
	b.buf = b.buf[4:]
	return u
}

// Int32 consumes a 32-bit value and returns it as a signed integer.
func (b *Bytes) Int32() int32 {
	return int32(b.Uint32())
}

// Skip discards the next size bytes.
func (b *Bytes) Skip(size int32) {
	b.buf = b.buf[size:]
}

// Uint8Slice consumes n bytes and returns them as a slice aliasing the input.
func (b *Bytes) Uint8Slice(n int32) []uint8 {
	s := b.buf[:n]
	b.buf = b.buf[n:]
	return s
}

// Position returns the number of bytes consumed since NewBytes.
func (b *Bytes) Position() int32 {
	return int32(len(b.orig) - len(b.buf))
}
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package ulayout + +import ( + "errors" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utrie" +) + +var inpcTrie *utrie.UcpTrie +var inscTrie *utrie.UcpTrie +var voTrie *utrie.UcpTrie + +const ( + ixInpcTrieTop = 1 + ixInscTrieTop = 2 + ixVoTrieTop = 3 + + ixCount = 12 +) + +func InpcTrie() *utrie.UcpTrie { + loadLayouts() + return inpcTrie +} + +func InscTrie() *utrie.UcpTrie { + loadLayouts() + return inscTrie +} + +func VoTrie() *utrie.UcpTrie { + loadLayouts() + return voTrie +} + +var layoutsOnce sync.Once + +func loadLayouts() { + layoutsOnce.Do(func() { + b := udata.NewBytes(icudata.ULayout) + if err := readData(b); err != nil { + panic(err) + } + }) +} + +func readData(bytes *udata.Bytes) error { + err := bytes.ReadHeader(func(info *udata.DataInfo) bool { + return info.DataFormat[0] == 0x4c && + info.DataFormat[1] == 0x61 && + info.DataFormat[2] == 0x79 && + info.DataFormat[3] == 0x6f && + info.FormatVersion[0] == 1 + }) + if err != nil { + return err + } + + startPos := bytes.Position() + indexesLength := int32(bytes.Uint32()) // inIndexes[IX_INDEXES_LENGTH] + if indexesLength < ixCount { + return errors.New("text layout properties data: not enough indexes") + } + index := make([]int32, indexesLength) + index[0] = indexesLength + for i := int32(1); i < indexesLength; i++ { + index[i] = int32(bytes.Uint32()) + } + + offset := indexesLength * 4 + top := index[ixInpcTrieTop] + trieSize := top - offset + if trieSize >= 16 { + inpcTrie, err = 
utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + + pos := bytes.Position() - startPos + bytes.Skip(top - pos) + offset = top + top = index[ixInscTrieTop] + trieSize = top - offset + if trieSize >= 16 { + inscTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + + pos = bytes.Position() - startPos + bytes.Skip(top - pos) + offset = top + top = index[ixVoTrieTop] + trieSize = top - offset + if trieSize >= 16 { + voTrie, err = utrie.UcpTrieFromBytes(bytes) + if err != nil { + return err + } + } + return nil +} diff --git a/go/mysql/icuregex/internal/unames/unames.go b/go/mysql/icuregex/internal/unames/unames.go new file mode 100644 index 00000000000..45920be8292 --- /dev/null +++ b/go/mysql/icuregex/internal/unames/unames.go @@ -0,0 +1,471 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package unames + +import ( + "bytes" + "strconv" + "strings" + "sync" + + "vitess.io/vitess/go/mysql/icuregex/internal/icudata" + "vitess.io/vitess/go/mysql/icuregex/internal/udata" +) + +var charNamesOnce sync.Once +var charNames *unames + +type unames struct { + tokens []uint16 + tokenStrings []uint8 + groups []uint16 + groupNames []uint8 + algNames []algorithmicRange +} + +func loadCharNames() { + charNamesOnce.Do(func() { + b := udata.NewBytes(icudata.UNames) + if err := b.ReadHeader(func(info *udata.DataInfo) bool { + return info.Size >= 20 && + info.IsBigEndian == 0 && + info.CharsetFamily == 0 && + info.DataFormat[0] == 0x75 && /* dataFormat="unam" */ + info.DataFormat[1] == 0x6e && + info.DataFormat[2] == 0x61 && + info.DataFormat[3] == 0x6d && + info.FormatVersion[0] == 1 + }); err != nil { + panic(err) + } + + tokenStringOffset := int32(b.Uint32() - 16) + groupsOffset := int32(b.Uint32() - 16) + groupStringOffset := int32(b.Uint32() - 16) + algNamesOffset := int32(b.Uint32() - 16) + charNames = &unames{ + tokens: b.Uint16Slice(tokenStringOffset / 2), + tokenStrings: b.Uint8Slice(groupsOffset - tokenStringOffset), + groups: b.Uint16Slice((groupStringOffset - groupsOffset) / 2), + groupNames: b.Uint8Slice(algNamesOffset - groupStringOffset), + } + + algCount := b.Uint32() + charNames.algNames = make([]algorithmicRange, 0, algCount) + + for i := uint32(0); i < algCount; i++ { + ar := algorithmicRange{ + start: b.Uint32(), + end: b.Uint32(), + typ: b.Uint8(), + variant: b.Uint8(), + } + size := b.Uint16() + switch ar.typ { + case 0: + ar.s = b.Uint8Slice(int32(size) - 12) + case 1: + ar.factors = b.Uint16Slice(int32(ar.variant)) + ar.s = b.Uint8Slice(int32(size) - 12 - int32(ar.variant)*2) + } + charNames.algNames = append(charNames.algNames, ar) + } + }) +} + +func (names *unames) getGroupName(group []uint16) []uint8 { + return names.groupNames[names.getGroupOffset(group):] +} + +type NameChoice int32 + +const ( + UnicodeCharName NameChoice = iota + 
/** + * The Unicode_1_Name property value which is of little practical value. + * Beginning with ICU 49, ICU APIs return an empty string for this name choice. + * @deprecated ICU 49 + */ + Unicode10CharName + /** Standard or synthetic character name. @stable ICU 2.0 */ + ExtendedCharName + /** Corrected name from NameAliases.txt. @stable ICU 4.4 */ + CharNameAlias +) + +type algorithmicRange struct { + start, end uint32 + typ, variant uint8 + factors []uint16 + s []uint8 +} + +func (ar *algorithmicRange) findAlgName(otherName string) rune { + switch ar.typ { + case 0: + s := ar.s + + for s[0] != 0 && len(otherName) > 0 { + if s[0] != otherName[0] { + return -1 + } + s = s[1:] + otherName = otherName[1:] + } + + var code rune + count := int(ar.variant) + for i := 0; i < count && len(otherName) > 0; i++ { + c := rune(otherName[0]) + otherName = otherName[1:] + if '0' <= c && c <= '9' { + code = (code << 4) | (c - '0') + } else if 'A' <= c && c <= 'F' { + code = (code << 4) | (c - 'A' + 10) + } else { + return -1 + } + } + + if len(otherName) == 0 && ar.start <= uint32(code) && uint32(code) <= ar.end { + return code + } + case 1: + factors := ar.factors + s := ar.s + + for s[0] != 0 && len(otherName) > 0 { + if s[0] != otherName[0] { + return -1 + } + s = s[1:] + otherName = otherName[1:] + } + s = s[1:] + + start := rune(ar.start) + limit := rune(ar.end + 1) + + var indexes [8]uint16 + var buf strings.Builder + var elements [8][]byte + var elementBases [8][]byte + + ar.writeFactorSuffix0(factors, s, &buf, &elements, &elementBases) + if buf.String() == otherName { + return start + } + + for start+1 < limit { + start++ + i := len(factors) + + for { + i-- + idx := indexes[i] + 1 + if idx < factors[i] { + indexes[i] = idx + s = elements[i] + s = s[bytes.IndexByte(s, 0)+1:] + elements[i] = s + break + } + + indexes[i] = 0 + elements[i] = elementBases[i] + } + + t := otherName + for i = 0; i < len(factors); i++ { + s = elements[i] + + for s[0] != 0 && len(t) > 0 { + if 
s[0] != t[0] { + s = nil + i = 99 + break + } + s = s[1:] + t = t[1:] + } + } + if i < 99 && len(t) == 0 { + return start + } + } + } + return -1 +} + +func (ar *algorithmicRange) writeFactorSuffix0(factors []uint16, s []uint8, buf *strings.Builder, elements, elementBases *[8][]byte) { + /* write each element */ + for i := 0; i < len(factors); i++ { + (*elements)[i] = s + (*elementBases)[i] = s + + nul := bytes.IndexByte(s, 0) + buf.Write(s[:nul]) + s = s[nul+1:] + + factor := int(factors[i] - 1) + for factor > 0 { + s = s[bytes.IndexByte(s, 0)+1:] + factor-- + } + } +} + +func CharForName(nameChoice NameChoice, name string) rune { + loadCharNames() + + lower := strings.ToLower(name) + upper := strings.ToUpper(name) + + if lower[0] == '<' { + if nameChoice == ExtendedCharName && lower[len(lower)-1] == '>' { + if limit := strings.LastIndexByte(lower, '-'); limit >= 2 { + cp, err := strconv.ParseUint(lower[limit+1:len(lower)-1], 16, 32) + if err != nil || cp > 0x10ffff { + return -1 + } + return rune(cp) + } + } + return -1 + } + + for _, ar := range charNames.algNames { + if cp := ar.findAlgName(upper); cp != -1 { + return cp + } + } + + return charNames.enumNames(0, 0x10ffff+1, upper, nameChoice) +} + +const groupShift = 5 +const linesPerGroup = 1 << groupShift +const groupMask = linesPerGroup - 1 + +const ( + groupMsb = iota + groupOffsetHigh + groupOffsetLow + groupLength +) + +func (names *unames) enumNames(start, limit rune, otherName string, nameChoice NameChoice) rune { + startGroupMSB := uint16(start >> groupShift) + endGroupMSB := uint16((limit - 1) >> groupShift) + + group := names.getGroup(start) + + if startGroupMSB < group[groupMsb] && nameChoice == ExtendedCharName { + extLimit := rune(group[groupMsb]) << groupShift + if extLimit > limit { + extLimit = limit + } + start = extLimit + } + + if startGroupMSB == endGroupMSB { + if startGroupMSB == group[groupMsb] { + return names.enumGroupNames(group, start, limit-1, otherName, nameChoice) + } + } else { + 
if startGroupMSB == group[groupMsb] { + if start&groupMask != 0 { + if cp := names.enumGroupNames(group, start, (rune(startGroupMSB)< group[groupMsb] { + group = group[groupLength:] + } + + for len(group) > 0 && group[groupMsb] < endGroupMSB { + start = rune(group[groupMsb]) << groupShift + if cp := names.enumGroupNames(group, start, start+linesPerGroup-1, otherName, nameChoice); cp != -1 { + return cp + } + group = group[groupLength:] + } + + if len(group) > 0 && group[groupMsb] == endGroupMSB { + return names.enumGroupNames(group, (limit-1)&^groupMask, limit-1, otherName, nameChoice) + } + } + + return -1 +} + +func (names *unames) getGroup(code rune) []uint16 { + groups := names.groups + groupMSB := uint16(code >> groupShift) + + start := 0 + groupCount := int(groups[0]) + limit := groupCount + groups = groups[1:] + + for start < limit-1 { + number := (start + limit) / 2 + if groupMSB < groups[number*groupLength+groupMsb] { + limit = number + } else { + start = number + } + } + + return groups[start*groupLength : (groupCount-start)*groupLength] +} + +func (names *unames) getGroupOffset(group []uint16) uint32 { + return (uint32(group[groupOffsetHigh]) << 16) | uint32(group[groupOffsetLow]) +} + +func (names *unames) enumGroupNames(group []uint16, start, end rune, otherName string, choice NameChoice) rune { + var offsets [linesPerGroup + 2]uint16 + var lengths [linesPerGroup + 2]uint16 + + s := names.getGroupName(group) + s = expandGroupLengths(s, offsets[:0], lengths[:0]) + + for start < end { + name := s[offsets[start&groupMask]:] + nameLen := lengths[start&groupMask] + if names.compareName(name[:nameLen], choice, otherName) { + return start + } + start++ + } + return -1 +} + +func expandGroupLengths(s []uint8, offsets []uint16, lengths []uint16) []uint8 { + /* read the lengths of the 32 strings in this group and get each string's offset */ + var i, offset, length uint16 + var lengthByte uint8 + + /* all 32 lengths must be read to get the offset of the first 
group string */ + for i < linesPerGroup { + lengthByte = s[0] + s = s[1:] + + /* read even nibble - MSBs of lengthByte */ + if length >= 12 { + /* double-nibble length spread across two bytes */ + length = ((length&0x3)<<4 | uint16(lengthByte)>>4) + 12 + lengthByte &= 0xf + } else if (lengthByte /* &0xf0 */) >= 0xc0 { + /* double-nibble length spread across this one byte */ + length = (uint16(lengthByte) & 0x3f) + 12 + } else { + /* single-nibble length in MSBs */ + length = uint16(lengthByte) >> 4 + lengthByte &= 0xf + } + + offsets = append(offsets, offset) + lengths = append(lengths, length) + + offset += length + i++ + + /* read odd nibble - LSBs of lengthByte */ + if (lengthByte & 0xf0) == 0 { + /* this nibble was not consumed for a double-nibble length above */ + length = uint16(lengthByte) + if length < 12 { + /* single-nibble length in LSBs */ + offsets = append(offsets, offset) + lengths = append(lengths, length) + + offset += length + i++ + } + } else { + length = 0 /* prevent double-nibble detection in the next iteration */ + } + } + + /* now, s is at the first group string */ + return s +} + +func (names *unames) compareName(name []byte, choice NameChoice, otherName string) bool { + tokens := names.tokens + + tokenCount := tokens[0] + tokens = tokens[1:] + + otherNameLen := len(otherName) + + for len(name) > 0 && len(otherName) > 0 { + c := name[0] + name = name[1:] + + if uint16(c) >= tokenCount { + if c != ';' { + if c != otherName[0] { + return false + } + otherName = otherName[1:] + } else { + break + } + } else { + token := tokens[c] + if int16(token) == -2 { + token = tokens[int(c)<<8|int(name[0])] + name = name[1:] + } + if int16(token) == -1 { + if c != ';' { + if c != otherName[0] { + return false + } + otherName = otherName[1:] + } else { + if len(otherName) == otherNameLen && choice == ExtendedCharName { + if ';' >= tokenCount || int16(tokens[';']) == -1 { + continue + } + } + break + } + } else { + tokenString := names.tokenStrings[token:] + 
for tokenString[0] != 0 && len(otherName) > 0 { + if tokenString[0] != otherName[0] { + return false + } + tokenString = tokenString[1:] + otherName = otherName[1:] + } + } + } + } + + return len(otherName) == 0 +} diff --git a/go/mysql/icuregex/internal/unames/unames_test.go b/go/mysql/icuregex/internal/unames/unames_test.go new file mode 100644 index 00000000000..f15353eef8d --- /dev/null +++ b/go/mysql/icuregex/internal/unames/unames_test.go @@ -0,0 +1,64 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package unames + +import ( + "testing" +) + +func TestCharForName(t *testing.T) { + var TestNames = []struct { + code rune + name, oldName, extName string + }{ + {0x0061, "LATIN SMALL LETTER A", "", "LATIN SMALL LETTER A"}, + {0x01a2, "LATIN CAPITAL LETTER OI", "", "LATIN CAPITAL LETTER OI"}, + {0x0284, "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK", "", "LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK"}, + {0x0fd0, "TIBETAN MARK BSKA- SHOG GI MGO RGYAN", "", "TIBETAN MARK BSKA- SHOG GI MGO RGYAN"}, + {0x3401, "CJK UNIFIED IDEOGRAPH-3401", "", "CJK UNIFIED IDEOGRAPH-3401"}, + {0x7fed, "CJK UNIFIED IDEOGRAPH-7FED", "", "CJK UNIFIED IDEOGRAPH-7FED"}, + {0xac00, "HANGUL SYLLABLE GA", "", "HANGUL SYLLABLE GA"}, + {0xd7a3, "HANGUL SYLLABLE HIH", "", "HANGUL SYLLABLE HIH"}, + {0xd800, "", "", ""}, + {0xdc00, "", "", ""}, + {0xff08, "FULLWIDTH LEFT PARENTHESIS", "", "FULLWIDTH LEFT PARENTHESIS"}, + {0xffe5, "FULLWIDTH YEN SIGN", "", "FULLWIDTH YEN SIGN"}, + {0xffff, "", "", ""}, + {0x1d0c5, "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS", "", "BYZANTINE MUSICAL SYMBOL FHTORA SKLIRON CHROMA VASIS"}, + {0x23456, "CJK UNIFIED IDEOGRAPH-23456", "", "CJK UNIFIED IDEOGRAPH-23456"}, + } + + for _, tn := range TestNames { + if tn.name != "" { + r := CharForName(UnicodeCharName, tn.name) + if r != tn.code { + t.Errorf("CharForName(U_UNICODE_CHAR_NAME, %q) = '%c' (U+%04X), expected %c (U+%04X)", tn.name, r, r, tn.code, tn.code) + } + } + if tn.extName != "" { + r := CharForName(ExtendedCharName, tn.extName) + if r != tn.code { + t.Errorf("CharForName(U_EXTENDED_CHAR_NAME, %q) = '%c' (U+%04X), expected %c (U+%04X)", tn.extName, r, r, tn.code, tn.code) + } + } + } +} diff --git a/go/mysql/icuregex/internal/uprops/constants.go b/go/mysql/icuregex/internal/uprops/constants.go new file mode 100644 index 00000000000..3cfe250599a --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/constants.go @@ -0,0 +1,613 @@ +/* +© 2016 and later: Unicode, Inc. and others. 
+Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uprops + +type Property int32 + +const ( + /* + * Note: UProperty constants are parsed by preparseucd.py. + * It matches lines like + * UCHAR_<Unicode property name>=<integer>, + */ + + /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that + debuggers display UCHAR_ALPHABETIC as the symbolic name for 0, + rather than UCHAR_BINARY_START. Likewise for other *_START + identifiers. */ + + /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha. + Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */ + UCharAlphabetic Property = 0 + /** First constant for binary Unicode properties. @stable ICU 2.1 */ + UCharBinaryStart = UCharAlphabetic + /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */ + UCharASCIIHexDigit Property = 1 + /** Binary property Bidi_Control. + Format controls which have specific functions + in the Bidi Algorithm. @stable ICU 2.1 */ + UCharBidiControl Property = 2 + /** Binary property Bidi_Mirrored. + Characters that may change display in RTL text. + Same as u_isMirrored. + See Bidi Algorithm, UTR 9. @stable ICU 2.1 */ + UCharBidiMirrored Property = 3 + /** Binary property Dash. Variations of dashes. 
@stable ICU 2.1 */ + UCharDash Property = 4 + /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2). + Ignorable in most processing. + <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */ + UCharDefaultIgnorableCodePoint Property = 5 + /** Binary property Deprecated (new in Unicode 3.2). + The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */ + UCharDeprecated Property = 6 + /** Binary property Diacritic. Characters that linguistically modify + the meaning of another character to which they apply. @stable ICU 2.1 */ + UCharDiacritic Property = 7 + /** Binary property Extender. + Extend the value or shape of a preceding alphabetic character, + e.g., length and iteration marks. @stable ICU 2.1 */ + UCharExtender Property = 8 + /** Binary property Full_Composition_Exclusion. + CompositionExclusions.txt+Singleton Decompositions+ + Non-Starter Decompositions. @stable ICU 2.1 */ + UCharFullCompositionExclusion Property = 9 + /** Binary property Grapheme_Base (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */ + UCharGraphemeBase Property = 10 + /** Binary property Grapheme_Extend (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. + Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */ + UCharGraphemeExtend Property = 11 + /** Binary property Grapheme_Link (new in Unicode 3.2). + For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */ + UCharGraphemeLink Property = 12 + /** Binary property Hex_Digit. + Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */ + UCharHexDigit Property = 13 + /** Binary property Hyphen. Dashes used to mark connections + between pieces of words, plus the Katakana middle dot. 
@stable ICU 2.1 */ + UCharHyphen Property = 14 + /** Binary property ID_Continue. + Characters that can continue an identifier. + DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out." + ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */ + UCharIDContinue Property = 15 + /** Binary property ID_Start. + Characters that can start an identifier. + Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */ + UCharIDStart Property = 16 + /** Binary property Ideographic. + CJKV ideographs. @stable ICU 2.1 */ + UCharIdeographic Property = 17 + /** Binary property IDS_Binary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharIdsBinaryOperator Property = 18 + /** Binary property IDS_Trinary_Operator (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharIdsTrinaryOperator Property = 19 + /** Binary property Join_Control. + Format controls for cursive joining and ligation. @stable ICU 2.1 */ + UCharJoinControl Property = 20 + /** Binary property Logical_Order_Exception (new in Unicode 3.2). + Characters that do not use logical order and + require special handling in most processing. @stable ICU 2.1 */ + UCharLogicalOrderException Property = 21 + /** Binary property Lowercase. Same as u_isULowercase, different from u_islower. + Ll+Other_Lowercase @stable ICU 2.1 */ + UCharLowercase Property = 22 + /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */ + UCharMath Property = 23 + /** Binary property Noncharacter_Code_Point. + Code points that are explicitly defined as illegal + for the encoding of characters. @stable ICU 2.1 */ + UCharNoncharacterCodePoint Property = 24 + /** Binary property Quotation_Mark. @stable ICU 2.1 */ + UCharQuotationMark Property = 25 + /** Binary property Radical (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. 
@stable ICU 2.1 */ + UCharRadical Property = 26 + /** Binary property Soft_Dotted (new in Unicode 3.2). + Characters with a "soft dot", like i or j. + An accent placed on these characters causes + the dot to disappear. @stable ICU 2.1 */ + UCharSoftDotted Property = 27 + /** Binary property Terminal_Punctuation. + Punctuation characters that generally mark + the end of textual units. @stable ICU 2.1 */ + UCharTerminalPunctuation Property = 28 + /** Binary property Unified_Ideograph (new in Unicode 3.2). + For programmatic determination of + Ideographic Description Sequences. @stable ICU 2.1 */ + UCharUnifiedIdeograph Property = 29 + /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper. + Lu+Other_Uppercase @stable ICU 2.1 */ + UCharUppercase Property = 30 + /** Binary property White_Space. + Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace. + Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */ + UCharWhiteSpace Property = 31 + /** Binary property XID_Continue. + ID_Continue modified to allow closure under + normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCharXidContinue Property = 32 + /** Binary property XID_Start. ID_Start modified to allow + closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */ + UCharXidStart Property = 33 + /** Binary property Case_Sensitive. Either the source of a case + mapping or _in_ the target of a case mapping. Not the same as + the general category Cased_Letter. @stable ICU 2.6 */ + UCharCaseSensitive Property = 34 + /** Binary property STerm (new in Unicode 4.0.1). + Sentence Terminal. Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + @stable ICU 3.0 */ + UCharSTerm Property = 35 + /** Binary property Variation_Selector (new in Unicode 4.0.1). + Indicates all those characters that qualify as Variation Selectors. + For details on the behavior of these characters, + see StandardizedVariants.html and 15.6 Variation Selectors. 
+ @stable ICU 3.0 */ + UCharVariationSelector Property = 36 + /** Binary property NFD_Inert. + ICU-specific property for characters that are inert under NFD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfdInert Property = 37 + /** Binary property NFKD_Inert. + ICU-specific property for characters that are inert under NFKD, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfkdInert Property = 38 + /** Binary property NFC_Inert. + ICU-specific property for characters that are inert under NFC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfcInert Property = 39 + /** Binary property NFKC_Inert. + ICU-specific property for characters that are inert under NFKC, + i.e., they do not interact with adjacent characters. + See the documentation for the Normalizer2 class and the + Normalizer2::isInert() method. + @stable ICU 3.0 */ + UCharNfkcInert Property = 40 + /** Binary Property Segment_Starter. + ICU-specific property for characters that are starters in terms of + Unicode normalization and combining character sequences. + They have ccc=0 and do not occur in non-initial position of the + canonical decomposition of any character + (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)). + ICU uses this property for segmenting a string for generating a set of + canonically equivalent strings, e.g. for canonical closure while + processing collation tailoring rules. + @stable ICU 3.0 */ + UCharSegmentStarter Property = 41 + /** Binary property Pattern_Syntax (new in Unicode 4.1). 
+ See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCharPatternSyntax Property = 42 + /** Binary property Pattern_White_Space (new in Unicode 4.1). + See UAX #31 Identifier and Pattern Syntax + (http://www.unicode.org/reports/tr31/) + @stable ICU 3.4 */ + UCharPatternWhiteSpace Property = 43 + /** Binary property alnum (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixAlnum Property = 44 + /** Binary property blank (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixBlank Property = 45 + /** Binary property graph (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixGraph Property = 46 + /** Binary property print (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixPrint Property = 47 + /** Binary property xdigit (a C/POSIX character class). + Implemented according to the UTS #18 Annex C Standard Recommendation. + See the uchar.h file documentation. + @stable ICU 3.4 */ + UCharPosixXdigit Property = 48 + /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */ + UCharCased Property = 49 + /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */ + UCharCaseIgnorable Property = 50 + /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */ + UCharChangesWhenLowercased Property = 51 + /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */ + UCharChangesWhenUppercased Property = 52 + /** Binary property Changes_When_Titlecased. 
@stable ICU 4.4 */ + UCharChangesWhenTitlecased Property = 53 + /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */ + UCharChangesWhenCasefolded Property = 54 + /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */ + UCharChangesWhenCasemapped Property = 55 + /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */ + UCharChangesWhenNfkcCasefolded Property = 56 + /** + * Binary property Emoji. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmoji Property = 57 + /** + * Binary property Emoji_Presentation. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiPresentation Property = 58 + /** + * Binary property Emoji_Modifier. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiModifier Property = 59 + /** + * Binary property Emoji_Modifier_Base. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 57 + */ + UCharEmojiModifierBase Property = 60 + /** + * Binary property Emoji_Component. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 60 + */ + UCharEmojiComponent Property = 61 + /** + * Binary property Regional_Indicator. + * @stable ICU 60 + */ + UCharRegionalIndicator Property = 62 + /** + * Binary property Prepended_Concatenation_Mark. + * @stable ICU 60 + */ + UCharPrependedConcatenationMark Property = 63 + /** + * Binary property Extended_Pictographic. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 62 + */ + UCharExtendedPictographic Property = 64 + + /** Enumerated property Bidi_Class. + Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */ + UCharBidiClass Property = 0x1000 + /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */ + UCharIntStart = UCharBidiClass + /** Enumerated property Block. 
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */ + UCharBlock Property = 0x1001 + /** Enumerated property Canonical_Combining_Class. + Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */ + UCharCanonicalCombiningClass Property = 0x1002 + /** Enumerated property Decomposition_Type. + Returns UDecompositionType values. @stable ICU 2.2 */ + UCharDecompositionType Property = 0x1003 + /** Enumerated property East_Asian_Width. + See http://www.unicode.org/reports/tr11/ + Returns UEastAsianWidth values. @stable ICU 2.2 */ + UCharEastAsianWidth Property = 0x1004 + /** Enumerated property General_Category. + Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */ + UCharGeneralCategory Property = 0x1005 + /** Enumerated property Joining_Group. + Returns UJoiningGroup values. @stable ICU 2.2 */ + UCharJoiningGroup Property = 0x1006 + /** Enumerated property Joining_Type. + Returns UJoiningType values. @stable ICU 2.2 */ + UCharJoiningType Property = 0x1007 + /** Enumerated property Line_Break. + Returns ULineBreak values. @stable ICU 2.2 */ + UCharLineBreak Property = 0x1008 + /** Enumerated property Numeric_Type. + Returns UNumericType values. @stable ICU 2.2 */ + UCharNumericType Property = 0x1009 + /** Enumerated property Script. + Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */ + UCharScript Property = 0x100A + /** Enumerated property Hangul_Syllable_Type, new in Unicode 4. + Returns UHangulSyllableType values. @stable ICU 2.6 */ + UCharHangulSyllableType Property = 0x100B + /** Enumerated property NFD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfdQuickCheck Property = 0x100C + /** Enumerated property NFKD_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfkdQuickCheck Property = 0x100D + /** Enumerated property NFC_Quick_Check. + Returns UNormalizationCheckResult values. 
@stable ICU 3.0 */ + UCharNfcQuickCheck Property = 0x100E + /** Enumerated property NFKC_Quick_Check. + Returns UNormalizationCheckResult values. @stable ICU 3.0 */ + UCharNfkcQuickCheck Property = 0x100F + /** Enumerated property Lead_Canonical_Combining_Class. + ICU-specific property for the ccc of the first code point + of the decomposition, or lccc(c)=ccc(NFD(c)[0]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCharLeadCanonicalCombiningClass Property = 0x1010 + /** Enumerated property Trail_Canonical_Combining_Class. + ICU-specific property for the ccc of the last code point + of the decomposition, or tccc(c)=ccc(NFD(c)[last]). + Useful for checking for canonically ordered text; + see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD . + Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */ + UCharTrailCanonicalCombiningClass Property = 0x1011 + /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UGraphemeClusterBreak values. @stable ICU 3.4 */ + UCharGraphemeClusterBreak Property = 0x1012 + /** Enumerated property Sentence_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns USentenceBreak values. @stable ICU 3.4 */ + UCharSentenceBreak Property = 0x1013 + /** Enumerated property Word_Break (new in Unicode 4.1). + Used in UAX #29: Text Boundaries + (http://www.unicode.org/reports/tr29/) + Returns UWordBreakValues values. @stable ICU 3.4 */ + UCharWordBreak Property = 0x1014 + /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3). + Used in UAX #9: Unicode Bidirectional Algorithm + (http://www.unicode.org/reports/tr9/) + Returns UBidiPairedBracketType values. 
@stable ICU 52 */ + UCharBidiPairedBracketType Property = 0x1015 + /** + * Enumerated property Indic_Positional_Category. + * New in Unicode 6.0 as provisional property Indic_Matra_Category; + * renamed and changed to informative in Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt + * @stable ICU 63 + */ + UCharIndicPositionalCategory Property = 0x1016 + /** + * Enumerated property Indic_Syllabic_Category. + * New in Unicode 6.0 as provisional; informative since Unicode 8.0. + * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt + * @stable ICU 63 + */ + UCharIndicSyllableCategory Property = 0x1017 + /** + * Enumerated property Vertical_Orientation. + * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/). + * New as a UCD property in Unicode 10.0. + * @stable ICU 63 + */ + UCharVerticalOrientation Property = 0x1018 + + /** Bitmask property General_Category_Mask. + This is the General_Category property returned as a bit mask. + When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)), + returns bit masks for UCharCategory values where exactly one bit is set. + When used with u_getPropertyValueName() and u_getPropertyValueEnum(), + a multi-bit mask is used for sets of categories like "Letters". + Mask values should be cast to uint32_t. + @stable ICU 2.4 */ + UCharGeneralCategoryMask Property = 0x2000 + /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */ + UCharMaskStart = UCharGeneralCategoryMask + /** Double property Numeric_Value. + Corresponds to u_getNumericValue. @stable ICU 2.4 */ + UCharNumericValue Property = 0x3000 + /** First constant for double Unicode properties. @stable ICU 2.4 */ + UCharDoubleStart = UCharNumericValue + /** String property Age. + Corresponds to u_charAge. @stable ICU 2.4 */ + UCharAge Property = 0x4000 + /** First constant for string Unicode properties. 
@stable ICU 2.4 */ + UCharStringStart = UCharAge + /** String property Bidi_Mirroring_Glyph. + Corresponds to u_charMirror. @stable ICU 2.4 */ + UCharBidiMirroringGlyph Property = 0x4001 + /** String property Case_Folding. + Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */ + UCharCaseFolding Property = 0x4002 + /** String property Lowercase_Mapping. + Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */ + UCharLowercaseMapping Property = 0x4004 + /** String property Name. + Corresponds to u_charName. @stable ICU 2.4 */ + UCharName Property = 0x4005 + /** String property Simple_Case_Folding. + Corresponds to u_foldCase. @stable ICU 2.4 */ + UCharSimpleCaseFolding Property = 0x4006 + /** String property Simple_Lowercase_Mapping. + Corresponds to u_tolower. @stable ICU 2.4 */ + UCharSimpleLowercaseMapping Property = 0x4007 + /** String property Simple_Titlecase_Mapping. + Corresponds to u_totitle. @stable ICU 2.4 */ + UcharSimpleTitlecaseMapping Property = 0x4008 + /** String property Simple_Uppercase_Mapping. + Corresponds to u_toupper. @stable ICU 2.4 */ + UCharSimpleUppercaseMapping Property = 0x4009 + /** String property Titlecase_Mapping. + Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */ + UCharTitlecaseMapping Property = 0x400A + /** String property Uppercase_Mapping. + Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */ + UCharUppercaseMapping Property = 0x400C + /** String property Bidi_Paired_Bracket (new in Unicode 6.3). + Corresponds to u_getBidiPairedBracket. @stable ICU 52 */ + UCharBidiPairedBracket Property = 0x400D + + /** Miscellaneous property Script_Extensions (new in Unicode 6.0). + Some characters are commonly used in multiple scripts. + For more information, see UAX #24: http://www.unicode.org/reports/tr24/. + Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h. 
+ @stable ICU 4.6 */ + UCharScriptExtensions Property = 0x7000 + /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ + UCharOtherPropertyStart = UCharScriptExtensions + + /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */ + UCharInvalidCode Property = -1 +) + +const ( + uCharBinaryLimit = 65 + uCharIntLimit = 0x1019 + uCharMaskLimit = 0x2001 + uCharStringLimit = 0x400E +) + +/* + * Properties in vector word 1 + * Each bit encodes one binary property. + * The following constants represent the bit number, use 1<= 0 { + set.AddRuneRange(startHasProperty, c-1) + startHasProperty = -1 + } + } + } + if startHasProperty >= 0 { + set.AddRuneRange(startHasProperty, uset.MaxValue) + } + + inclusionsForProperty[prop] = set + return set, nil +} + +func getInclusionsForIntProperty(prop Property) (*uset.UnicodeSet, error) { + if inc, ok := inclusionsForProperty[prop]; ok { + return inc, nil + } + + src := prop.source() + incl, err := getInclusionsForSource(src) + if err != nil { + return nil, err + } + + intPropIncl := uset.New() + intPropIncl.AddRune(0) + + numRanges := incl.RangeCount() + prevValue := int32(0) + + for i := 0; i < numRanges; i++ { + rangeEnd := incl.RangeEnd(i) + for c := incl.RangeStart(i); c <= rangeEnd; c++ { + value := getIntPropertyValue(c, prop) + if value != prevValue { + intPropIncl.AddRune(c) + prevValue = value + } + } + } + + inclusionsForProperty[prop] = intPropIncl + return intPropIncl, nil +} + +func ApplyIntPropertyValue(u *uset.UnicodeSet, prop Property, value int32) error { + switch { + case prop == UCharGeneralCategoryMask: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, func(ch rune) bool { + return (uMask(uchar.CharType(ch)) & uint32(value)) != 0 + }) + case prop == UCharScriptExtensions: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, 
func(ch rune) bool { + return uscriptHasScript(ch, code(value)) + }) + case 0 <= prop && prop < uCharBinaryLimit: + if value == 0 || value == 1 { + set, err := getInclusionsForBinaryProperty(prop) + if err != nil { + return err + } + u.CopyFrom(set) + if value == 0 { + u.Complement() + } + } else { + u.Clear() + } + + case UCharIntStart <= prop && prop < uCharIntLimit: + inclusions, err := getInclusionsForProperty(prop) + if err != nil { + return err + } + u.ApplyFilter(inclusions, func(ch rune) bool { + return getIntPropertyValue(ch, prop) == value + }) + default: + return errors.ErrUnsupported + } + return nil +} + +func mungeCharName(charname string) string { + out := make([]byte, 0, len(charname)) + for _, ch := range []byte(charname) { + j := len(out) + if ch == ' ' && (j == 0 || out[j-1] == ' ') { + continue + } + out = append(out, ch) + } + return string(out) +} + +func ApplyPropertyPattern(u *uset.UnicodeSet, pat string) error { + if len(pat) < 5 { + return errors.ErrIllegalArgument + } + + var posix, isName, invert bool + + if isPOSIXOpen(pat) { + posix = true + pat = pattern.SkipWhitespace(pat[2:]) + if len(pat) > 0 && pat[0] == '^' { + pat = pat[1:] + invert = true + } + } else if isPerlOpen(pat) || isNameOpen(pat) { + c := pat[1] + invert = c == 'P' + isName = c == 'N' + pat = pattern.SkipWhitespace(pat[2:]) + if len(pat) == 0 || pat[0] != '{' { + return errors.ErrIllegalArgument + } + pat = pat[1:] + } else { + return errors.ErrIllegalArgument + } + + var closePos int + if posix { + closePos = strings.Index(pat, ":]") + } else { + closePos = strings.IndexByte(pat, '}') + } + if closePos < 0 { + return errors.ErrIllegalArgument + } + + equals := strings.IndexByte(pat, '=') + var propName, valueName string + if equals >= 0 && equals < closePos && !isName { + propName = pat[:equals] + valueName = pat[equals+1 : closePos] + } else { + propName = pat[:closePos] + if isName { + valueName = propName + propName = "na" + } + } + + if err := 
ApplyPropertyAlias(u, propName, valueName); err != nil { + return err + } + if invert { + u.Complement() + } + return nil +} + +func isPOSIXOpen(pattern string) bool { + return pattern[0] == '[' && pattern[1] == ':' +} + +func isNameOpen(pattern string) bool { + return pattern[0] == '\\' && pattern[1] == 'N' +} + +func isPerlOpen(pattern string) bool { + return pattern[0] == '\\' && (pattern[1] == 'p' || pattern[1] == 'P') +} + +func ApplyPropertyAlias(u *uset.UnicodeSet, prop, value string) error { + var p Property + var v int32 + var invert bool + + if len(value) > 0 { + p = getPropertyEnum(prop) + if p == -1 { + return errors.ErrIllegalArgument + } + if p == UCharGeneralCategory { + p = UCharGeneralCategoryMask + } + + if (p >= UCharBinaryStart && p < uCharBinaryLimit) || + (p >= UCharIntStart && p < uCharIntLimit) || + (p >= UCharMaskStart && p < uCharMaskLimit) { + v = getPropertyValueEnum(p, value) + if v == -1 { + // Handle numeric CCC + if p == UCharCanonicalCombiningClass || + p == UCharTrailCanonicalCombiningClass || + p == UCharLeadCanonicalCombiningClass { + val, err := strconv.ParseUint(value, 10, 8) + if err != nil { + return errors.ErrIllegalArgument + } + v = int32(val) + } else { + return errors.ErrIllegalArgument + } + } + } else { + switch p { + case UCharNumericValue: + val, err := strconv.ParseFloat(value, 64) + if err != nil { + return errors.ErrIllegalArgument + } + incl, err := getInclusionsForProperty(p) + if err != nil { + return err + } + u.ApplyFilter(incl, func(ch rune) bool { + return uchar.NumericValue(ch) == val + }) + return nil + case UCharName: + // Must munge name, since u_charFromName() does not do + // 'loose' matching. + charName := mungeCharName(value) + ch := unames.CharForName(unames.ExtendedCharName, charName) + if ch < 0 { + return errors.ErrIllegalArgument + } + u.Clear() + u.AddRune(ch) + return nil + case UCharAge: + // Must munge name, since u_versionFromString() does not do + // 'loose' matching. 
+ charName := mungeCharName(value) + version := uchar.VersionFromString(charName) + incl, err := getInclusionsForProperty(p) + if err != nil { + return err + } + u.ApplyFilter(incl, func(ch rune) bool { + return uchar.CharAge(ch) == version + }) + return nil + case UCharScriptExtensions: + v = getPropertyValueEnum(UCharScript, value) + if v == -1 { + return errors.ErrIllegalArgument + } + default: + // p is a non-binary, non-enumerated property that we + // don't support (yet). + return errors.ErrIllegalArgument + } + } + } else { + // value is empty. Interpret as General Category, Script, or + // Binary property. + p = UCharGeneralCategoryMask + v = getPropertyValueEnum(p, prop) + if v == -1 { + p = UCharScript + v = getPropertyValueEnum(p, prop) + if v == -1 { + p = getPropertyEnum(prop) + if p >= UCharBinaryStart && p < uCharBinaryLimit { + v = 1 + } else if 0 == comparePropertyNames("ANY", prop) { + u.Clear() + u.AddRuneRange(uset.MinValue, uset.MaxValue) + return nil + } else if 0 == comparePropertyNames("ASCII", prop) { + u.Clear() + u.AddRuneRange(0, 0x7F) + return nil + } else if 0 == comparePropertyNames("Assigned", prop) { + // [:Assigned:]=[:^Cn:] + p = UCharGeneralCategoryMask + v = int32(uchar.GcCnMask) + invert = true + } else { + return errors.ErrIllegalArgument + } + } + } + } + + err := ApplyIntPropertyValue(u, p, v) + if err != nil { + return err + } + if invert { + u.Complement() + } + return nil +} + +func AddULayoutPropertyStarts(src propertySource, u *uset.UnicodeSet) { + var trie *utrie.UcpTrie + switch src { + case srcInpc: + trie = ulayout.InpcTrie() + case srcInsc: + trie = ulayout.InscTrie() + case srcVo: + trie = ulayout.VoTrie() + default: + panic("unreachable") + } + + // Add the start code point of each same-value range of the trie. 
+ var start, end rune + for { + end, _ = trie.GetRange(start, utrie.UcpMapRangeNormal, 0, nil) + if end < 0 { + break + } + u.AddRune(start) + start = end + 1 + } +} + +func AddCategory(u *uset.UnicodeSet, mask uint32) error { + set := uset.New() + err := ApplyIntPropertyValue(set, UCharGeneralCategoryMask, int32(mask)) + if err != nil { + return err + } + u.AddAll(set) + return nil +} + +func NewUnicodeSetFomPattern(pattern string, flags uset.USet) (*uset.UnicodeSet, error) { + u := uset.New() + if err := ApplyPropertyPattern(u, pattern); err != nil { + return nil, err + } + if flags&uset.CaseInsensitive != 0 { + u.CloseOver(uset.CaseInsensitive) + } + return u, nil +} + +func MustNewUnicodeSetFomPattern(pattern string, flags uset.USet) *uset.UnicodeSet { + u, err := NewUnicodeSetFomPattern(pattern, flags) + if err != nil { + panic(err) + } + return u +} diff --git a/go/mysql/icuregex/internal/uprops/uprops.go b/go/mysql/icuregex/internal/uprops/uprops.go new file mode 100644 index 00000000000..ddf0989b5d8 --- /dev/null +++ b/go/mysql/icuregex/internal/uprops/uprops.go @@ -0,0 +1,269 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package uprops
+
+import (
+	"fmt"
+
+	"vitess.io/vitess/go/mysql/icuregex/internal/bytestrie"
+	"vitess.io/vitess/go/mysql/icuregex/internal/icudata"
+	"vitess.io/vitess/go/mysql/icuregex/internal/uchar"
+	"vitess.io/vitess/go/mysql/icuregex/internal/udata"
+)
+
+// pnames holds the two sections of the property-names data file that
+// readData extracts: the value maps (32-bit words) and the bytes backing
+// the name tries.
+var pnames struct {
+	valueMaps []uint32
+	byteTrie  []uint8
+}
+
+// Positions inside the index table at the start of the property-names data.
+// Each entry is the byte offset at which the named section begins.
+const (
+	ixValueMapsOffset  = 0
+	ixByteTriesOffset  = 1
+	ixNameGroupsOffset = 2
+	ixReserved3Offset  = 3
+)
+
+// readData parses the property-names data (pnames.icu) and fills pnames.
+//
+// The header must carry the data format bytes 0x70 0x6e 0x61 0x6d ("pnam")
+// and format version 2. The first 32-bit word after the header is the byte
+// size of the index table; the table must have at least 8 entries. The value
+// maps span the bytes between index entries 0 and 1, the trie bytes those
+// between entries 1 and 2.
+//
+// It returns the header error, or an error if the index table is too short.
+func readData(bytes *udata.Bytes) error {
+	err := bytes.ReadHeader(func(info *udata.DataInfo) bool {
+		return info.DataFormat[0] == 0x70 &&
+			info.DataFormat[1] == 0x6e &&
+			info.DataFormat[2] == 0x61 &&
+			info.DataFormat[3] == 0x6d &&
+			info.FormatVersion[0] == 2
+	})
+	if err != nil {
+		return err
+	}
+
+	count := bytes.Int32() / 4
+	if count < 8 {
+		// This parser reads pnames.icu; the message used to name ucase.icu.
+		return fmt.Errorf("indexes[0] too small in pnames.icu")
+	}
+
+	// The first word was already consumed to compute count, so it is
+	// restored by hand and only the remaining entries are read.
+	indexes := make([]int32, count)
+	indexes[0] = count * 4
+
+	for i := int32(1); i < count; i++ {
+		indexes[i] = bytes.Int32()
+	}
+
+	offset := indexes[ixValueMapsOffset]
+	nextOffset := indexes[ixByteTriesOffset]
+	numInts := (nextOffset - offset) / 4
+
+	pnames.valueMaps = bytes.Uint32Slice(numInts)
+
+	offset = nextOffset
+	nextOffset = indexes[ixNameGroupsOffset]
+	numBytes := nextOffset - offset
+
+	pnames.byteTrie = bytes.Uint8Slice(numBytes)
+	return nil
+}
+
+// init loads the embedded property-names data and panics if it cannot be
+// parsed.
+func init() {
+	b := udata.NewBytes(icudata.PNames)
+	if err := readData(b); err != nil {
+		panic(err)
+	}
+}
+
+// source reports which data source backs prop.
+//
+// For binary and int properties the answer comes from the binProps and
+// intProps tables: an entry with a non-zero mask reports srcPropsvec,
+// otherwise the entry's column is returned as the source. The remaining
+// ranges are resolved by the explicit switches below; anything not listed
+// yields srcNone.
+func (prop Property) source() propertySource {
+	if prop < UCharBinaryStart {
+		return srcNone /* undefined */
+	} else if prop < uCharBinaryLimit {
+		bprop := binProps[prop]
+		if bprop.mask != 0 {
+			return srcPropsvec
+		}
+		return bprop.column
+	} else if prop < UCharIntStart {
+		return srcNone /* undefined */
+	} else if prop < uCharIntLimit {
+		iprop := intProps[prop-UCharIntStart]
+		if iprop.mask != 0 {
+			return srcPropsvec
+		}
+		return iprop.column
+	} else if prop < UCharStringStart {
+		switch prop {
+		case UCharGeneralCategoryMask,
+			UCharNumericValue:
+			return srcChar
+
+		default:
+			return srcNone
+		}
+	} else if prop < uCharStringLimit {
+		switch prop {
+		case UCharAge:
+			return srcPropsvec
+
+		case UCharBidiMirroringGlyph:
+			return srcBidi
+
+		case UCharCaseFolding,
+			UCharLowercaseMapping,
+			UCharSimpleCaseFolding,
+			UCharSimpleLowercaseMapping,
+			UcharSimpleTitlecaseMapping,
+			UCharSimpleUppercaseMapping,
+			UCharTitlecaseMapping,
+			UCharUppercaseMapping:
+			return srcCase
+
+		/* UCHAR_ISO_COMMENT, UCHAR_UNICODE_1_NAME (deprecated) */
+		case UCharName:
+			return srcNames
+
+		default:
+			return srcNone
+		}
+	} else {
+		switch prop {
+		case UCharScriptExtensions:
+			return srcPropsvec
+		default:
+			return srcNone /* undefined */
+		}
+	}
+}
+
+// getPropertyEnum resolves a property alias through the name trie that
+// starts at offset 0. The result is Property(-1) when the alias is unknown.
+func getPropertyEnum(alias string) Property {
+	return Property(getPropertyOrValueEnum(0, alias))
+}
+
+// getPropertyValueEnum resolves a value alias of prop to its numeric value.
+// It returns -1 when prop has no entry in the value maps, when the entry has
+// no value map, or when alias is not a known name for any value.
+func getPropertyValueEnum(prop Property, alias string) int32 {
+	valueMapIdx := findProperty(prop)
+	if valueMapIdx == 0 {
+		return -1
+	}
+
+	// The word following the property's slot holds the index of its value
+	// map; zero means there is none.
+	valueMapIdx = int32(pnames.valueMaps[valueMapIdx+1])
+	if valueMapIdx == 0 {
+		return -1
+	}
+	// valueMapIndex is the start of the property's valueMap,
+	// where the first word is the BytesTrie offset.
+	return getPropertyOrValueEnum(int32(pnames.valueMaps[valueMapIdx]), alias)
+}
+
+// findProperty returns the index into pnames.valueMaps of the two-word slot
+// describing prop, or 0 when prop is not covered by any range.
+//
+// valueMaps[0] is the number of ranges. Each range is stored as a start and
+// a limit followed by two words per property in [start, limit). The scan
+// stops as soon as a range starts above prop, so the ranges are expected to
+// be in ascending order.
+func findProperty(prop Property) int32 {
+	var i = int32(1)
+	for numRanges := int32(pnames.valueMaps[0]); numRanges > 0; numRanges-- {
+		start := int32(pnames.valueMaps[i])
+		limit := int32(pnames.valueMaps[i+1])
+		i += 2
+		if int32(prop) < start {
+			break
+		}
+		if int32(prop) < limit {
+			return i + (int32(prop)-start)*2
+		}
+		// Skip all entries of this range.
+		i += (limit - start) * 2
+	}
+	return 0
+}
+
+// getPropertyOrValueEnum looks alias up in the bytes trie that begins at
+// offset inside pnames.byteTrie and returns the value stored for it, or -1
+// when the trie does not contain the name.
+func getPropertyOrValueEnum(offset int32, alias string) int32 {
+	trie := bytestrie.New(pnames.byteTrie[offset:])
+	if trie.ContainsName(alias) {
+		return trie.GetValue()
+	}
+	return -1
+}
+
+// comparePropertyNames compares two property names loosely: '-', '_', space
+// and the ASCII control characters 0x09..0x0d are skipped, and ASCII
+// uppercase letters are folded to lowercase before comparing.
+//
+// It returns 0 when the names are equivalent, otherwise the difference
+// between the first pair of bytes that differ (negative when name1 sorts
+// first). A name that runs out first compares as byte 0.
+func comparePropertyNames(name1, name2 string) int {
+	// next returns the next significant byte of s, lowercased, together
+	// with the rest of the string. It returns 0 once s is exhausted.
+	next := func(s string) (byte, string) {
+		for len(s) > 0 && (s[0] == 0x2d || s[0] == 0x5f || s[0] == 0x20 || (0x09 <= s[0] && s[0] <= 0x0d)) {
+			s = s[1:]
+		}
+		if len(s) == 0 {
+			return 0, ""
+		}
+		c := s[0]
+		s = s[1:]
+		if 'A' <= c && c <= 'Z' {
+			c += 'a' - 'A'
+		}
+		return c, s
+	}
+
+	var r1, r2 byte
+	for {
+		r1, name1 = next(name1)
+		r2, name2 = next(name2)
+
+		if r1 == 0 && r2 == 0 {
+			return 0
+		}
+
+		/* Compare the lowercased characters */
+		if r1 != r2 {
+			return int(r1) - int(r2)
+		}
+	}
+}
+
+// getIntPropertyValue returns the value of property which for code point c.
+//
+// Binary properties yield 1 or 0 (0 as well when the property has no
+// contains function), int properties yield whatever their table entry's
+// getValue computes, and UCharGeneralCategoryMask yields the single-bit mask
+// of the code point's general category. Every other property yields 0.
+func getIntPropertyValue(c rune, which Property) int32 {
+	if which < UCharIntStart {
+		if UCharBinaryStart <= which && which < uCharBinaryLimit {
+			prop := binProps[which]
+			if prop.contains == nil {
+				return 0
+			}
+			if prop.contains(prop, c, which) {
+				return 1
+			}
+			return 0
+		}
+	} else if which < uCharIntLimit {
+		iprop := intProps[which-UCharIntStart]
+		return iprop.getValue(iprop, c, which)
+	} else if which == UCharGeneralCategoryMask {
+		return int32(uMask(uchar.CharType(c)))
+	}
+	return 0 // undefined
+}
+
+// mergeScriptCodeOrIndex packs the split script bits of a properties word
+// back into one number: the bits under scriptHighMask are shifted down by
+// scriptHighShift and OR-ed with the bits under scriptLowMask.
+func mergeScriptCodeOrIndex(scriptX uint32) uint32 {
+	return ((scriptX & scriptHighMask) >> scriptHighShift) |
+		(scriptX & scriptLowMask)
+}
+
+// script returns the script code of c, or -1 when c is not a valid code
+// point (negative or above 0x10ffff).
+//
+// The script bits of the properties word either hold the script code
+// directly or, for code points with script extensions, an index that is
+// resolved through uchar.ScriptExtension. The two middle cases return the
+// literal codes 0 and 1.
+func script(c rune) int32 {
+	// rune is signed: reject negative values as well, so that an invalid
+	// code point never reaches the property lookup.
+	if c < 0 || c > 0x10ffff {
+		return -1
+	}
+	scriptX := uchar.GetUnicodeProperties(c, 0) & scriptXMask
+	codeOrIndex := mergeScriptCodeOrIndex(scriptX)
+
+	if scriptX < scriptXWithCommon {
+		return int32(codeOrIndex)
+	} else if scriptX < scriptXWithInherited {
+		return 0
+	} else if scriptX < scriptXWithOther {
+		return 1
+	} else {
+		return int32(uchar.ScriptExtension(codeOrIndex))
+	}
+}
diff --git a/go/mysql/icuregex/internal/uprops/uprops_binary.go b/go/mysql/icuregex/internal/uprops/uprops_binary.go
new file mode 100644
index 00000000000..855da92b3b6
--- /dev/null
+++ b/go/mysql/icuregex/internal/uprops/uprops_binary.go
@@ -0,0 +1,239 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package uprops
+
+import (
+	"golang.org/x/exp/constraints"
+	"golang.org/x/exp/slices"
+
+	"vitess.io/vitess/go/mysql/icuregex/internal/normalizer"
+	"vitess.io/vitess/go/mysql/icuregex/internal/ubidi"
+	"vitess.io/vitess/go/mysql/icuregex/internal/ucase"
+	"vitess.io/vitess/go/mysql/icuregex/internal/uchar"
+)
+
+// binaryProperty describes how one binary property is evaluated.
+//
+// For entries with a non-zero mask, column is the properties column passed
+// to uchar.GetUnicodeProperties and mask selects the property's bit in it.
+// For entries with mask==0, column is the property source and contains holds
+// a dedicated predicate (or nil when the property is unsupported).
+type binaryProperty struct {
+	column   propertySource
+	mask     uint32
+	contains func(prop *binaryProperty, c rune, which Property) bool
+}
+
+// uMask returns the 32-bit mask with only bit x set.
+func uMask[T constraints.Integer](x T) uint32 {
+	return 1 << x
+}
+
+// defaultContains reports whether the bit selected by prop.mask is set in
+// column prop.column of the Unicode properties of c.
+func defaultContains(prop *binaryProperty, c rune, _ Property) bool {
+	return (uchar.GetUnicodeProperties(c, int(prop.column)) & prop.mask) != 0
+}
+
+// binProps is indexed directly by the binary Property value, so the order of
+// the entries below is significant.
+var binProps = [uCharBinaryLimit]*binaryProperty{
+	/*
+	 * column and mask values for binary properties from u_getUnicodeProperties().
+	 * Must be in order of corresponding UProperty,
+	 * and there must be exactly one entry per binary UProperty.
+	 *
+	 * Properties with mask==0 are handled in code.
+	 * For them, column is the UPropertySource value.
+	 *
+	 * See also https://unicode-org.github.io/icu/userguide/strings/properties.html
+	 */
+	{1, uMask(pAlphabetic), defaultContains},
+	{1, uMask(pASCIIHexDigit), defaultContains},
+	{srcBidi, 0, isBidiControl},
+	{srcBidi, 0, isMirrored},
+	{1, uMask(pDash), defaultContains},
+	{1, uMask(pDefaultIgnorableCodePoint), defaultContains},
+	{1, uMask(pDeprecated), defaultContains},
+	{1, uMask(pDiacritic), defaultContains},
+	{1, uMask(pExtender), defaultContains},
+	{srcNfc, 0, hasFullCompositionExclusion},
+	{1, uMask(pGraphemeBase), defaultContains},
+	{1, uMask(pGraphemeExtend), defaultContains},
+	{1, uMask(pGraphemeLink), defaultContains},
+	{1, uMask(pHexDigit), defaultContains},
+	{1, uMask(pHyphen), defaultContains},
+	{1, uMask(pIDContinue), defaultContains},
+	{1, uMask(pIDStart), defaultContains},
+	{1, uMask(pIdeographic), defaultContains},
+	{1, uMask(pIdsBinaryOperator), defaultContains},
+	{1, uMask(pIdsTrinaryOperator), defaultContains},
+	{srcBidi, 0, isJoinControl},
+	{1, uMask(pLogicalOrderException), defaultContains},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_LOWERCASE
+	{1, uMask(pMath), defaultContains},
+	{1, uMask(pNoncharacterCodePoint), defaultContains},
+	{1, uMask(pQuotationMark), defaultContains},
+	{1, uMask(pRadical), defaultContains},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_SOFT_DOTTED
+	{1, uMask(pTerminalPunctuation), defaultContains},
+	{1, uMask(pUnifiedIdeograph), defaultContains},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_UPPERCASE
+	{1, uMask(pWhiteSpace), defaultContains},
+	{1, uMask(pXidContinue), defaultContains},
+	{1, uMask(pXidStart), defaultContains},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASE_SENSITIVE
+	{1, uMask(pSTerm), defaultContains},
+	{1, uMask(pVariationSelector), defaultContains},
+	{srcNfc, 0, isNormInert}, // UCHAR_NFD_INERT
+	{srcNfkc, 0, isNormInert}, // UCHAR_NFKD_INERT
+	{srcNfc, 0, isNormInert}, // UCHAR_NFC_INERT
+	{srcNfkc, 0, isNormInert}, // UCHAR_NFKC_INERT
+	{srcNfcCanonIter, 0, nil}, // Segment_Starter is currently unsupported
+	{1, uMask(pPatternSyntax), defaultContains},
+	{1, uMask(pPatternWhiteSpace), defaultContains},
+	{srcCharAndPropsvec, 0, isPOSIXAlnum},
+	{srcChar, 0, isPOSIXBlank},
+	{srcChar, 0, isPOSIXGraph},
+	{srcChar, 0, isPOSIXPrint},
+	{srcChar, 0, isPOSIXXdigit},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASED
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CASE_IGNORABLE
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_LOWERCASED
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_UPPERCASED
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_TITLECASED
+	{srcCaseAndNorm, 0, changesWhenCasefolded},
+	{srcCase, 0, caseBinaryPropertyContains}, // UCHAR_CHANGES_WHEN_CASEMAPPED
+	{srcNfkcCf, 0, nil}, // Changes_When_NFKC_Casefolded is currently unsupported
+	{2, uMask(p2Emoji), defaultContains},
+	{2, uMask(p2EmojiPresentation), defaultContains},
+	{2, uMask(p2EmojiModifier), defaultContains},
+	{2, uMask(p2EmojiModifierBase), defaultContains},
+	{2, uMask(p2EmojiComponent), defaultContains},
+	{2, 0, isRegionalIndicator},
+	{1, uMask(pPrependedConcatenationMark), defaultContains},
+	{2, uMask(p2ExtendedPictographic), defaultContains},
+}
+
+// isBidiControl delegates to ubidi.IsBidiControl.
+func isBidiControl(_ *binaryProperty, c rune, _ Property) bool {
+	return ubidi.IsBidiControl(c)
+}
+
+// isMirrored delegates to ubidi.IsMirrored.
+func isMirrored(_ *binaryProperty, c rune, _ Property) bool {
+	return ubidi.IsMirrored(c)
+}
+
+// isRegionalIndicator reports whether c lies in U+1F1E6..U+1F1FF.
+func isRegionalIndicator(_ *binaryProperty, c rune, _ Property) bool {
+	return 0x1F1E6 <= c && c <= 0x1F1FF
+}
+
+// changesWhenCasefolded reports whether case folding changes the NFC
+// decomposition of c. A code point without a decomposition is folded as
+// itself. Negative code points never change.
+func changesWhenCasefolded(_ *binaryProperty, c rune, _ Property) bool {
+	if c < 0 {
+		return false
+	}
+
+	nfd := normalizer.Nfc().Decompose(c)
+	if nfd == nil {
+		// No decomposition: compare the code point against its own folding.
+		nfd = []rune{c}
+	}
+	folded := ucase.FoldRunes(nfd)
+	return !slices.Equal(nfd, folded)
+}
+
+// isPOSIXXdigit delegates to uchar.IsXDigit.
+func isPOSIXXdigit(_ *binaryProperty, c rune, _ Property) bool {
+	return uchar.IsXDigit(c)
+}
+
+// isPOSIXPrint delegates to uchar.IsPOSIXPrint.
+func isPOSIXPrint(_ *binaryProperty, c rune, _ Property) bool {
+	return uchar.IsPOSIXPrint(c)
+}
+
+// isPOSIXGraph delegates to uchar.IsGraphPOSIX.
+func isPOSIXGraph(_ *binaryProperty, c rune, _ Property) bool {
+	return uchar.IsGraphPOSIX(c)
+}
+
+// isPOSIXBlank delegates to uchar.IsBlank.
+func isPOSIXBlank(_ *binaryProperty, c rune, _ Property) bool {
+	return uchar.IsBlank(c)
+}
+
+// isPOSIXAlnum reports whether c has the Alphabetic bit set in properties
+// column 1 or is a digit according to uchar.IsDigit.
+func isPOSIXAlnum(_ *binaryProperty, c rune, _ Property) bool {
+	return (uchar.GetUnicodeProperties(c, 1)&uMask(pAlphabetic)) != 0 || uchar.IsDigit(c)
+}
+
+// isJoinControl delegates to ubidi.IsJoinControl.
+func isJoinControl(_ *binaryProperty, c rune, _ Property) bool {
+	return ubidi.IsJoinControl(c)
+}
+
+// hasFullCompositionExclusion returns the NFC normalizer's IsCompNo answer
+// for c.
+func hasFullCompositionExclusion(_ *binaryProperty, c rune, _ Property) bool {
+	impl := normalizer.Nfc()
+	return impl.IsCompNo(c)
+}
+
+// caseBinaryPropertyContains adapts HasBinaryPropertyUcase to the signature
+// used by the binProps table.
+func caseBinaryPropertyContains(_ *binaryProperty, c rune, which Property) bool {
+	return HasBinaryPropertyUcase(c, which)
+}
+
+// HasBinaryPropertyUcase evaluates the case-related binary property which
+// for c using the ucase package. Properties not listed in the switch yield
+// false.
+func HasBinaryPropertyUcase(c rune, which Property) bool {
+	/* case mapping properties */
+	switch which {
+	case UCharLowercase:
+		return ucase.Lower == ucase.GetType(c)
+	case UCharUppercase:
+		return ucase.Upper == ucase.GetType(c)
+	case UCharSoftDotted:
+		return ucase.IsSoftDotted(c)
+	case UCharCaseSensitive:
+		return ucase.IsCaseSensitive(c)
+	case UCharCased:
+		return ucase.None != ucase.GetType(c)
+	case UCharCaseIgnorable:
+		return (ucase.GetTypeOrIgnorable(c) >> 2) != 0
+	/*
+	 * Note: The following Changes_When_Xyz are defined as testing whether
+	 * the NFD form of the input changes when Xyz-case-mapped.
+	 * However, this simpler implementation of these properties,
+	 * ignoring NFD, passes the tests.
+	 * The implementation needs to be changed if the tests start failing.
+	 * When that happens, optimizations should be used to work with the
+	 * per-single-code point ucase_toFullXyz() functions unless
+	 * the NFD form has more than one code point,
+	 * and the property starts set needs to be the union of the
+	 * start sets for normalization and case mappings.
+	 */
+	case UCharChangesWhenLowercased:
+		return ucase.ToFullLower(c) >= 0
+	case UCharChangesWhenUppercased:
+		return ucase.ToFullUpper(c) >= 0
+	case UCharChangesWhenTitlecased:
+		return ucase.ToFullTitle(c) >= 0
+	/* case UCHAR_CHANGES_WHEN_CASEFOLDED: -- in uprops.c */
+	case UCharChangesWhenCasemapped:
+		return ucase.ToFullLower(c) >= 0 || ucase.ToFullUpper(c) >= 0 || ucase.ToFullTitle(c) >= 0
+	default:
+		return false
+	}
+}
+
+// isNormInert asks the normalizer whether c is inert under the
+// normalization mode that corresponds to which.
+//
+// The mode is derived arithmetically: the distance of which from
+// UCharNfdInert is added to normalizer.NormNfd.
+// NOTE(review): this presumes the *_INERT properties and the normalizer
+// modes are numbered consecutively in the same order; confirm against both
+// enums.
+func isNormInert(_ *binaryProperty, c rune, which Property) bool {
+	mode := normalizer.Mode(int32(which) - int32(UCharNfdInert) + int32(normalizer.NormNfd))
+	return normalizer.IsInert(c, mode)
+}
+
+// HasBinaryProperty reports whether c has the binary property which.
+// It returns false when which is outside the binary property range or when
+// the property has no contains function in binProps.
+func HasBinaryProperty(c rune, which Property) bool {
+	if which < UCharBinaryStart || uCharBinaryLimit <= which {
+		return false
+	}
+	prop := binProps[which]
+	if prop.contains == nil {
+		return false
+	}
+	return prop.contains(prop, c, which)
+}
diff --git a/go/mysql/icuregex/internal/uprops/uprops_int.go b/go/mysql/icuregex/internal/uprops/uprops_int.go
new file mode 100644
index 00000000000..3e62d31184f
--- /dev/null
+++ b/go/mysql/icuregex/internal/uprops/uprops_int.go
@@ -0,0 +1,265 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package uprops
+
+import (
+	"vitess.io/vitess/go/mysql/icuregex/internal/normalizer"
+	"vitess.io/vitess/go/mysql/icuregex/internal/ubidi"
+	"vitess.io/vitess/go/mysql/icuregex/internal/uchar"
+	"vitess.io/vitess/go/mysql/icuregex/internal/ulayout"
+)
+
+// intPropertyGetValue computes the value of the int property which for code
+// point c; prop is the table entry the function was reached through.
+type intPropertyGetValue func(prop *intProperty, c rune, which Property) int32
+
+// intProperty describes how one int-valued property is evaluated.
+//
+// For entries with a non-zero mask, defaultGetValue reads properties column
+// `column`, applies mask and shifts the result right by shift. For entries
+// with mask==0, column is the property source and getValue is a dedicated
+// getter.
+// NOTE(review): in mask==0 entries the shift slot carries numbers such as
+// 0xff or ntCount-1; judging by the "max=..." comments in intProps this is
+// presumably the property's maximum value. It is not read in this file.
+type intProperty struct {
+	column   propertySource
+	mask     uint32
+	shift    int32
+	getValue intPropertyGetValue
+}
+
+// Mask/shift pairs used by the intProps entries that are served by
+// defaultGetValue.
+const (
+	blockMask  = 0x0001ff00
+	blockShift = 8
+
+	eaMask  = 0x000e0000
+	eaShift = 17
+
+	lbMask  = 0x03f00000
+	lbShift = 20
+
+	sbMask  = 0x000f8000
+	sbShift = 15
+
+	wbMask  = 0x00007c00
+	wbShift = 10
+
+	gcbMask  = 0x000003e0
+	gcbShift = 5
+
+	dtMask = 0x0000001f
+)
+
+// numericType is the value type of the Numeric_Type constants below.
+type numericType int32
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+const (
+	/*
+	 * Note: UNumericType constants are parsed by preparseucd.py.
+	 * It matches lines like
+	 *     U_NT_<Unicode Numeric_Type value name>
+	 */
+
+	ntNone    numericType = iota /*[None]*/
+	ntDecimal                    /*[de]*/
+	ntDigit                      /*[di]*/
+	ntNumeric                    /*[nu]*/
+	/**
+	 * One more than the highest normal UNumericType value.
+	 * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).
+	 *
+	 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+	 */
+	ntCount
+)
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+
+// hangunSyllableType is the value type of the Hangul_Syllable_Type
+// constants below.
+// NOTE(review): "hangun" looks like a typo for "hangul"; the identifier is
+// left untouched here.
+type hangunSyllableType int32
+
+const (
+	/*
+	 * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+	 * It matches lines like
+	 *     U_HST_<Unicode Hangul_Syllable_Type value name>
+	 */
+
+	hstNotApplicable hangunSyllableType = iota /*[NA]*/
+	hstLeadingJamo                             /*[L]*/
+	hstVowelJamo                               /*[V]*/
+	hstTrailingJamo                            /*[T]*/
+	hstLvSyllable                              /*[LV]*/
+	hstLvtSyllable                             /*[LVT]*/
+	/**
+	 * One more than the highest normal UHangulSyllableType value.
+	 * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).
+	 *
+	 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+	 */
+	hstCount
+)
+
+// intProps is indexed by (property - UCharIntStart), so the order of the
+// entries below is significant.
+var intProps = [uCharIntLimit - UCharIntStart]*intProperty{
+	/*
+	 * column, mask and shift values for int-value properties from u_getUnicodeProperties().
+	 * Must be in order of corresponding UProperty,
+	 * and there must be exactly one entry per int UProperty.
+	 *
+	 * Properties with mask==0 are handled in code.
+	 * For them, column is the UPropertySource value.
+	 */
+	{srcBidi, 0, 0, getBiDiClass},
+	{0, blockMask, blockShift, defaultGetValue},
+	{srcNfc, 0, 0xff, getCombiningClass},
+	{2, dtMask, 0, defaultGetValue},
+	{0, eaMask, eaShift, defaultGetValue},
+	{srcChar, 0, int32(uchar.CharCategoryCount - 1), getGeneralCategory},
+	{srcBidi, 0, 0, getJoiningGroup},
+	{srcBidi, 0, 0, getJoiningType},
+	{2, lbMask, lbShift, defaultGetValue},
+	{srcChar, 0, int32(ntCount - 1), getNumericType},
+	{srcPropsvec, 0, 0, getScript},
+	{srcPropsvec, 0, int32(hstCount - 1), getHangulSyllableType},
+	// UCHAR_NFD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
+	{srcNfc, 0, int32(normalizer.Yes), getNormQuickCheck},
+	// UCHAR_NFKD_QUICK_CHECK: max=1=YES -- never "maybe", only "no" or "yes"
+	{srcNfkc, 0, int32(normalizer.Yes), getNormQuickCheck},
+	// UCHAR_NFC_QUICK_CHECK: max=2=MAYBE
+	{srcNfc, 0, int32(normalizer.Maybe), getNormQuickCheck},
+	// UCHAR_NFKC_QUICK_CHECK: max=2=MAYBE
+	{srcNfkc, 0, int32(normalizer.Maybe), getNormQuickCheck},
+	{srcNfc, 0, 0xff, getLeadCombiningClass},
+	{srcNfc, 0, 0xff, getTrailCombiningClass},
+	{2, gcbMask, gcbShift, defaultGetValue},
+	{2, sbMask, sbShift, defaultGetValue},
+	{2, wbMask, wbShift, defaultGetValue},
+	{srcBidi, 0, 0, getBiDiPairedBracketType},
+	{srcInpc, 0, 0, getInPC},
+	{srcInsc, 0, 0, getInSC},
+	{srcVo, 0, 0, getVo},
+}
+
+// getVo returns the value stored for c in the ulayout Vo trie.
+func getVo(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ulayout.VoTrie().Get(c))
+}
+
+// getInSC returns the value stored for c in the ulayout InSC trie.
+func getInSC(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ulayout.InscTrie().Get(c))
+}
+
+// getInPC returns the value stored for c in the ulayout InPC trie.
+func getInPC(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ulayout.InpcTrie().Get(c))
+}
+
+// getBiDiPairedBracketType delegates to ubidi.PairedBracketType.
+func getBiDiPairedBracketType(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ubidi.PairedBracketType(c))
+}
+
+// getTrailCombiningClass returns the low byte of the NFC FCD16 value of c.
+func getTrailCombiningClass(_ *intProperty, c rune, _ Property) int32 {
+	return int32(normalizer.Nfc().GetFCD16(c) & 0xff)
+}
+
+// getLeadCombiningClass returns the NFC FCD16 value of c shifted right by
+// eight bits, i.e. its high byte.
+func getLeadCombiningClass(_ *intProperty, c rune, _ Property) int32 {
+	val := int32(normalizer.Nfc().GetFCD16(c) >> 8)
+	return val
+}
+
+// getNormQuickCheck returns the normalizer's quick-check result for c.
+//
+// The mode is derived arithmetically: the distance of which from
+// UCharNfdQuickCheck is added to normalizer.NormNfd.
+// NOTE(review): this presumes the *_QUICK_CHECK properties and the
+// normalizer modes are numbered consecutively in the same order; confirm
+// against both enums.
+func getNormQuickCheck(_ *intProperty, c rune, which Property) int32 {
+	return int32(normalizer.QuickCheck(c, normalizer.Mode(int32(which)-int32(UCharNfdQuickCheck)+int32(normalizer.NormNfd))))
+}
+
+/*
+ * Map some of the Grapheme Cluster Break values to Hangul Syllable Types.
+ * Hangul_Syllable_Type is fully redundant with a subset of Grapheme_Cluster_Break.
+ */
+var gcbToHst = []hangunSyllableType{
+	hstNotApplicable, /* U_GCB_OTHER */
+	hstNotApplicable, /* U_GCB_CONTROL */
+	hstNotApplicable, /* U_GCB_CR */
+	hstNotApplicable, /* U_GCB_EXTEND */
+	hstLeadingJamo,   /* U_GCB_L */
+	hstNotApplicable, /* U_GCB_LF */
+	hstLvSyllable,    /* U_GCB_LV */
+	hstLvtSyllable,   /* U_GCB_LVT */
+	hstTrailingJamo,  /* U_GCB_T */
+	hstVowelJamo,     /* U_GCB_V */
+	/*
+	 * Omit GCB values beyond what we need for hst.
+	 * The code below checks for the array length.
+	 */
+}
+
+// getHangulSyllableType derives the Hangul syllable type of c from the
+// Grapheme_Cluster_Break bits in properties column 2. Break values beyond
+// the end of gcbToHst map to hstNotApplicable.
+func getHangulSyllableType(_ *intProperty, c rune, _ Property) int32 {
+	/* see comments on gcbToHst[] above */
+	gcb := (int32(uchar.GetUnicodeProperties(c, 2)) & gcbMask) >> gcbShift
+
+	if gcb < int32(len(gcbToHst)) {
+		return int32(gcbToHst[gcb])
+	}
+	return int32(hstNotApplicable)
+}
+
+// getScript adapts script to the intPropertyGetValue signature.
+func getScript(_ *intProperty, c rune, _ Property) int32 {
+	return script(c)
+}
+
+// getNumericType classifies the numeric type value of c through ntvGetType.
+func getNumericType(_ *intProperty, c rune, _ Property) int32 {
+	ntv := uchar.NumericTypeValue(c)
+	return int32(ntvGetType(ntv))
+}
+
+// getJoiningType delegates to ubidi.JoinType.
+func getJoiningType(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ubidi.JoinType(c))
+}
+
+// getJoiningGroup delegates to ubidi.JoinGroup.
+func getJoiningGroup(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ubidi.JoinGroup(c))
+}
+
+// getGeneralCategory delegates to uchar.CharType.
+func getGeneralCategory(_ *intProperty, c rune, _ Property) int32 {
+	return int32(uchar.CharType(c))
+}
+
+// getCombiningClass returns the NFC normalizer's combining class of c.
+func getCombiningClass(_ *intProperty, c rune, _ Property) int32 {
+	return int32(normalizer.Nfc().CombiningClass(c))
+}
+
+// defaultGetValue extracts a bit field from the Unicode properties of c:
+// column prop.column is masked with prop.mask and the result is shifted
+// right by prop.shift.
+func defaultGetValue(prop *intProperty, c rune, _ Property) int32 {
+	return int32(uchar.GetUnicodeProperties(c, int(prop.column))&prop.mask) >> prop.shift
+}
+
+// getBiDiClass delegates to ubidi.Class.
+func getBiDiClass(_ *intProperty, c rune, _ Property) int32 {
+	return int32(ubidi.Class(c))
+}
+
+// ntvGetType maps a raw numeric type value onto its numericType.
+// uchar.UPropsNtvNone is ntNone, values below UPropsNtvDigitStart are
+// ntDecimal, values below UPropsNtvNumericStart are ntDigit, and everything
+// else is ntNumeric.
+func ntvGetType(ntv uint16) numericType {
+	switch {
+	case ntv == uchar.UPropsNtvNone:
+		return ntNone
+	case ntv < uchar.UPropsNtvDigitStart:
+		return ntDecimal
+	case ntv < uchar.UPropsNtvNumericStart:
+		return ntDigit
+	default:
+		return ntNumeric
+	}
+}
diff --git a/go/mysql/icuregex/internal/uprops/uscript.go b/go/mysql/icuregex/internal/uprops/uscript.go
new file mode 100644
index 00000000000..8a4423849df
--- /dev/null
+++ b/go/mysql/icuregex/internal/uprops/uscript.go
@@ -0,0 +1,505 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uprops + +import "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + +/** + * Constants for ISO 15924 script codes. + * + * The current set of script code constants supports at least all scripts + * that are encoded in the version of Unicode which ICU currently supports. + * The names of the constants are usually derived from the + * Unicode script property value aliases. + * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/) + * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt . + * + * In addition, constants for many ISO 15924 script codes + * are included, for use with language tags, CLDR data, and similar. + * Some of those codes are not used in the Unicode Character Database (UCD). + * For example, there are no characters that have a UCD script property value of + * Hans or Hant. All Han ideographs have the Hani script property value in Unicode. + * + * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR. + * + * Starting with ICU 55, script codes are only added when their scripts + * have been or will certainly be encoded in Unicode, + * and have been assigned Unicode script property value aliases, + * to ensure that their script names are stable and match the names of the constants. 
+ * Script codes like Latf and Aran that are not subject to separate encoding + * may be added at any time. + * + * @stable ICU 2.2 + */ +type code int32 + +/* + * Note: UScriptCode constants and their ISO script code comments + * are parsed by preparseucd.py. + * It matches lines like + * USCRIPT_ = , / * * / + */ + +const ( + /** @stable ICU 2.2 */ + invalidCode code = -1 + /** @stable ICU 2.2 */ + common code = 0 /* Zyyy */ + /** @stable ICU 2.2 */ + inherited code = 1 /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */ + /** @stable ICU 2.2 */ + arabic code = 2 /* Arab */ + /** @stable ICU 2.2 */ + armenian code = 3 /* Armn */ + /** @stable ICU 2.2 */ + bengali code = 4 /* Beng */ + /** @stable ICU 2.2 */ + bopomofo code = 5 /* Bopo */ + /** @stable ICU 2.2 */ + cherokee code = 6 /* Cher */ + /** @stable ICU 2.2 */ + coptic code = 7 /* Copt */ + /** @stable ICU 2.2 */ + cyrillic code = 8 /* Cyrl */ + /** @stable ICU 2.2 */ + deseret code = 9 /* Dsrt */ + /** @stable ICU 2.2 */ + devanagari code = 10 /* Deva */ + /** @stable ICU 2.2 */ + ethiopic code = 11 /* Ethi */ + /** @stable ICU 2.2 */ + georgian code = 12 /* Geor */ + /** @stable ICU 2.2 */ + gothic code = 13 /* Goth */ + /** @stable ICU 2.2 */ + greek code = 14 /* Grek */ + /** @stable ICU 2.2 */ + gujarati code = 15 /* Gujr */ + /** @stable ICU 2.2 */ + gurmukhi code = 16 /* Guru */ + /** @stable ICU 2.2 */ + han code = 17 /* Hani */ + /** @stable ICU 2.2 */ + hangul code = 18 /* Hang */ + /** @stable ICU 2.2 */ + hebrew code = 19 /* Hebr */ + /** @stable ICU 2.2 */ + hiragana code = 20 /* Hira */ + /** @stable ICU 2.2 */ + kannada code = 21 /* Knda */ + /** @stable ICU 2.2 */ + katakana code = 22 /* Kana */ + /** @stable ICU 2.2 */ + khmer code = 23 /* Khmr */ + /** @stable ICU 2.2 */ + lao code = 24 /* Laoo */ + /** @stable ICU 2.2 */ + latin code = 25 /* Latn */ + /** @stable ICU 2.2 */ + malayalam code = 26 /* Mlym */ + /** @stable ICU 2.2 */ + mongolian code = 
27 /* Mong */ + /** @stable ICU 2.2 */ + myanmar code = 28 /* Mymr */ + /** @stable ICU 2.2 */ + ogham code = 29 /* Ogam */ + /** @stable ICU 2.2 */ + oldItalic code = 30 /* Ital */ + /** @stable ICU 2.2 */ + oriya code = 31 /* Orya */ + /** @stable ICU 2.2 */ + runic code = 32 /* Runr */ + /** @stable ICU 2.2 */ + sinhala code = 33 /* Sinh */ + /** @stable ICU 2.2 */ + syriac code = 34 /* Syrc */ + /** @stable ICU 2.2 */ + tamil code = 35 /* Taml */ + /** @stable ICU 2.2 */ + telugu code = 36 /* Telu */ + /** @stable ICU 2.2 */ + thaana code = 37 /* Thaa */ + /** @stable ICU 2.2 */ + thai code = 38 /* Thai */ + /** @stable ICU 2.2 */ + tibetan code = 39 /* Tibt */ + /** Canadian_Aboriginal script. @stable ICU 2.6 */ + canadianAboriginal code = 40 /* Cans */ + /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */ + ucas code = canadianAboriginal + /** @stable ICU 2.2 */ + yi code = 41 /* Yiii */ + /* New scripts in Unicode 3.2 */ + /** @stable ICU 2.2 */ + tagalog code = 42 /* Tglg */ + /** @stable ICU 2.2 */ + hanunoo code = 43 /* Hano */ + /** @stable ICU 2.2 */ + buhid code = 44 /* Buhd */ + /** @stable ICU 2.2 */ + tagbanwa code = 45 /* Tagb */ + + /* New scripts in Unicode 4 */ + /** @stable ICU 2.6 */ + braille code = 46 /* Brai */ + /** @stable ICU 2.6 */ + cypriot code = 47 /* Cprt */ + /** @stable ICU 2.6 */ + limbu code = 48 /* Limb */ + /** @stable ICU 2.6 */ + linearB code = 49 /* Linb */ + /** @stable ICU 2.6 */ + osmanya code = 50 /* Osma */ + /** @stable ICU 2.6 */ + shavian code = 51 /* Shaw */ + /** @stable ICU 2.6 */ + taiLe code = 52 /* Tale */ + /** @stable ICU 2.6 */ + ugaratic code = 53 /* Ugar */ + + /** New script code in Unicode 4.0.1 @stable ICU 3.0 */ + katakanaOrHiragana = 54 /*Hrkt */ + + /* New scripts in Unicode 4.1 */ + /** @stable ICU 3.4 */ + buginese code = 55 /* Bugi */ + /** @stable ICU 3.4 */ + glagolitic code = 56 /* Glag */ + /** @stable ICU 3.4 */ + kharoshthi code = 57 /* Khar */ + /** @stable ICU 3.4 */ + sylotiNagri 
code = 58 /* Sylo */ + /** @stable ICU 3.4 */ + newTaiLue code = 59 /* Talu */ + /** @stable ICU 3.4 */ + tifinagh code = 60 /* Tfng */ + /** @stable ICU 3.4 */ + oldPersian code = 61 /* Xpeo */ + + /* New script codes from Unicode and ISO 15924 */ + /** @stable ICU 3.6 */ + balinese code = 62 /* Bali */ + /** @stable ICU 3.6 */ + batak code = 63 /* Batk */ + /** @stable ICU 3.6 */ + blissymbols code = 64 /* Blis */ + /** @stable ICU 3.6 */ + brahmi code = 65 /* Brah */ + /** @stable ICU 3.6 */ + cham code = 66 /* Cham */ + /** @stable ICU 3.6 */ + cirth code = 67 /* Cirt */ + /** @stable ICU 3.6 */ + oldChurchSlavonicCyrillic code = 68 /* Cyrs */ + /** @stable ICU 3.6 */ + demoticEgyptian code = 69 /* Egyd */ + /** @stable ICU 3.6 */ + hieraticEgyptian code = 70 /* Egyh */ + /** @stable ICU 3.6 */ + egyptianHieroglyphs code = 71 /* Egyp */ + /** @stable ICU 3.6 */ + khutsuri code = 72 /* Geok */ + /** @stable ICU 3.6 */ + simplfiedHan code = 73 /* Hans */ + /** @stable ICU 3.6 */ + traditionalHan code = 74 /* Hant */ + /** @stable ICU 3.6 */ + pahawhHmong code = 75 /* Hmng */ + /** @stable ICU 3.6 */ + oldHungarian code = 76 /* Hung */ + /** @stable ICU 3.6 */ + harappanIndus code = 77 /* Inds */ + /** @stable ICU 3.6 */ + javanese code = 78 /* Java */ + /** @stable ICU 3.6 */ + kayahLi code = 79 /* Kali */ + /** @stable ICU 3.6 */ + latinFraktur code = 80 /* Latf */ + /** @stable ICU 3.6 */ + latinGaelic code = 81 /* Latg */ + /** @stable ICU 3.6 */ + lepcha code = 82 /* Lepc */ + /** @stable ICU 3.6 */ + linearA code = 83 /* Lina */ + /** @stable ICU 4.6 */ + mandaic code = 84 /* Mand */ + /** @stable ICU 3.6 */ + mandaean code = mandaic + /** @stable ICU 3.6 */ + mayanHieroglyphs code = 85 /* Maya */ + /** @stable ICU 4.6 */ + meroiticHieroglyphs code = 86 /* Mero */ + /** @stable ICU 3.6 */ + meroitic code = meroiticHieroglyphs + /** @stable ICU 3.6 */ + nko code = 87 /* Nkoo */ + /** @stable ICU 3.6 */ + orkhon code = 88 /* Orkh */ + /** @stable ICU 3.6 */ + 
oldPermic code = 89 /* Perm */ + /** @stable ICU 3.6 */ + phagsPa code = 90 /* Phag */ + /** @stable ICU 3.6 */ + phoenician code = 91 /* Phnx */ + /** @stable ICU 52 */ + miao code = 92 /* Plrd */ + /** @stable ICU 3.6 */ + phoneticPollard code = miao + /** @stable ICU 3.6 */ + rongoRongo code = 93 /* Roro */ + /** @stable ICU 3.6 */ + sarati code = 94 /* Sara */ + /** @stable ICU 3.6 */ + extrangeloSyriac code = 95 /* Syre */ + /** @stable ICU 3.6 */ + westernSyriac code = 96 /* Syrj */ + /** @stable ICU 3.6 */ + easternSyriac code = 97 /* Syrn */ + /** @stable ICU 3.6 */ + tengwar code = 98 /* Teng */ + /** @stable ICU 3.6 */ + vai code = 99 /* Vaii */ + /** @stable ICU 3.6 */ + visibleSpeech code = 100 /* Visp */ + /** @stable ICU 3.6 */ + cuneiform code = 101 /* Xsux */ + /** @stable ICU 3.6 */ + unwrittenLanguages code = 102 /* Zxxx */ + /** @stable ICU 3.6 */ + unknown code = 103 /* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */ + + /** @stable ICU 3.8 */ + carian code = 104 /* Cari */ + /** @stable ICU 3.8 */ + japanese code = 105 /* Jpan */ + /** @stable ICU 3.8 */ + lanna code = 106 /* Lana */ + /** @stable ICU 3.8 */ + lycian code = 107 /* Lyci */ + /** @stable ICU 3.8 */ + lydian code = 108 /* Lydi */ + /** @stable ICU 3.8 */ + olChiki code = 109 /* Olck */ + /** @stable ICU 3.8 */ + rejang code = 110 /* Rjng */ + /** @stable ICU 3.8 */ + saurashtra code = 111 /* Saur */ + /** Sutton SignWriting @stable ICU 3.8 */ + signWriting code = 112 /* Sgnw */ + /** @stable ICU 3.8 */ + sundanese code = 113 /* Sund */ + /** @stable ICU 3.8 */ + moon code = 114 /* Moon */ + /** @stable ICU 3.8 */ + meiteiMayek code = 115 /* Mtei */ + + /** @stable ICU 4.0 */ + imperialAramaic code = 116 /* Armi */ + /** @stable ICU 4.0 */ + avestan code = 117 /* Avst */ + /** @stable ICU 4.0 */ + chakma code = 118 /* Cakm */ + /** @stable ICU 4.0 */ + korean code = 119 /* Kore */ + /** @stable ICU 4.0 */ + kaithi code = 120 /* Kthi */ + /** @stable ICU 
4.0 */ + manichaean code = 121 /* Mani */ + /** @stable ICU 4.0 */ + inscriptionalPahlavi code = 122 /* Phli */ + /** @stable ICU 4.0 */ + psalterPahlavi code = 123 /* Phlp */ + /** @stable ICU 4.0 */ + bookPahlavi code = 124 /* Phlv */ + /** @stable ICU 4.0 */ + inscriptionalParthian code = 125 /* Prti */ + /** @stable ICU 4.0 */ + samaritan code = 126 /* Samr */ + /** @stable ICU 4.0 */ + taiViet code = 127 /* Tavt */ + /** @stable ICU 4.0 */ + mathematicalNotation code = 128 /* Zmth */ + /** @stable ICU 4.0 */ + symbols code = 129 /* Zsym */ + + /** @stable ICU 4.4 */ + bamum code = 130 /* Bamu */ + /** @stable ICU 4.4 */ + lisu code = 131 /* Lisu */ + /** @stable ICU 4.4 */ + nakhiGeba code = 132 /* Nkgb */ + /** @stable ICU 4.4 */ + oldSouthArabian code = 133 /* Sarb */ + + /** @stable ICU 4.6 */ + bassaVah code = 134 /* Bass */ + /** @stable ICU 54 */ + duployan code = 135 /* Dupl */ + /** @stable ICU 4.6 */ + elbasan code = 136 /* Elba */ + /** @stable ICU 4.6 */ + grantha code = 137 /* Gran */ + /** @stable ICU 4.6 */ + kpelle code = 138 /* Kpel */ + /** @stable ICU 4.6 */ + loma code = 139 /* Loma */ + /** Mende Kikakui @stable ICU 4.6 */ + mende code = 140 /* Mend */ + /** @stable ICU 4.6 */ + meroiticCursive code = 141 /* Merc */ + /** @stable ICU 4.6 */ + oldNorthArabian code = 142 /* Narb */ + /** @stable ICU 4.6 */ + nabataean code = 143 /* Nbat */ + /** @stable ICU 4.6 */ + palmyrene code = 144 /* Palm */ + /** @stable ICU 54 */ + khudawadi code = 145 /* Sind */ + /** @stable ICU 4.6 */ + sindhi code = khudawadi + /** @stable ICU 4.6 */ + warangCiti code = 146 /* Wara */ + + /** @stable ICU 4.8 */ + afaka code = 147 /* Afak */ + /** @stable ICU 4.8 */ + jurchen code = 148 /* Jurc */ + /** @stable ICU 4.8 */ + mro code = 149 /* Mroo */ + /** @stable ICU 4.8 */ + nushu code = 150 /* Nshu */ + /** @stable ICU 4.8 */ + sharada code = 151 /* Shrd */ + /** @stable ICU 4.8 */ + soraSompeng code = 152 /* Sora */ + /** @stable ICU 4.8 */ + takri code = 153 /* 
Takr */ + /** @stable ICU 4.8 */ + tangut code = 154 /* Tang */ + /** @stable ICU 4.8 */ + woleai code = 155 /* Wole */ + + /** @stable ICU 49 */ + anatolianHieroglyphs code = 156 /* Hluw */ + /** @stable ICU 49 */ + khojki code = 157 /* Khoj */ + /** @stable ICU 49 */ + tirhuta code = 158 /* Tirh */ + + /** @stable ICU 52 */ + caucasianAlbanian code = 159 /* Aghb */ + /** @stable ICU 52 */ + mahajani code = 160 /* Mahj */ + + /** @stable ICU 54 */ + ahom code = 161 /* Ahom */ + /** @stable ICU 54 */ + hatran code = 162 /* Hatr */ + /** @stable ICU 54 */ + modi code = 163 /* Modi */ + /** @stable ICU 54 */ + multani code = 164 /* Mult */ + /** @stable ICU 54 */ + pauCinHau code = 165 /* Pauc */ + /** @stable ICU 54 */ + siddham code = 166 /* Sidd */ + + /** @stable ICU 58 */ + adlam code = 167 /* Adlm */ + /** @stable ICU 58 */ + bhaiksuki code = 168 /* Bhks */ + /** @stable ICU 58 */ + marchen code = 169 /* Marc */ + /** @stable ICU 58 */ + newa code = 170 /* Newa */ + /** @stable ICU 58 */ + osage code = 171 /* Osge */ + + /** @stable ICU 58 */ + hanWithBopomofo code = 172 /* Hanb */ + /** @stable ICU 58 */ + jamo code = 173 /* Jamo */ + /** @stable ICU 58 */ + symbolsEmoji code = 174 /* Zsye */ + + /** @stable ICU 60 */ + masaramGondi code = 175 /* Gonm */ + /** @stable ICU 60 */ + soyombo code = 176 /* Soyo */ + /** @stable ICU 60 */ + zanabazarSquare code = 177 /* Zanb */ + + /** @stable ICU 62 */ + dogra code = 178 /* Dogr */ + /** @stable ICU 62 */ + gunjalaGondi code = 179 /* Gong */ + /** @stable ICU 62 */ + makasar code = 180 /* Maka */ + /** @stable ICU 62 */ + medefaidrin code = 181 /* Medf */ + /** @stable ICU 62 */ + hanifiRohingya code = 182 /* Rohg */ + /** @stable ICU 62 */ + sogdian code = 183 /* Sogd */ + /** @stable ICU 62 */ + oldSogdian code = 184 /* Sogo */ + + /** @stable ICU 64 */ + elymaic code = 185 /* Elym */ + /** @stable ICU 64 */ + nyiakengPuachueHmong code = 186 /* Hmnp */ + /** @stable ICU 64 */ + nandinagari code = 187 /* Nand */ + 
/** @stable ICU 64 */ + wancho code = 188 /* Wcho */ + + /** @stable ICU 66 */ + chorasmian code = 189 /* Chrs */ + /** @stable ICU 66 */ + divesAkuru code = 190 /* Diak */ + /** @stable ICU 66 */ + khitanSmallScript code = 191 /* Kits */ + /** @stable ICU 66 */ + yezedi code = 192 /* Yezi */ +) + +func uscriptHasScript(c rune, sc code) bool { + scriptX := uchar.GetUnicodeProperties(c, 0) & scriptXMask + codeOrIndex := mergeScriptCodeOrIndex(scriptX) + if scriptX < scriptXWithCommon { + return sc == code(codeOrIndex) + } + + scx := uchar.ScriptExtensions(codeOrIndex) + if scriptX >= scriptXWithOther { + scx = uchar.ScriptExtensions(uint32(scx[1])) + } + sc32 := uint32(sc) + if sc32 > 0x7fff { + /* Guard against bogus input that would make us go past the Script_Extensions terminator. */ + return false + } + for sc32 > uint32(scx[0]) { + scx = scx[1:] + } + return sc32 == uint32(scx[0]&0x7fff) +} diff --git a/go/mysql/icuregex/internal/uset/close.go b/go/mysql/icuregex/internal/uset/close.go new file mode 100644 index 00000000000..bd3f9f0f7e3 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/close.go @@ -0,0 +1,96 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uset + +import "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + +type USet uint32 + +const ( + /** + * Ignore white space within patterns unless quoted or escaped. + * @stable ICU 2.4 + */ + IgnoreSpace USet = 1 + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This performs a full + * closure over case mappings, e.g. U+017F for s. + * + * The resulting set is a superset of the input for the code points but + * not for the strings. + * It performs a case mapping closure of the code points and adds + * full case folding strings for the code points, and reduces strings of + * the original set to their full case folding equivalents. + * + * This is designed for case-insensitive matches, for example + * in regular expressions. The full code point case closure allows checking of + * an input character directly against the closure set. + * Strings are matched by comparing the case-folded form from the closure + * set with an incremental case folding of the string in question. + * + * The closure set will also contain single code points if the original + * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.). + * This is not necessary (that is, redundant) for the above matching method + * but results in the same closure sets regardless of whether the original + * set contained the code point or a string. + * + * @stable ICU 2.4 + */ + CaseInsensitive USet = 2 + + /** + * Enable case insensitive matching. E.g., "[ab]" with this flag + * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will + * match all except 'a', 'A', 'b', and 'B'. This adds the lower-, + * title-, and uppercase mappings as well as the case folding + * of each existing element in the set. 
+ * @stable ICU 3.2 + */ + AddCaseMappings USet = 4 +) + +func (u *UnicodeSet) CloseOver(attribute USet) { + if attribute&AddCaseMappings != 0 { + panic("USET_ADD_CASE_MAPPINGS is unsupported") + } + if (attribute & CaseInsensitive) == 0 { + return + } + + foldSet := u.Clone() + n := u.RangeCount() + + for i := 0; i < n; i++ { + start := u.RangeStart(i) + end := u.RangeEnd(i) + + // full case closure + for cp := start; cp <= end; cp++ { + ucase.AddCaseClosure(cp, foldSet) + } + } + + *u = *foldSet +} diff --git a/go/mysql/icuregex/internal/uset/frozen.go b/go/mysql/icuregex/internal/uset/frozen.go new file mode 100644 index 00000000000..2703a4f6975 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/frozen.go @@ -0,0 +1,339 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +type frozen struct { + // One byte 0 or 1 per Latin-1 character. + latin1Contains [0x100]byte + + // true if contains(U+FFFD) + containsFFFD bool + + /* + * One bit per code point from U+0000..U+07FF. + * The bits are organized vertically; consecutive code points + * correspond to the same bit positions in consecutive table words. 
+ * With code point parts + * lead=c{10..6} + * trail=c{5..0} + * it is set.contains(c)==(table7FF[trail] bit lead) + * + * Bits for 0..7F (non-shortest forms) are set to the result of contains(FFFD) + * for faster validity checking at runtime. + */ + table7FF [64]uint32 + + /* + * One bit per 64 BMP code points. + * The bits are organized vertically; consecutive 64-code point blocks + * correspond to the same bit position in consecutive table words. + * With code point parts + * lead=c{15..12} + * t1=c{11..6} + * test bits (lead+16) and lead in bmpBlockBits[t1]. + * If the upper bit is 0, then the lower bit indicates if contains(c) + * for all code points in the 64-block. + * If the upper bit is 1, then the block is mixed and set.contains(c) + * must be called. + * + * Bits for 0..7FF (non-shortest forms) and D800..DFFF are set to + * the result of contains(FFFD) for faster validity checking at runtime. + */ + bmpBlockBits [64]uint32 + + /* + * Inversion list indexes for restricted binary searches in + * findCodePoint(), from + * findCodePoint(U+0800, U+1000, U+2000, .., U+F000, U+10000). + * U+0800 is the first 3-byte-UTF-8 code point. Code points below U+0800 are + * always looked up in the bit tables. + * The last pair of indexes is for finding supplementary code points. 
+ */ + list4kStarts [18]int32 +} + +func freeze(list []rune) *frozen { + f := &frozen{} + + listEnd := int32(len(list) - 1) + + f.list4kStarts[0] = f.findCodePoint(list, 0x800, 0, listEnd) + for i := 1; i <= 0x10; i++ { + f.list4kStarts[i] = f.findCodePoint(list, rune(i)<<12, f.list4kStarts[i-1], listEnd) + } + f.list4kStarts[0x11] = listEnd + f.containsFFFD = f.containsSlow(list, 0xfffd, f.list4kStarts[0xf], f.list4kStarts[0x10]) + + f.initBits(list) + f.overrideIllegal() + + return f +} + +func (f *frozen) containsSlow(list []rune, c rune, lo, hi int32) bool { + return (f.findCodePoint(list, c, lo, hi) & 1) != 0 +} + +func (f *frozen) findCodePoint(list []rune, c rune, lo, hi int32) int32 { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if c < list[lo] { + return lo + } + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + if lo >= hi || c >= list[hi-1] { + return hi + } + // invariant: c >= list[lo] + // invariant: c < list[hi] + for { + i := (lo + hi) >> 1 + if i == lo { + break // Found! + } else if c < list[i] { + hi = i + } else { + lo = i + } + } + return hi +} + +func (f *frozen) set32x64bits(table *[64]uint32, start, limit int32) { + lead := start >> 6 // Named for UTF-8 2-byte lead byte with upper 5 bits. + trail := start & 0x3f // Named for UTF-8 2-byte trail byte with lower 6 bits. + + // Set one bit indicating an all-one block. + bits := uint32(1) << lead + if (start + 1) == limit { // Single-character shortcut. + table[trail] |= bits + return + } + + limitLead := limit >> 6 + limitTrail := limit & 0x3f + + if lead == limitLead { + // Partial vertical bit column. 
+ for trail < limitTrail { + table[trail] |= bits + trail++ + } + } else { + // Partial vertical bit column, + // followed by a bit rectangle, + // followed by another partial vertical bit column. + if trail > 0 { + for { + table[trail] |= bits + trail++ + if trail >= 64 { + break + } + } + lead++ + } + if lead < limitLead { + bits = ^((uint32(1) << lead) - 1) + if limitLead < 0x20 { + bits &= (uint32(1) << limitLead) - 1 + } + for trail = 0; trail < 64; trail++ { + table[trail] |= bits + } + } + // limit<=0x800. If limit==0x800 then limitLead=32 and limitTrail=0. + // In that case, bits=1<= 0x100 { + break + } + for { + f.latin1Contains[start] = 1 + start++ + if start >= limit || start >= 0x100 { + break + } + } + if limit > 0x100 { + break + } + } + + // Find the first range overlapping with (or after) 80..FF again, + // to include them in table7FF as well. + listIndex = 0 + for { + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + if limit > 0x80 { + if start < 0x80 { + start = 0x80 + } + break + } + } + + // Set table7FF[]. + for start < 0x800 { + var end rune + if limit <= 0x800 { + end = limit + } else { + end = 0x800 + } + f.set32x64bits(&f.table7FF, start, end) + if limit > 0x800 { + start = 0x800 + break + } + + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + } + + // Set bmpBlockBits[]. + minStart := rune(0x800) + for start < 0x10000 { + if limit > 0x10000 { + limit = 0x10000 + } + + if start < minStart { + start = minStart + } + if start < limit { // Else: Another range entirely in a known mixed-value block. + if (start & 0x3f) != 0 { + // Mixed-value block of 64 code points. + start >>= 6 + f.bmpBlockBits[start&0x3f] |= 0x10001 << (start >> 6) + start = (start + 1) << 6 // Round up to the next block boundary. + minStart = start // Ignore further ranges in this block. 
+ } + if start < limit { + if start < (limit &^ 0x3f) { + // Multiple all-ones blocks of 64 code points each. + f.set32x64bits(&f.bmpBlockBits, start>>6, limit>>6) + } + + if (limit & 0x3f) != 0 { + // Mixed-value block of 64 code points. + limit >>= 6 + f.bmpBlockBits[limit&0x3f] |= 0x10001 << (limit >> 6) + limit = (limit + 1) << 6 // Round up to the next block boundary. + minStart = limit // Ignore further ranges in this block. + } + } + } + + if limit == 0x10000 { + break + } + + start = list[listIndex] + listIndex++ + if listIndex < len(list) { + limit = list[listIndex] + listIndex++ + } else { + limit = 0x110000 + } + } +} diff --git a/go/mysql/icuregex/internal/uset/pattern.go b/go/mysql/icuregex/internal/uset/pattern.go new file mode 100644 index 00000000000..20b44da9c6d --- /dev/null +++ b/go/mysql/icuregex/internal/uset/pattern.go @@ -0,0 +1,107 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package uset + +import ( + "strings" + + "vitess.io/vitess/go/mysql/icuregex/internal/pattern" +) + +func (u *UnicodeSet) String() string { + var buf strings.Builder + u.ToPattern(&buf, true) + return buf.String() +} + +func (u *UnicodeSet) ToPattern(w *strings.Builder, escapeUnprintable bool) { + w.WriteByte('[') + + // // Check against the predefined categories. We implicitly build + // // up ALL category sets the first time toPattern() is called. + // for (int8_t cat=0; cat 1 && u.RangeStart(0) == MinValue && u.RangeEnd(count-1) == MaxValue { + + // Emit the inverse + w.WriteByte('^') + + for i := 1; i < count; i++ { + start := u.RangeEnd(i-1) + 1 + end := u.RangeStart(i) - 1 + u.appendToPattern(w, start, escapeUnprintable) + if start != end { + if (start + 1) != end { + w.WriteByte('-') + } + u.appendToPattern(w, end, escapeUnprintable) + } + } + } else { + // Default; emit the ranges as pairs + for i := 0; i < count; i++ { + start := u.RangeStart(i) + end := u.RangeEnd(i) + u.appendToPattern(w, start, escapeUnprintable) + if start != end { + if (start + 1) != end { + w.WriteByte('-') + } + u.appendToPattern(w, end, escapeUnprintable) + } + } + } + + w.WriteByte(']') +} + +func (u *UnicodeSet) appendToPattern(w *strings.Builder, c rune, escapeUnprintable bool) { + if escapeUnprintable && pattern.IsUnprintable(c) { + // Use hex escape notation (\uxxxx or \Uxxxxxxxx) for anything + // unprintable + pattern.EscapeUnprintable(w, c) + return + } + + // Okay to let ':' pass through + switch c { + case '[', ']', '-', '^', '&', '\\', '{', '}', ':', '$': + w.WriteByte('\\') + default: + // Escape whitespace + if pattern.IsWhitespace(c) { + w.WriteByte('\\') + } + } + w.WriteRune(c) +} diff --git a/go/mysql/icuregex/internal/uset/unicode_set.go b/go/mysql/icuregex/internal/uset/unicode_set.go new file mode 100644 index 00000000000..3dba317eab2 --- /dev/null +++ b/go/mysql/icuregex/internal/uset/unicode_set.go @@ -0,0 +1,694 @@ +/* +© 2016 and later: Unicode, Inc. 
and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +import ( + "fmt" + + "golang.org/x/exp/slices" +) + +// HIGH_VALUE > all valid values. 110000 for codepoints +const unicodeSetHigh = 0x0110000 + +// LOW <= all valid values. ZERO for codepoints +const unicodeSetLow = 0x000000 + +const ( + /** + * Minimum value that can be stored in a UnicodeSet. + * @stable ICU 2.4 + */ + MinValue = 0 + + /** + * Maximum value that can be stored in a UnicodeSet. 
+ * @stable ICU 2.4 + */ + MaxValue = 0x10ffff +) + +type UnicodeSet struct { + list []rune + buffer []rune + frozen *frozen +} + +func New() *UnicodeSet { + buf := make([]rune, 1, 25) + buf[0] = unicodeSetHigh + return &UnicodeSet{list: buf} +} + +func FromRunes(list []rune) *UnicodeSet { + return &UnicodeSet{list: list} +} + +func (u *UnicodeSet) ensureBufferCapacity(c int) { + if cap(u.buffer) < c { + u.buffer = make([]rune, c) + return + } + u.buffer = u.buffer[:cap(u.buffer)] +} + +func (u *UnicodeSet) addbuffer(other []rune, polarity int8) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + u.ensureBufferCapacity(len(u.list) + len(other)) + + i := 1 + j := 1 + k := 0 + + a := u.list[0] + b := other[0] + + for { + switch polarity { + case 0: + if a < b { + if k > 0 && a <= u.buffer[k-1] { + k-- + a = max(u.list[i], u.buffer[k]) + } else { + u.buffer[k] = a + k++ + a = u.list[i] + } + i++ + polarity ^= 1 + } else if b < a { + if k > 0 && b <= u.buffer[k-1] { + k-- + b = max(other[j], u.buffer[k]) + } else { + u.buffer[k] = b + k++ + b = other[j] + } + j++ + polarity ^= 2 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + if k > 0 && a <= u.buffer[k-1] { + k-- + a = max(u.list[i], u.buffer[k]) + } else { + u.buffer[k] = a + k++ + a = u.list[i] + } + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 3: + if b <= a { + if a == unicodeSetHigh { + goto loopEnd + } + u.buffer[k] = a + k++ + } else { + if b == unicodeSetHigh { + goto loopEnd + } + u.buffer[k] = b + k++ + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + case 1: + if a < b { + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { + b = other[j] + j++ + polarity ^= 2 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 2: + if b < a { + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else if a < b { + a = 
u.list[i] + i++ + polarity ^= 1 + } else { + if a == unicodeSetHigh { + goto loopEnd + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + } + } + +loopEnd: + u.buffer[k] = unicodeSetHigh + k++ + + u.list, u.buffer = u.buffer[:k], u.list +} + +func max(a, b rune) rune { + if a > b { + return a + } + return b +} + +func pinCodePoint(c *rune) rune { + if *c < unicodeSetLow { + *c = unicodeSetLow + } else if *c > (unicodeSetHigh - 1) { + *c = unicodeSetHigh - 1 + } + return *c +} + +func (u *UnicodeSet) AddRune(c rune) { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + + // find smallest i such that c < list[i] + // if odd, then it is IN the set + // if even, then it is OUT of the set + i := u.findCodePoint(pinCodePoint(&c)) + + // already in set? + if (i & 1) != 0 { + return + } + + // HIGH is 0x110000 + // assert(list[len-1] == HIGH); + + // empty = [HIGH] + // [start_0, limit_0, start_1, limit_1, HIGH] + + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // i == 0 means c is before the first range + if c == u.list[i]-1 { + // c is before start of next range + u.list[i] = c + // if we touched the HIGH mark, then add a new one + if c == (unicodeSetHigh - 1) { + u.list = append(u.list, unicodeSetHigh) + } + if i > 0 && c == u.list[i-1] { + // collapse adjacent ranges + + // [..., start_k-1, c, c, limit_k, ..., HIGH] + // ^ + // list[i] + for k := i - 1; k < len(u.list)-2; k++ { + u.list[k] = u.list[k+2] + } + u.list = u.list[:len(u.list)-2] + } + } else if i > 0 && c == u.list[i-1] { + // c is after end of prior range + u.list[i-1]++ + // no need to check for collapse here + } else { + // At this point we know the new char is not adjacent to + // any existing ranges, and it is not 10FFFF. 
+ + // [..., start_k-1, limit_k-1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + + // [..., start_k-1, limit_k-1, c, c+1, start_k, limit_k, ..., HIGH] + // ^ + // list[i] + u.list = slices.Insert(u.list, i, c, c+1) + } +} + +func (u *UnicodeSet) AddRuneRange(start, end rune) { + if pinCodePoint(&start) < pinCodePoint(&end) { + limit := end + 1 + // Fast path for adding a new range after the last one. + // Odd list length: [..., lastStart, lastLimit, HIGH] + if (len(u.list) & 1) != 0 { + // If the list is empty, set lastLimit low enough to not be adjacent to 0. + var lastLimit rune + if len(u.list) == 1 { + lastLimit = -2 + } else { + lastLimit = u.list[len(u.list)-2] + } + if lastLimit <= start { + if lastLimit == start { + // Extend the last range. + u.list[len(u.list)-2] = limit + if limit == unicodeSetHigh { + u.list = u.list[:len(u.list)-1] + } + } else { + u.list[len(u.list)-1] = start + if limit < unicodeSetHigh { + u.list = append(u.list, limit) + u.list = append(u.list, unicodeSetHigh) + } else { // limit == UNICODESET_HIGH + u.list = append(u.list, unicodeSetHigh) + } + } + return + } + } + // This is slow. Could be much faster using findCodePoint(start) + // and modifying the list, dealing with adjacent & overlapping ranges. 
+		addRange := [3]rune{start, limit, unicodeSetHigh}
+		u.addbuffer(addRange[:], 0)
+	} else if start == end {
+		u.AddRune(start)
+	}
+}
+
+// AddAll adds every code point contained in u2 to u by merging u2's
+// inversion list into u's via addbuffer. A u2 with an empty list is ignored.
+// addbuffer panics if u is frozen.
+func (u *UnicodeSet) AddAll(u2 *UnicodeSet) {
+	if len(u2.list) > 0 {
+		u.addbuffer(u2.list, 0)
+	}
+}
+
+// Complement inverts the set in place. The list is an inversion list, so
+// toggling the leading unicodeSetLow boundary flips the membership of every
+// code point: the boundary is dropped when present and inserted otherwise.
+// It panics if u is frozen.
+func (u *UnicodeSet) Complement() {
+	if u.frozen != nil {
+		panic("UnicodeSet is frozen")
+	}
+	if u.list[0] == unicodeSetLow {
+		copy(u.list, u.list[1:])
+		u.list = u.list[:len(u.list)-1]
+	} else {
+		u.list = slices.Insert(u.list, 0, unicodeSetLow)
+	}
+}
+
+// RemoveRuneRange removes every code point in the inclusive range
+// [start, end] from the set. Both bounds are first pinned into the valid
+// code point range. If start > end after pinning, the range is empty and the
+// set is left unchanged. It panics (inside retain) if u is frozen.
+func (u *UnicodeSet) RemoveRuneRange(start, end rune) {
+	// The comparison must be inclusive: start == end denotes a one-element
+	// range, which ICU's UnicodeSet::remove(start, end) also removes. With a
+	// strict '<' a single code point could never be removed.
+	if pinCodePoint(&start) <= pinCodePoint(&end) {
+		r := [3]rune{start, end + 1, unicodeSetHigh}
+		u.retain(r[:], 2)
+	}
+}
+
+// RemoveAll removes from u every code point contained in c. It calls retain
+// with polarity 2, i.e. c's list is treated as the "second" operand.
+func (u *UnicodeSet) RemoveAll(c *UnicodeSet) {
+	u.retain(c.list, 2)
+}
+
+// RetainAll keeps in u only the code points that are also contained in c
+// (retain with polarity 0).
+func (u *UnicodeSet) RetainAll(c *UnicodeSet) {
+	u.retain(c.list, 0)
+}
+
+// retain walks u.list and other in parallel (both are inversion lists ending
+// in unicodeSetHigh), writes the surviving boundaries into u.buffer and then
+// swaps u.buffer and u.list. polarity selects how the operands are read:
+// 0 intersects the two lists, 2 is what the Remove* methods pass.
+// It panics if u is frozen.
+func (u *UnicodeSet) retain(other []rune, polarity int8) {
+	if u.frozen != nil {
+		panic("UnicodeSet is frozen")
+	}
+
+	u.ensureBufferCapacity(len(u.list) + len(other))
+
+	i := 1
+	j := 1
+	k := 0
+
+	a := u.list[0]
+	b := other[0]
+
+	// change from xor is that we have to check overlapping pairs
+	// polarity bit 1 means a is second, bit 2 means b is.
+ for { + switch polarity { + case 0: // both first; drop the smaller + if a < b { // drop a + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // drop b + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, take one, drop other + if a == unicodeSetHigh { + goto loop_end + } + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 3: // both second; take lower if unequal + if a < b { // take a + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // take b + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, take one, drop other + if a == unicodeSetHigh { + goto loop_end + } + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 1: // a second, b first; + if a < b { // NO OVERLAP, drop a + a = u.list[i] + i++ + polarity ^= 1 + } else if b < a { // OVERLAP, take b + u.buffer[k] = b + k++ + b = other[j] + j++ + polarity ^= 2 + } else { // a == b, drop both! + if a == unicodeSetHigh { + goto loop_end + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + case 2: // a first, b second; if a < b, overlap + if b < a { // no overlap, drop b + b = other[j] + j++ + polarity ^= 2 + } else if a < b { // OVERLAP, take a + u.buffer[k] = a + k++ + a = u.list[i] + i++ + polarity ^= 1 + } else { // a == b, drop both! 
+ if a == unicodeSetHigh { + goto loop_end + } + a = u.list[i] + i++ + polarity ^= 1 + b = other[j] + j++ + polarity ^= 2 + } + } + } + +loop_end: + u.buffer[k] = unicodeSetHigh // terminate + k++ + u.list, u.buffer = u.buffer[:k], u.list +} + +func (u *UnicodeSet) Clear() { + if u.frozen != nil { + panic("UnicodeSet is frozen") + } + u.list = u.list[:1] + u.list[0] = unicodeSetHigh +} + +func (u *UnicodeSet) Len() (n int) { + count := u.RangeCount() + for i := 0; i < count; i++ { + n += int(u.RangeEnd(i)) - int(u.RangeStart(i)) + 1 + } + return +} + +func (u *UnicodeSet) RangeCount() int { + return len(u.list) / 2 +} + +func (u *UnicodeSet) RangeStart(idx int) rune { + return u.list[idx*2] +} + +func (u *UnicodeSet) RangeEnd(idx int) rune { + return u.list[idx*2+1] - 1 +} + +func (u *UnicodeSet) RuneAt(idx int) rune { + if idx >= 0 { + // len2 is the largest even integer <= len, that is, it is len + // for even values and len-1 for odd values. With odd values + // the last entry is UNICODESET_HIGH. + len2 := len(u.list) + if (len2 & 0x1) != 0 { + len2-- + } + + var i int + for i < len2 { + start := u.list[i] + count := int(u.list[i+1] - start) + i += 2 + if idx < count { + return start + rune(idx) + } + idx -= count + } + } + return -1 +} + +func (u *UnicodeSet) ContainsRune(c rune) bool { + if f := u.frozen; f != nil { + if c < 0 { + return false + } else if c <= 0xff { + return f.latin1Contains[c] != 0 + } else if c <= 0x7ff { + return (f.table7FF[c&0x3f] & (uint32(1) << (c >> 6))) != 0 + } else if c < 0xd800 || (c >= 0xe000 && c <= 0xffff) { + lead := c >> 12 + twoBits := (f.bmpBlockBits[(c>>6)&0x3f] >> lead) & 0x10001 + if twoBits <= 1 { + // All 64 code points with the same bits 15..6 + // are either in the set or not. + return twoBits != 0 + } + // Look up the code point in its 4k block of code points. 
+ return f.containsSlow(u.list, c, f.list4kStarts[lead], f.list4kStarts[lead+1]) + } else if c <= 0x10ffff { + // surrogate or supplementary code point + return f.containsSlow(u.list, c, f.list4kStarts[0xd], f.list4kStarts[0x11]) + } + // Out-of-range code points get FALSE, consistent with long-standing + // behavior of UnicodeSet::contains(c). + return false + } + + if c >= unicodeSetHigh { + return false + } + i := u.findCodePoint(c) + return (i & 1) != 0 +} + +func (u *UnicodeSet) ContainsRuneRange(from, to rune) bool { + i := u.findCodePoint(from) + return (i&1) != 0 && to < u.list[i] +} + +func (u *UnicodeSet) findCodePoint(c rune) int { + /* Examples: + findCodePoint(c) + set list[] c=0 1 3 4 7 8 + === ============== =========== + [] [110000] 0 0 0 0 0 0 + [\u0000-\u0003] [0, 4, 110000] 1 1 1 2 2 2 + [\u0004-\u0007] [4, 8, 110000] 0 0 0 1 1 2 + [:Any:] [0, 110000] 1 1 1 1 1 1 + */ + + // Return the smallest i such that c < list[i]. Assume + // list[len - 1] == HIGH and that c is legal (0..HIGH-1). + if c < u.list[0] { + return 0 + } + + // High runner test. c is often after the last range, so an + // initial check for this condition pays off. + lo := 0 + hi := len(u.list) - 1 + if lo >= hi || c >= u.list[hi-1] { + return hi + } + + // invariant: c >= list[lo] + // invariant: c < list[hi] + for { + i := (lo + hi) >> 1 + if i == lo { + break // Found! + } else if c < u.list[i] { + hi = i + } else { + lo = i + } + } + return hi +} + +func (u *UnicodeSet) AddString(chars string) { + for _, c := range chars { + u.AddRune(c) + } +} + +type Filter func(ch rune) bool + +func (u *UnicodeSet) ApplyFilter(inclusions *UnicodeSet, filter Filter) { + // Logically, walk through all Unicode characters, noting the start + // and end of each range for which filter.contain(c) is + // true. Add each range to a set. + // + // To improve performance, use an inclusions set which + // encodes information about character ranges that are known + // to have identical properties. 
+	// inclusions contains the first characters of
+	// same-value ranges for the given property.
+
+	u.Clear()
+
+	startHasProperty := rune(-1)
+	limitRange := inclusions.RangeCount()
+
+	for j := 0; j < limitRange; j++ {
+		// get current range
+		start := inclusions.RangeStart(j)
+		end := inclusions.RangeEnd(j)
+
+		// for all the code points in the range, process
+		for ch := start; ch <= end; ch++ {
+			// only add to this UnicodeSet on inflection points --
+			// where the hasProperty value changes to false
+			if filter(ch) {
+				if startHasProperty < 0 {
+					startHasProperty = ch
+				}
+			} else if startHasProperty >= 0 {
+				u.AddRuneRange(startHasProperty, ch-1)
+				startHasProperty = -1
+			}
+		}
+	}
+	// A run that is still open after the last inclusion range extends to
+	// the end of the code point space.
+	if startHasProperty >= 0 {
+		u.AddRuneRange(startHasProperty, 0x10FFFF)
+	}
+}
+
+// Clone returns a new UnicodeSet holding a copy of u's range list. Only the
+// list is copied: the clone has no scratch buffer and is not frozen, even if
+// u is.
+func (u *UnicodeSet) Clone() *UnicodeSet {
+	return &UnicodeSet{list: slices.Clone(u.list)}
+}
+
+// IsEmpty reports whether the set contains no code points, i.e. whether its
+// list has exactly one element (New creates the empty set as a list holding
+// only the unicodeSetHigh terminator).
+func (u *UnicodeSet) IsEmpty() bool {
+	return len(u.list) == 1
+}
+
+// CopyFrom replaces the contents of u with a copy of set's range list.
+// It panics if u is frozen.
+func (u *UnicodeSet) CopyFrom(set *UnicodeSet) {
+	if u.frozen != nil {
+		panic("UnicodeSet is frozen")
+	}
+	u.list = slices.Clone(set.list)
+}
+
+// Equals reports whether u and other have element-wise identical range
+// lists. The frozen state is not compared.
+func (u *UnicodeSet) Equals(other *UnicodeSet) bool {
+	return slices.Equal(u.list, other.list)
+}
+
+// Freeze builds the frozen lookup structure from the current list, stores it
+// on u and returns u so the call can be chained. Once frozen is set, the
+// mutating methods that check it panic.
+func (u *UnicodeSet) Freeze() *UnicodeSet {
+	u.frozen = freeze(u.list)
+	return u
+}
+
+// FreezeCheck_ is a consistency check for frozen sets: for every code point
+// from 0 through 0x10ffff it compares the answer of ContainsRune with the
+// answer derived from findCodePoint on the plain list, and returns an error
+// for the first code point where the two disagree. A nil receiver returns
+// nil; a set that has not been frozen returns an error.
+func (u *UnicodeSet) FreezeCheck_() error {
+	if u == nil {
+		return nil
+	}
+	if u.frozen == nil {
+		return fmt.Errorf("UnicodeSet is not frozen")
+	}
+	for r := rune(0); r <= 0x10ffff; r++ {
+		// An odd index from findCodePoint means r lies inside a range.
+		want := (u.findCodePoint(r) & 1) != 0
+		got := u.ContainsRune(r)
+		if want != got {
+			return fmt.Errorf("rune '%c' (U+%04X) did not freeze", r, r)
+		}
+	}
+	return nil
+}
diff --git a/go/mysql/icuregex/internal/uset/unicode_set_test.go b/go/mysql/icuregex/internal/uset/unicode_set_test.go
new file mode 100644
index 00000000000..908abd8889d
--- /dev/null
+++ b/go/mysql/icuregex/internal/uset/unicode_set_test.go
@@ -0,0 +1,43 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package uset + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSimpleBelong(t *testing.T) { + ss1 := New() + ss1.AddString("*?+[(){}^$|\\.") + ss2 := New() + ss2.AddString("*?+[(){}^$|\\.") + ss2.Complement() + ss3 := New() + ss3.AddRune('*') + ss3.AddRune('?') + + assert.True(t, ss1.ContainsRune('(')) + assert.False(t, ss2.ContainsRune('(')) + assert.True(t, ss3.ContainsRune('*')) +} diff --git a/go/mysql/icuregex/internal/utf16/helpers.go b/go/mysql/icuregex/internal/utf16/helpers.go new file mode 100644 index 00000000000..bdf53ae731c --- /dev/null +++ b/go/mysql/icuregex/internal/utf16/helpers.go @@ -0,0 +1,65 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
// IsLead reports whether c is a lead (high) surrogate, U+D800..U+DBFF.
func IsLead(c rune) bool {
	return 0xd800 <= c && c <= 0xdbff
}

// IsTrail reports whether c is a trail (low) surrogate, U+DC00..U+DFFF.
func IsTrail(c rune) bool {
	return 0xdc00 <= c && c <= 0xdfff
}

// IsSurrogate reports whether c is a surrogate code point, U+D800..U+DFFF.
func IsSurrogate(c rune) bool {
	return 0xd800 <= c && c <= 0xdfff
}

// IsSurrogateLead reports whether c is a lead surrogate, assuming the caller
// has already established that c is a surrogate (see IsSurrogate). Only bit 10
// is inspected, so the result is meaningless for non-surrogates.
func IsSurrogateLead(c rune) bool {
	return c&0x400 == 0
}

// DecodeRune combines the surrogate pair (a, b) into a single code point by
// delegating to the standard library's utf16.DecodeRune.
func DecodeRune(a, b rune) rune {
	return utf16.DecodeRune(a, b)
}

// NextUnsafe decodes the first code point of s and returns it together with
// the remainder of s. A lead surrogate consumes the following code unit as
// well. No bounds or well-formedness checks are made: s must be non-empty and
// must not end in the middle of a surrogate pair, otherwise the call panics.
func NextUnsafe(s []uint16) (rune, []uint16) {
	first := rune(s[0])
	if IsLead(first) {
		return DecodeRune(first, rune(s[1])), s[2:]
	}
	return first, s[1:]
}
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utrie + +import ( + "errors" + "fmt" + + "vitess.io/vitess/go/mysql/icuregex/internal/udata" +) + +type UcpTrie struct { + index []uint16 + data8 []uint8 + data16 []uint16 + data32 []uint32 + + indexLength, dataLength int32 + /** Start of the last range which ends at U+10FFFF. @internal */ + highStart rune + shifted12HighStart uint16 + + typ ucpTrieType + valueWidth ucpTrieValueWidth + + /** + * Internal index-3 null block offset. + * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. + * @internal + */ + index3NullOffset uint16 + /** + * Internal data null block offset, not shifted. + * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. + * @internal + */ + dataNullOffset int32 + + nullValue uint32 +} + +/** + * Selectors for the type of a UCPTrie. + * Different trade-offs for size vs. speed. + * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getType + * @stable ICU 63 + */ +type ucpTrieType int8 + +const ( + /** + * For ucptrie_openFromBinary() to accept any type. + * ucptrie_getType() will return the actual type. + * @stable ICU 63 + */ + typeAny ucpTrieType = iota - 1 + /** + * Fast/simple/larger BMP data structure. Use functions and "fast" macros. + * @stable ICU 63 + */ + typeFast + /** + * Small/slower BMP data structure. Use functions and "small" macros. + * @stable ICU 63 + */ + typeSmall +) + +/** + * Selectors for the number of bits in a UCPTrie data value. 
+ * + * @see umutablecptrie_buildImmutable + * @see ucptrie_openFromBinary + * @see ucptrie_getValueWidth + * @stable ICU 63 + */ +type ucpTrieValueWidth int8 + +const ( + /** + * For ucptrie_openFromBinary() to accept any data value width. + * ucptrie_getValueWidth() will return the actual data value width. + * @stable ICU 63 + */ + valueBitsAny ucpTrieValueWidth = iota - 1 + /** + * The trie stores 16 bits per data value. + * It returns them as unsigned values 0..0xffff=65535. + * @stable ICU 63 + */ + valueBits16 + /** + * The trie stores 32 bits per data value. + * @stable ICU 63 + */ + valueBits32 + /** + * The trie stores 8 bits per data value. + * It returns them as unsigned values 0..0xff=255. + * @stable ICU 63 + */ + valueBits8 +) + +const ucpTrieSig = 0x54726933 +const ucpTrieOESig = 0x33697254 + +/** + * Constants for use with UCPTrieHeader.options. + * @internal + */ +const ( + optionsDataLengthMask = 0xf000 + optionsDataNullOffsetMask = 0xf00 + optionsReservedMask = 0x38 + optionsValueBitsMask = 7 +) + +const ( + /** @internal */ + fastShift = 6 + + /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ + fastDataBlockLength = 1 << fastShift + + /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ + fastDataMask = fastDataBlockLength - 1 + + /** @internal */ + smallMax = 0xfff + + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for out-of-range code points and ill-formed UTF-8/16. + * @internal + */ + errorValueNegDataOffset = 1 + /** + * Offset from dataLength (to be subtracted) for fetching the + * value returned for code points highStart..U+10FFFF. + * @internal + */ + highValueNegDataOffset = 2 +) + +// Internal constants. +const ( + /** The length of the BMP index table. 
1024=0x400 */ + bmpIndexLength = 0x10000 >> fastShift + + smallLimit = 0x1000 + smallIndexLength = smallLimit >> fastShift + + /** Shift size for getting the index-3 table offset. */ + ucpShift3 = 4 + + /** Shift size for getting the index-2 table offset. */ + ucpShift2 = 5 + ucpShift3 + + /** Shift size for getting the index-1 table offset. */ + ucpShift1 = 5 + ucpShift2 + + /** + * Difference between two shift sizes, + * for getting an index-2 offset from an index-3 offset. 5=9-4 + */ + ucpShift2Min3 = ucpShift2 - ucpShift3 + + /** + * Difference between two shift sizes, + * for getting an index-1 offset from an index-2 offset. 5=14-9 + */ + ucpShift1Min2 = ucpShift1 - ucpShift2 + + /** + * Number of index-1 entries for the BMP. (4) + * This part of the index-1 table is omitted from the serialized form. + */ + ucpOmittedBmpIndex1Length = 0x10000 >> ucpShift1 + + /** Number of entries in an index-2 block. 32=0x20 */ + ucpIndex2BlockLength = 1 << ucpShift1Min2 + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + ucpIndex2Mask = ucpIndex2BlockLength - 1 + + /** Number of code points per index-2 table entry. 512=0x200 */ + ucpCpPerIndex2Entry = 1 << ucpShift2 + + /** Number of entries in an index-3 block. 32=0x20 */ + ucpIndex3BlockLength = 1 << ucpShift2Min3 + + /** Mask for getting the lower bits for the in-index-3-block offset. */ + ucpIndex3Mask = ucpIndex3BlockLength - 1 + + /** Number of entries in a small data block. 16=0x10 */ + ucpSmallDataBlockLength = 1 << ucpShift3 + + /** Mask for getting the lower bits for the in-small-data-block offset. */ + ucpSmallDataMask = ucpSmallDataBlockLength - 1 +) + +func UcpTrieFromBytes(bytes *udata.Bytes) (*UcpTrie, error) { + type ucpHeader struct { + /** "Tri3" in big-endian US-ASCII (0x54726933) */ + signature uint32 + + /** + * Options bit field: + * Bits 15..12: Data length bits 19..16. + * Bits 11..8: Data null block offset bits 19..16. + * Bits 7..6: UCPTrieType + * Bits 5..3: Reserved (0). 
+ * Bits 2..0: UCPTrieValueWidth + */ + options uint16 + + /** Total length of the index tables. */ + indexLength uint16 + + /** Data length bits 15..0. */ + dataLength uint16 + + /** Index-3 null block offset, 0x7fff or 0xffff if none. */ + index3NullOffset uint16 + + /** Data null block offset bits 15..0, 0xfffff if none. */ + dataNullOffset uint16 + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UCPTRIE_SHIFT_2. + */ + shiftedHighStart uint16 + } + + var header ucpHeader + header.signature = bytes.Uint32() + + switch header.signature { + case ucpTrieSig: + case ucpTrieOESig: + return nil, errors.New("unsupported: BigEndian encoding") + default: + return nil, fmt.Errorf("invalid signature for UcpTrie: 0x%08x", header.signature) + } + + header.options = bytes.Uint16() + header.indexLength = bytes.Uint16() + header.dataLength = bytes.Uint16() + header.index3NullOffset = bytes.Uint16() + header.dataNullOffset = bytes.Uint16() + header.shiftedHighStart = bytes.Uint16() + + typeInt := (header.options >> 6) & 3 + valueWidthInt := header.options & optionsValueBitsMask + if typeInt > uint16(typeSmall) || valueWidthInt > uint16(valueBits8) || + (header.options&optionsReservedMask) != 0 { + return nil, errors.New("invalid options for serialized UcpTrie") + } + actualType := ucpTrieType(typeInt) + actualValueWidth := ucpTrieValueWidth(valueWidthInt) + + trie := &UcpTrie{ + indexLength: int32(header.indexLength), + dataLength: int32(((header.options & optionsDataLengthMask) << 4) | header.dataLength), + index3NullOffset: header.index3NullOffset, + dataNullOffset: int32(((header.options & optionsDataNullOffsetMask) << 8) | header.dataNullOffset), + highStart: rune(header.shiftedHighStart) << ucpShift2, + typ: actualType, + valueWidth: actualValueWidth, + } + nullValueOffset := trie.dataNullOffset + if nullValueOffset >= trie.dataLength { + nullValueOffset = trie.dataLength - highValueNegDataOffset + } + + 
trie.shifted12HighStart = uint16((trie.highStart + 0xfff) >> 12) + trie.index = bytes.Uint16Slice(int32(header.indexLength)) + switch actualValueWidth { + case valueBits16: + trie.data16 = bytes.Uint16Slice(trie.dataLength) + trie.nullValue = uint32(trie.data16[nullValueOffset]) + case valueBits32: + trie.data32 = bytes.Uint32Slice(trie.dataLength) + trie.nullValue = trie.data32[nullValueOffset] + case valueBits8: + trie.data8 = bytes.Uint8Slice(trie.dataLength) + trie.nullValue = uint32(trie.data8[nullValueOffset]) + } + + return trie, nil +} + +func (t *UcpTrie) Get(c rune) uint32 { + var dataIndex int32 + if c <= 0x7f { + // linear ASCII + dataIndex = c + } else { + var fastMax rune + if t.typ == typeFast { + fastMax = 0xffff + } else { + fastMax = smallMax + } + dataIndex = t.cpIndex(fastMax, c) + } + return t.getValue(dataIndex) +} + +func (t *UcpTrie) getValue(dataIndex int32) uint32 { + switch t.valueWidth { + case valueBits16: + return uint32(t.data16[dataIndex]) + case valueBits32: + return t.data32[dataIndex] + case valueBits8: + return uint32(t.data8[dataIndex]) + default: + // Unreachable if the trie is properly initialized. + return 0xffffffff + } +} + +/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */ +func (t *UcpTrie) fastIndex(c rune) int32 { + return int32(t.index[c>>fastShift]) + (c & fastDataMask) +} + +/** Internal trie getter for a code point at or above the fast limit. Returns the data index. 
@internal */ +func (t *UcpTrie) smallIndex(c rune) int32 { + if c >= t.highStart { + return t.dataLength - highValueNegDataOffset + } + return t.internalSmallIndex(c) +} + +func (t *UcpTrie) internalSmallIndex(c rune) int32 { + i1 := c >> ucpShift1 + if t.typ == typeFast { + i1 += bmpIndexLength - ucpOmittedBmpIndex1Length + } else { + i1 += smallIndexLength + } + i3Block := int32(t.index[int32(t.index[i1])+((c>>ucpShift2)&ucpIndex2Mask)]) + i3 := (c >> ucpShift3) & ucpIndex3Mask + var dataBlock int32 + if (i3Block & 0x8000) == 0 { + // 16-bit indexes + dataBlock = int32(t.index[i3Block+i3]) + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + i3Block = (i3Block & 0x7fff) + (i3 & ^7) + (i3 >> 3) + i3 &= 7 + dataBlock = int32(t.index[i3Block]) << (2 + (2 * i3)) & 0x30000 + i3Block++ + dataBlock |= int32(t.index[i3Block+i3]) + } + return dataBlock + (c & ucpSmallDataMask) +} + +/** + * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF. + * Returns the data index. + * @internal + */ +func (t *UcpTrie) cpIndex(fastMax, c rune) int32 { + if c <= fastMax { + return t.fastIndex(c) + } + if c <= 0x10ffff { + return t.smallIndex(c) + } + return t.dataLength - errorValueNegDataOffset +} + +/** + * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates. + * Most users should use UCPMAP_RANGE_NORMAL. + * + * @see ucpmap_getRange + * @see ucptrie_getRange + * @see umutablecptrie_getRange + * @stable ICU 63 + */ +type UcpMapRangeOption int8 + +const ( + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map. + * Most users should use this option. + * @stable ICU 63 + */ + UcpMapRangeNormal UcpMapRangeOption = iota + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that lead surrogates (U+D800..U+DBFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. 
+ * The surrogateValue is not transformed via filter(). + * See U_IS_LEAD(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UcpMapRangeFixedLeadSurrogates + /** + * ucpmap_getRange() enumerates all same-value ranges as stored in the map, + * except that all surrogates (U+D800..U+DFFF) are treated as having the + * surrogateValue, which is passed to getRange() as a separate parameter. + * The surrogateValue is not transformed via filter(). + * See U_IS_SURROGATE(c). + * + * Most users should use UCPMAP_RANGE_NORMAL instead. + * + * This option is useful for maps that map surrogate code *units* to + * special values optimized for UTF-16 string processing + * or for special error behavior for unpaired surrogates, + * but those values are not to be associated with the lead surrogate code *points*. + * @stable ICU 63 + */ + UcpMapRangeFixedAllSurrogates +) + +/** + * Callback function type: Modifies a map value. + * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange(). + * The modified value will be returned by the getRange function. + * + * Can be used to ignore some of the value bits, + * make a filter for one of several values, + * return a value index computed from the map value, etc. + * + * @param context an opaque pointer, as passed into the getRange function + * @param value a value from the map + * @return the modified value + * @stable ICU 63 + */ +type UcpMapValueFilter func(value uint32) uint32 + +/** + * GetRange returns the last code point such that all those from start to there have the same value. + * Can be used to efficiently iterate over all same-value ranges in a trie. 
+ * (This is normally faster than iterating over code points and get()ting each value, + * but much slower than a data structure that stores ranges directly.) + * + * If the UCPMapValueFilter function pointer is not NULL, then + * the value to be delivered is passed through that function, and the return value is the end + * of the range where all values are modified to the same actual value. + * The value is unchanged if that function pointer is NULL. + * + * Example: + * \code + * UChar32 start = 0, end; + * uint32_t value; + * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0, + * NULL, NULL, &value)) >= 0) { + * // Work with the range start..end and its value. + * start = end + 1; + * } + * \endcode + * + * @param trie the trie + * @param start range start + * @param option defines whether surrogates are treated normally, + * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL + * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL + * @param filter a pointer to a function that may modify the trie data value, + * or NULL if the values from the trie are to be used unmodified + * @param context an opaque pointer that is passed on to the filter function + * @param pValue if not NULL, receives the value that every code point start..end has; + * may have been modified by filter(context, trie value) + * if that function pointer is not NULL + * @return the range end code point, or -1 if start is not a valid code point + * @stable ICU 63 + */ +func (t *UcpTrie) GetRange(start rune, option UcpMapRangeOption, surrogateValue uint32, filter UcpMapValueFilter) (rune, uint32) { + if option == UcpMapRangeNormal { + return t.getRange(start, filter) + } + + var surrEnd rune + if option == UcpMapRangeFixedAllSurrogates { + surrEnd = 0xdfff + } else { + surrEnd = 0xdbff + } + end, value := t.getRange(start, filter) + if end < 0xd7ff || start > surrEnd { + return end, value + } + if value == surrogateValue { + if end >= 
surrEnd { + // Surrogates followed by a non-surrogateValue range, + // or surrogates are part of a larger surrogateValue range. + return end, value + } + } else { + if start <= 0xd7ff { + return 0xd7ff, value // Non-surrogateValue range ends before surrogateValue surrogates. + } + // Start is a surrogate with a non-surrogateValue code *unit* value. + // Return a surrogateValue code *point* range. + value = surrogateValue + if end > surrEnd { + return surrEnd, value // Surrogate range ends before non-surrogateValue rest of range. + } + } + // See if the surrogateValue surrogate range can be merged with + // an immediately following range. + end2, value2 := t.getRange(surrEnd+1, filter) + if value2 == surrogateValue { + return end2, value + } + return surrEnd, value +} + +const maxUnicode = 0x10ffff + +func (t *UcpTrie) getRange(start rune, filter UcpMapValueFilter) (rune, uint32) { + if start > maxUnicode { + return -1, 0 + } + + if start >= t.highStart { + di := t.dataLength - highValueNegDataOffset + value := t.getValue(di) + if filter != nil { + value = filter(value) + } + return maxUnicode, value + } + + nullValue := t.nullValue + if filter != nil { + nullValue = filter(nullValue) + } + index := t.index + + prevI3Block := int32(-1) + prevBlock := int32(-1) + c := start + var trieValue uint32 + value := nullValue + haveValue := false + for { + var i3Block, i3, i3BlockLength, dataBlockLength int32 + if c <= 0xffff && (t.typ == typeFast || c <= smallMax) { + i3Block = 0 + i3 = c >> fastShift + if t.typ == typeFast { + i3BlockLength = bmpIndexLength + } else { + i3BlockLength = smallIndexLength + } + dataBlockLength = fastDataBlockLength + } else { + // Use the multi-stage index. 
+ i1 := c >> ucpShift1 + if t.typ == typeFast { + i1 += bmpIndexLength - ucpOmittedBmpIndex1Length + } else { + i1 += smallIndexLength + } + shft := c >> ucpShift2 + idx := int32(t.index[i1]) + (shft & ucpIndex2Mask) + i3Block = int32(t.index[idx]) + if i3Block == prevI3Block && (c-start) >= ucpCpPerIndex2Entry { + // The index-3 block is the same as the previous one, and filled with value. + c += ucpCpPerIndex2Entry + continue + } + prevI3Block = i3Block + if i3Block == int32(t.index3NullOffset) { + // This is the index-3 null block. + if haveValue { + if nullValue != value { + return c - 1, value + } + } else { + trieValue = t.nullValue + value = nullValue + haveValue = true + } + prevBlock = t.dataNullOffset + c = (c + ucpCpPerIndex2Entry) & ^(ucpCpPerIndex2Entry - 1) + continue + } + i3 = (c >> ucpShift3) & ucpIndex3Mask + i3BlockLength = ucpIndex3BlockLength + dataBlockLength = ucpSmallDataBlockLength + } + + // Enumerate data blocks for one index-3 block. + for { + var block int32 + if (i3Block & 0x8000) == 0 { + block = int32(index[i3Block+i3]) + } else { + // 18-bit indexes stored in groups of 9 entries per 8 indexes. + group := (i3Block & 0x7fff) + (i3 & ^7) + (i3 >> 3) + gi := i3 & 7 + block = (int32(index[group]) << (2 + (2 * gi))) & 0x30000 + group++ + block |= int32(index[group+gi]) + } + if block == prevBlock && (c-start) >= dataBlockLength { + // The block is the same as the previous one, and filled with value. + c += dataBlockLength + } else { + dataMask := dataBlockLength - 1 + prevBlock = block + if block == t.dataNullOffset { + // This is the data null block. 
+ if haveValue { + if nullValue != value { + return c - 1, value + } + } else { + trieValue = t.nullValue + value = nullValue + haveValue = true + } + c = (c + dataBlockLength) & ^dataMask + } else { + di := block + (c & dataMask) + trieValue2 := t.getValue(di) + if haveValue { + if trieValue2 != trieValue { + if filter == nil || maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + trieValue = trieValue2 // may or may not help + } + } else { + trieValue = trieValue2 + value = maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) + haveValue = true + } + for { + c++ + if c&dataMask == 0 { + break + } + di++ + trieValue2 = t.getValue(di) + if trieValue2 != trieValue { + if filter == nil || maybeFilterValue(trieValue2, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + trieValue = trieValue2 // may or may not help + } + } + } + } + i3++ + if i3 >= i3BlockLength { + break + } + } + if c >= t.highStart { + break + } + } + + di := t.dataLength - highValueNegDataOffset + highValue := t.getValue(di) + if maybeFilterValue(highValue, t.nullValue, nullValue, filter) != value { + return c - 1, value + } + return maxUnicode, value +} + +func maybeFilterValue(value uint32, trieNullValue uint32, nullValue uint32, filter UcpMapValueFilter) uint32 { + if value == trieNullValue { + value = nullValue + } else if filter != nil { + value = filter(value) + } + return value +} diff --git a/go/mysql/icuregex/internal/utrie/utrie2.go b/go/mysql/icuregex/internal/utrie/utrie2.go new file mode 100644 index 00000000000..a2c80cf1c50 --- /dev/null +++ b/go/mysql/icuregex/internal/utrie/utrie2.go @@ -0,0 +1,440 @@ +/* +© 2016 and later: Unicode, Inc. and others. +Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. 
+License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utrie + +import ( + "errors" + "fmt" + + "vitess.io/vitess/go/mysql/icuregex/internal/udata" + "vitess.io/vitess/go/mysql/icuregex/internal/utf16" +) + +type UTrie2 struct { + index []uint16 + data16 []uint16 + data32 []uint32 + + indexLength, dataLength int + index2NullOffset uint16 + dataNullOffset uint16 + InitialValue uint32 + ErrorValue uint32 + + HighStart rune + HighValueIndex int +} + +func (t *UTrie2) SerializedLength() int32 { + return 16 + int32(t.indexLength+t.dataLength)*2 +} + +func (t *UTrie2) getIndex(asciiOffset int, c rune) uint16 { + return t.index[t.indexFromCp(asciiOffset, c)] +} + +func (t *UTrie2) Get16(c rune) uint16 { + return t.getIndex(t.indexLength, c) +} + +func (t *UTrie2) indexFromCp(asciiOffset int, c rune) int { + switch { + case c < 0xd800: + return indexRaw(0, t.index, c) + case c <= 0xffff: + var offset int32 + if c <= 0xdbff { + offset = lscpIndex2Offset - (0xd800 >> shift2) + } + return indexRaw(offset, t.index, c) + case c > 0x10ffff: + return asciiOffset + badUtf8DataOffset + case c >= t.HighStart: + return t.HighValueIndex + default: + return indexFromSupp(t.index, c) + } +} + +type EnumRange func(start, end rune, value uint32) bool +type EnumValue func(value uint32) uint32 + +func (t *UTrie2) Enum(enumValue EnumValue, enumRange EnumRange) { + t.enumEitherTrie(0, 0x110000, enumValue, enumRange) +} + 
+func enumSameValue(value uint32) uint32 { + return value +} + +func min(a, b rune) rune { + if a < b { + return a + } + return b +} + +func (t *UTrie2) enumEitherTrie(start, limit rune, enumValue EnumValue, enumRange EnumRange) { + if enumRange == nil { + return + } + if enumValue == nil { + enumValue = enumSameValue + } + + /* frozen trie */ + var ( + idx = t.index + data32 = t.data32 + index2NullOffset = int(t.index2NullOffset) + nullBlock = int(t.dataNullOffset) + + c rune + prev = start + highStart = t.HighStart + + /* get the enumeration value that corresponds to an initial-value trie data entry */ + initialValue = enumValue(t.InitialValue) + + /* set variables for previous range */ + i2Block int + block int + prevI2Block = -1 + prevBlock = -1 + prevValue = uint32(0) + ) + + /* enumerate index-2 blocks */ + for c = start; c < limit && c < highStart; { + /* Code point limit for iterating inside this i2Block. */ + tempLimit := c + cpPerIndex1Entry + if limit < tempLimit { + tempLimit = limit + } + if c <= 0xffff { + if !utf16.IsSurrogate(c) { + i2Block = int(c >> shift2) + } else if utf16.IsSurrogateLead(c) { + /* + * Enumerate values for lead surrogate code points, not code units: + * This special block has half the normal length. + */ + i2Block = lscpIndex2Offset + tempLimit = min(0xdc00, limit) + } else { + /* + * Switch back to the normal part of the index-2 table. + * Enumerate the second half of the surrogates block. + */ + i2Block = 0xd800 >> shift2 + tempLimit = min(0xe000, limit) + } + } else { + /* supplementary code points */ + i2Block = int(idx[(index1Offset-omittedBmpIndex1Length)+(c>>shift1)]) + if i2Block == prevI2Block && (c-prev) >= cpPerIndex1Entry { + /* + * The index-2 block is the same as the previous one, and filled with prevValue. + * Only possible for supplementary code points because the linear-BMP index-2 + * table creates unique i2Block values. 
+ */ + c += cpPerIndex1Entry + continue + } + } + prevI2Block = i2Block + if i2Block == index2NullOffset { + /* this is the null index-2 block */ + if prevValue != initialValue { + if prev < c && !enumRange(prev, c-1, prevValue) { + return + } + prevBlock = nullBlock + prev = c + prevValue = initialValue + } + c += cpPerIndex1Entry + } else { + /* enumerate data blocks for one index-2 block */ + var i2Limit int + if (c >> shift1) == (tempLimit >> shift1) { + i2Limit = int(tempLimit>>shift2) & index2Mask + } else { + i2Limit = index2BlockLength + } + for i2 := int(c>>shift2) & index2Mask; i2 < i2Limit; i2++ { + block = int(idx[i2Block+i2] << indexShift) + if block == prevBlock && (c-prev) >= dataBlockLength { + /* the block is the same as the previous one, and filled with prevValue */ + c += dataBlockLength + continue + } + prevBlock = block + if block == nullBlock { + /* this is the null data block */ + if prevValue != initialValue { + if prev < c && !enumRange(prev, c-1, prevValue) { + return + } + prev = c + prevValue = initialValue + } + c += dataBlockLength + } else { + for j := 0; j < dataBlockLength; j++ { + var value uint32 + if data32 != nil { + value = data32[block+j] + } else { + value = uint32(idx[block+j]) + } + value = enumValue(value) + if value != prevValue { + if prev < c && !enumRange(prev, c-1, prevValue) { + return + } + prev = c + prevValue = value + } + c++ + } + } + } + } + } + + if c > limit { + c = limit /* could be higher if in the index2NullOffset */ + } else if c < limit { + /* c==highStart>shift1)]) + return (int(index[i1+int((c>>shift2)&index2Mask)]) << indexShift) + int(c&dataMask) +} + +func indexRaw(offset int32, index []uint16, c rune) int { + return int(index[offset+(c>>shift2)]<> shift1 + + /** Number of code points per index-1 table entry. 2048=0x800 */ + cpPerIndex1Entry = 1 << shift1 + + /** Number of entries in an index-2 block. 
64=0x40 */ + index2BlockLength = 1 << shift1min2 + + /** Mask for getting the lower bits for the in-index-2-block offset. */ + index2Mask = index2BlockLength - 1 + + /** Number of entries in a data block. 32=0x20 */ + dataBlockLength = 1 << shift2 + + /** Mask for getting the lower bits for the in-data-block offset. */ + dataMask = dataBlockLength - 1 + + /** + * Shift size for shifting left the index array values. + * Increases possible data size with 16-bit index values at the cost + * of compactability. + * This requires data blocks to be aligned by UTRIE2_DATA_GRANULARITY. + */ + indexShift = 2 + + /** The alignment size of a data block. Also the granularity for compaction. */ + dataGranularity = 1 << indexShift + + /* Fixed layout of the first part of the index array. ------------------- */ + + /** + * The part of the index-2 table for U+D800..U+DBFF stores values for + * lead surrogate code _units_ not code _points_. + * Values for lead surrogate code _points_ are indexed with this portion of the table. + * Length=32=0x20=0x400>>UTRIE2_SHIFT_2. (There are 1024=0x400 lead surrogates.) + */ + lscpIndex2Offset = 0x10000 >> shift2 + lscpIndex2Length = 0x400 >> shift2 + + /** Count the lengths of both BMP pieces. 2080=0x820 */ + index2BmpLength = lscpIndex2Offset + lscpIndex2Length + + /** + * The 2-byte UTF-8 version of the index-2 table follows at offset 2080=0x820. + * Length 32=0x20 for lead bytes C0..DF, regardless of UTRIE2_SHIFT_2. + */ + utf82BIndex2Offset = index2BmpLength + utf82BIndex2Length = 0x800 >> 6 /* U+0800 is the first code point after 2-byte UTF-8 */ + + /** + * The index-1 table, only used for supplementary code points, at offset 2112=0x840. + * Variable length, for code points up to highStart, where the last single-value range starts. + * Maximum length 512=0x200=0x100000>>UTRIE2_SHIFT_1. + * (For 0x100000 supplementary code points U+10000..U+10ffff.) 
+ * + * The part of the index-2 table for supplementary code points starts + * after this index-1 table. + * + * Both the index-1 table and the following part of the index-2 table + * are omitted completely if there is only BMP data. + */ + index1Offset = utf82BIndex2Offset + utf82BIndex2Length + maxIndex1Length = 0x100000 >> shift1 + + /* + * Fixed layout of the first part of the data array. ----------------------- + * Starts with 4 blocks (128=0x80 entries) for ASCII. + */ + + /** + * The illegal-UTF-8 data block follows the ASCII block, at offset 128=0x80. + * Used with linear access for single bytes 0..0xbf for simple error handling. + * Length 64=0x40, not UTRIE2_DATA_BLOCK_LENGTH. + */ + badUtf8DataOffset = 0x80 +) + +func UTrie2FromBytes(bytes *udata.Bytes) (*UTrie2, error) { + type utrie2Header struct { + /** "Tri2" in big-endian US-ASCII (0x54726932) */ + signature uint32 + + /** + * options bit field: + * 15.. 4 reserved (0) + * 3.. 0 UTrie2ValueBits valueBits + */ + options uint16 + + /** UTRIE2_INDEX_1_OFFSET..UTRIE2_MAX_INDEX_LENGTH */ + indexLength uint16 + + /** (UTRIE2_DATA_START_OFFSET..UTRIE2_MAX_DATA_LENGTH)>>UTRIE2_INDEX_SHIFT */ + shiftedDataLength uint16 + + /** Null index and data blocks, not shifted. */ + index2NullOffset, dataNullOffset uint16 + + /** + * First code point of the single-value range ending with U+10ffff, + * rounded up and then shifted right by UTRIE2_SHIFT_1. 
+ */ + shiftedHighStart uint16 + } + + var header utrie2Header + header.signature = bytes.Uint32() + + switch header.signature { + case 0x54726932: + case 0x32697254: + return nil, errors.New("unsupported: BigEndian encoding") + default: + return nil, fmt.Errorf("invalid signature for Trie2: 0x%08x", header.signature) + } + + header.options = bytes.Uint16() + header.indexLength = bytes.Uint16() + header.shiftedDataLength = bytes.Uint16() + header.index2NullOffset = bytes.Uint16() + header.dataNullOffset = bytes.Uint16() + header.shiftedHighStart = bytes.Uint16() + + var width int + switch header.options & 0xf { + case 0: + width = 16 + case 1: + width = 32 + default: + return nil, errors.New("invalid width for serialized UTrie2") + } + + trie := &UTrie2{ + indexLength: int(header.indexLength), + dataLength: int(header.shiftedDataLength) << indexShift, + index2NullOffset: header.index2NullOffset, + dataNullOffset: header.dataNullOffset, + HighStart: rune(header.shiftedHighStart) << shift1, + } + + trie.HighValueIndex = trie.dataLength - dataGranularity + if width == 16 { + trie.HighValueIndex += trie.indexLength + } + + indexArraySize := trie.indexLength + if width == 16 { + indexArraySize += trie.dataLength + } + + trie.index = bytes.Uint16Slice(int32(indexArraySize)) + + if width == 16 { + trie.data16 = trie.index[trie.indexLength:] + trie.InitialValue = uint32(trie.index[trie.dataNullOffset]) + trie.ErrorValue = uint32(trie.index[trie.indexLength+badUtf8DataOffset]) + } else { + trie.data32 = bytes.Uint32Slice(int32(trie.dataLength)) + trie.InitialValue = trie.data32[trie.dataNullOffset] + trie.ErrorValue = trie.data32[badUtf8DataOffset] + } + + return trie, nil +} diff --git a/go/mysql/icuregex/matcher.go b/go/mysql/icuregex/matcher.go new file mode 100644 index 00000000000..11fbc152d73 --- /dev/null +++ b/go/mysql/icuregex/matcher.go @@ -0,0 +1,1655 @@ +/* +© 2016 and later: Unicode, Inc. and others. 
+Copyright (C) 2004-2015, International Business Machines Corporation and others. +Copyright 2023 The Vitess Authors. + +This file contains code derived from the Unicode Project's ICU library. +License & terms of use for the original code: http://www.unicode.org/copyright.html + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package icuregex + +import ( + "fmt" + "io" + + "vitess.io/vitess/go/mysql/icuregex/internal/ucase" + "vitess.io/vitess/go/mysql/icuregex/internal/uchar" + "vitess.io/vitess/go/mysql/icuregex/internal/uprops" +) + +const timerInitialValue = 10000 +const defaultTimeout = 3 +const defaultStackLimit = 0 + +type Matcher struct { + pattern *Pattern + + input []rune + + regionStart int // Start of the input region, default = 0. + regionLimit int // End of input region, default to input.length. + + anchorStart int // Region bounds for anchoring operations (^ or $). + anchorLimit int // See useAnchoringBounds + + lookStart int // Region bounds for look-ahead/behind and + lookLimit int // and other boundary tests. See + // useTransparentBounds + + activeStart int // Currently active bounds for matching. + activeLimit int // Usually is the same as region, but + // is changed to fLookStart/Limit when + // entering look around regions. + + match bool // True if the last attempted match was successful. 
+ matchStart int // Position of the start of the most recent match + matchEnd int // First position after the end of the most recent match + // Zero if no previous match, even when a region + // is active. + lastMatchEnd int // First position after the end of the previous match, + // or -1 if there was no previous match. + appendPosition int // First position after the end of the previous + // appendReplacement(). As described by the + // JavaDoc for Java Matcher, where it is called + // "append position" + hitEnd bool // True if the last match touched the end of input. + requireEnd bool // True if the last match required end-of-input + // (matched $ or Z) + + stack stack + frame stackFrame // After finding a match, the last active stack frame, + // which will contain the capture group results. + // NOT valid while match engine is running. + + data []int // Data area for use by the compiled pattern. + + timeLimit int32 // Max time (in arbitrary steps) to let the + // match engine run. Zero for unlimited. + + time int32 // Match time, accumulates while matching. + tickCounter int32 // Low bits counter for time. Counts down StateSaves. + // Kept separately from fTime to keep as much + // code as possible out of the inline + // StateSave function. + + dumper io.Writer +} + +func NewMatcher(pat *Pattern) *Matcher { + m := &Matcher{ + pattern: pat, + data: make([]int, pat.dataSize), + stack: stack{ + frameSize: pat.frameSize, + stackLimit: defaultStackLimit, + }, + timeLimit: defaultTimeout, + } + m.reset() + return m +} + +func (m *Matcher) MatchAt(startIdx int, toEnd bool) error { + //-------------------------------------------------------------------------------- + // + // MatchAt This is the actual matching engine. + // + // startIdx: begin matching a this index. 
+ // toEnd: if true, match must extend to end of the input region + // + //-------------------------------------------------------------------------------- + var err error + var isMatch bool // True if the we have a match. + + if m.dumper != nil { + fmt.Fprintf(m.dumper, "MatchAt(startIdx=%d)\n", startIdx) + fmt.Fprintf(m.dumper, "Original Pattern: \"%s\"\n", m.pattern.pattern) + fmt.Fprintf(m.dumper, "Input String: \"%s\"\n\n", string(m.input)) + } + + pat := m.pattern.compiledPat + inputText := m.input + litText := m.pattern.literalText + sets := m.pattern.sets + + fp := m.resetStack() + *fp.inputIdx() = startIdx + *fp.patIdx() = 0 + for i := 0; i < len(m.data); i++ { + m.data[i] = 0 + } + + for { + op := pat[*fp.patIdx()] + + if m.dumper != nil { + fmt.Fprintf(m.dumper, "inputIdx=%d inputChar=%x sp=%3d activeLimit=%d ", *fp.inputIdx(), + charAt(inputText, *fp.inputIdx()), m.stack.sp(), m.activeLimit) + m.pattern.dumpOp(m.dumper, *fp.patIdx()) + } + + *fp.patIdx()++ + + switch op.typ() { + case urxNop: + // Nothing to do. + case urxBacktrack: + // Force a backtrack. In some circumstances, the pattern compiler + // will notice that the pattern can't possibly match anything, and will + // emit one of these at that point. + fp = m.stack.popFrame() + case urxOnechar: + if *fp.inputIdx() < m.activeLimit { + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + if c == rune(op.value()) { + break + } + } else { + m.hitEnd = true + } + fp = m.stack.popFrame() + case urxString: + // Test input against a literal string. + // Strings require two slots in the compiled pattern, one for the + // offset to the string text, and one for the length. 
+ stringStartIdx := op.value() + nextOp := pat[*fp.patIdx()] // Fetch the second operand + *fp.patIdx()++ + stringLen := nextOp.value() + + patternString := litText[stringStartIdx:] + var patternStringIndex int + success := true + for patternStringIndex < stringLen { + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + success = false + break + } + if charAt(patternString, patternStringIndex) != charAt(inputText, *fp.inputIdx()) { + success = false + break + } + patternStringIndex++ + *fp.inputIdx()++ + } + + if !success { + fp = m.stack.popFrame() + } + case urxStateSave: + fp, err = m.stateSave(*fp.inputIdx(), op.value()) + if err != nil { + return err + } + case urxEnd: + // The match loop will exit via this path on a successful match, + // when we reach the end of the pattern. + if toEnd && *fp.inputIdx() != m.activeLimit { + // The pattern matched, but not to the end of input. Try some more. + fp = m.stack.popFrame() + break + } + isMatch = true + goto breakFromLoop + + // Start and End Capture stack frame variables are laid out out like this: + // fp->fExtra[opValue] - The start of a completed capture group + // opValue+1 - The end of a completed capture group + // opValue+2 - the start of a capture group whose end + // has not yet been reached (and might not ever be). + case urxStartCapture: + *fp.extra(op.value() + 2) = *fp.inputIdx() + case urxEndCapture: + *fp.extra(op.value()) = *fp.extra(op.value() + 2) // Tentative start becomes real. + *fp.extra(op.value() + 1) = *fp.inputIdx() // End position + + case urxDollar: // $, test for End of line + if *fp.inputIdx() < m.anchorLimit-2 { + fp = m.stack.popFrame() + break + } + // or for position before new line at end of input + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. 
+ m.hitEnd = true + m.requireEnd = true + break + } + + if *fp.inputIdx() == m.anchorLimit-1 { + c := m.input[*fp.inputIdx()] + if isLineTerminator(c) { + if !(c == 0x0a && *fp.inputIdx() > m.anchorStart && m.input[*fp.inputIdx()-1] == 0x0d) { + // At new-line at end of input. Success + m.hitEnd = true + m.requireEnd = true + break + } + } + } else if *fp.inputIdx() == m.anchorLimit-2 && m.input[*fp.inputIdx()] == 0x0d && m.input[*fp.inputIdx()+1] == 0x0a { + m.hitEnd = true + m.requireEnd = true + break // At CR/LF at end of input. Success + } + fp = m.stack.popFrame() + + case urxDollarD: // $, test for End of Line, in UNIX_LINES mode. + if *fp.inputIdx() >= m.anchorLimit { + // Off the end of input. Success. + m.hitEnd = true + m.requireEnd = true + break + } + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + // Either at the last character of input, or off the end. + if c == 0x0a && *fp.inputIdx() == m.anchorLimit { + m.hitEnd = true + m.requireEnd = true + break + } + + // Not at end of input. Back-track out. + fp = m.stack.popFrame() + case urxDollarM: // $, test for End of line in multi-line mode + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. + m.hitEnd = true + m.requireEnd = true + break + } + // If we are positioned just before a new-line, succeed. + // It makes no difference where the new-line is within the input. + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + // At a line end, except for the odd chance of being in the middle of a CR/LF sequence + // In multi-line mode, hitting a new-line just before the end of input does not + // set the hitEnd or requireEnd flags + if !(c == 0x0a && *fp.inputIdx() > m.anchorStart && charAt(inputText, *fp.inputIdx()-1) == 0x0d) { + break + } + } + // not at a new line. Fail. 
+ fp = m.stack.popFrame() + case urxDollarMd: // $, test for End of line in multi-line and UNIX_LINES mode + if *fp.inputIdx() >= m.anchorLimit { + // We really are at the end of input. Success. + m.hitEnd = true + m.requireEnd = true // Java set requireEnd in this case, even though + break // adding a new-line would not lose the match. + } + // If we are not positioned just before a new-line, the test fails; backtrack out. + // It makes no difference where the new-line is within the input. + if charAt(inputText, *fp.inputIdx()) != 0x0a { + fp = m.stack.popFrame() + } + case urxCaret: // ^, test for start of line + if *fp.inputIdx() != m.anchorStart { + fp = m.stack.popFrame() + } + case urxCaretM: // ^, test for start of line in mulit-line mode + if *fp.inputIdx() == m.anchorStart { + // We are at the start input. Success. + break + } + // Check whether character just before the current pos is a new-line + // unless we are at the end of input + c := charAt(inputText, *fp.inputIdx()-1) + if (*fp.inputIdx() < m.anchorLimit) && isLineTerminator(c) { + // It's a new-line. ^ is true. Success. + // TODO: what should be done with positions between a CR and LF? + break + } + // Not at the start of a line. Fail. + fp = m.stack.popFrame() + case urxCaretMUnix: // ^, test for start of line in mulit-line + Unix-line mode + if *fp.inputIdx() <= m.anchorStart { + // We are at the start input. Success. + break + } + + c := charAt(inputText, *fp.inputIdx()-1) + if c != 0x0a { + // Not at the start of a line. Back-track out. 
+ fp = m.stack.popFrame() + } + case urxBackslashB: // Test for word boundaries + success := m.isWordBoundary(*fp.inputIdx()) + success = success != (op.value() != 0) // flip sense for \B + if !success { + fp = m.stack.popFrame() + } + case urxBackslashBu: // Test for word boundaries, Unicode-style + success := m.isUWordBoundary(*fp.inputIdx()) + success = success != (op.value() != 0) // flip sense for \B + if !success { + fp = m.stack.popFrame() + } + case urxBackslashD: // Test for decimal digit + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + + success := m.isDecimalDigit(c) + success = success != (op.value() != 0) // flip sense for \D + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashG: // Test for position at end of previous match + if !((m.match && *fp.inputIdx() == m.matchEnd) || (!m.match && *fp.inputIdx() == m.activeStart)) { + fp = m.stack.popFrame() + } + + case urxBackslashH: // Test for \h, horizontal white space. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + success := m.isHorizWS(c) || c == 9 + success = success != (op.value() != 0) // flip sense for \H + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashR: // Test for \R, any line break sequence. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + if c == 0x0d && charAt(inputText, *fp.inputIdx()+1) == 0x0a { + *fp.inputIdx()++ + } + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashV: // \v, any single line ending character. 
+ if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + c := charAt(inputText, *fp.inputIdx()) + success := isLineTerminator(c) + success = success != (op.value() != 0) // flip sense for \V + if success { + *fp.inputIdx()++ + } else { + fp = m.stack.popFrame() + } + + case urxBackslashX: + // Match a Grapheme, as defined by Unicode UAX 29. + + // Fail if at end of input + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + *fp.inputIdx() = m.followingGCBoundary(*fp.inputIdx()) + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + *fp.inputIdx() = m.activeLimit + } + + case urxBackslashZ: // Test for end of Input + if *fp.inputIdx() < m.anchorLimit { + fp = m.stack.popFrame() + } else { + m.hitEnd = true + m.requireEnd = true + } + case urxStaticSetref: + // Test input character against one of the predefined sets + // (Word Characters, for example) + // The high bit of the op value is a flag for the match polarity. + // 0: success if input char is in set. + // 1: success if input char is not in set. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + success := (op.value() & urxNegSet) == urxNegSet + negOp := op.value() & ^urxNegSet + + c := charAt(inputText, *fp.inputIdx()) + s := staticPropertySets[negOp] + if s.ContainsRune(c) { + success = !success + } + + if success { + *fp.inputIdx()++ + } else { + // the character wasn't in the set. + fp = m.stack.popFrame() + } + case urxStatSetrefN: + // Test input character for NOT being a member of one of + // the predefined sets (Word Characters, for example) + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + s := staticPropertySets[op.value()] + if !s.ContainsRune(c) { + *fp.inputIdx()++ + break + } + // the character wasn't in the set. 
+ fp = m.stack.popFrame() + + case urxSetref: + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + // There is input left. Pick up one char and test it for set membership. + c := charAt(inputText, *fp.inputIdx()) + + s := sets[op.value()] + if s.ContainsRune(c) { + *fp.inputIdx()++ + break + } + + // the character wasn't in the set. + fp = m.stack.popFrame() + + case urxDotany: + // . matches anything, but stops at end-of-line. + if *fp.inputIdx() >= m.activeLimit { + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + if isLineTerminator(c) { + // End of line in normal mode. . does not match. + fp = m.stack.popFrame() + break + } + *fp.inputIdx()++ + + case urxDotanyAll: + // ., in dot-matches-all (including new lines) mode + if *fp.inputIdx() >= m.activeLimit { + // At end of input. Match failed. Backtrack out. + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + c := charAt(inputText, *fp.inputIdx()) + *fp.inputIdx()++ + if c == 0x0d && *fp.inputIdx() < m.activeLimit { + // In the case of a CR/LF, we need to advance over both. + nextc := charAt(inputText, *fp.inputIdx()) + if nextc == 0x0a { + *fp.inputIdx()++ + } + } + + case urxDotanyUnix: + // '.' operator, matches all, but stops at end-of-line. + // UNIX_LINES mode, so 0x0a is the only recognized line ending. + if *fp.inputIdx() >= m.activeLimit { + // At end of input. Match failed. Backtrack out. + m.hitEnd = true + fp = m.stack.popFrame() + break + } + + // There is input left. Advance over one char, unless we've hit end-of-line + c := charAt(inputText, *fp.inputIdx()) + if c == 0x0a { + // End of line in normal mode. '.' 
does not match the \n + fp = m.stack.popFrame() + } else { + *fp.inputIdx()++ + } + case urxJmp: + *fp.patIdx() = op.value() + + case urxFail: + isMatch = false + goto breakFromLoop + + case urxJmpSav: + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) // State save to loc following current + if err != nil { + return err + } + *fp.patIdx() = op.value() // Then JMP. + + case urxJmpSavX: + // This opcode is used with (x)+, when x can match a zero length string. + // Same as JMP_SAV, except conditional on the match having made forward progress. + // Destination of the JMP must be a URX_STO_INP_LOC, from which we get the + // data address of the input position at the start of the loop. + stoOp := pat[op.value()-1] + frameLoc := stoOp.value() + + prevInputIdx := *fp.extra(frameLoc) + if prevInputIdx < *fp.inputIdx() { + // The match did make progress. Repeat the loop. + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) // State save to loc following current + if err != nil { + return err + } + *fp.patIdx() = op.value() // Then JMP. + *fp.extra(frameLoc) = *fp.inputIdx() + } + // If the input position did not advance, we do nothing here, + // execution will fall out of the loop. + + case urxCtrInit: + *fp.extra(op.value()) = 0 // Set the loop counter variable to zero + + // Pick up the three extra operands that CTR_INIT has, and + // skip the pattern location counter past + instOperandLoc := *fp.patIdx() + *fp.patIdx() += 3 // Skip over the three operands that CTR_INIT has. + + loopLoc := pat[instOperandLoc].value() + minCount := int(pat[instOperandLoc+1]) + maxCount := int(pat[instOperandLoc+2]) + + if minCount == 0 { + fp, err = m.stateSave(*fp.inputIdx(), loopLoc+1) + if err != nil { + return err + } + } + if maxCount == -1 { + *fp.extra(op.value() + 1) = *fp.inputIdx() // For loop breaking. 
+ } else if maxCount == 0 { + fp = m.stack.popFrame() + } + + case utxCtrLoop: + initOp := pat[op.value()] + opValue := initOp.value() + pCounter := fp.extra(opValue) + minCount := int(pat[op.value()+2]) + maxCount := int(pat[op.value()+3]) + *pCounter++ + if *pCounter >= maxCount && maxCount != -1 { + break + } + + if *pCounter >= minCount { + if maxCount == -1 { + // Loop has no hard upper bound. + // Check that it is progressing through the input, break if it is not. + pLastIntputIdx := fp.extra(opValue + 1) + if *pLastIntputIdx == *fp.inputIdx() { + break + } + *pLastIntputIdx = *fp.inputIdx() + } + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + } else { + // Increment time-out counter. (StateSave() does it if count >= minCount) + m.tickCounter-- + if m.tickCounter <= 0 { + if err = m.incrementTime(*fp.inputIdx()); err != nil { + return err + } // Re-initializes fTickCounter + } + } + + *fp.patIdx() = op.value() + 4 // Loop back. + + case urxCtrInitNg: + *fp.extra(op.value()) = 0 // Set the loop counter variable to zero + + // Pick up the three extra operands that CTR_INIT_NG has, and + // skip the pattern location counter past + instrOperandLoc := *fp.patIdx() + *fp.patIdx() += 3 + loopLoc := pat[instrOperandLoc].value() + minCount := pat[instrOperandLoc+1].value() + maxCount := pat[instrOperandLoc+2].value() + + if maxCount == -1 { + *fp.extra(op.value() + 1) = *fp.inputIdx() // Save initial input index for loop breaking. + } + + if minCount == 0 { + if maxCount != 0 { + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + } + *fp.patIdx() = loopLoc + 1 + } + + case urxCtrLoopNg: + initOp := pat[op.value()] + pCounter := fp.extra(initOp.value()) + minCount := int(pat[op.value()+2]) + maxCount := int(pat[op.value()+3]) + *pCounter++ + if *pCounter >= maxCount && maxCount != -1 { + // The loop has matched the maximum permitted number of times. + // Break out of here with no action. 
Matching will + // continue with the following pattern. + break + } + + if *pCounter < minCount { + // We haven't met the minimum number of matches yet. + // Loop back for another one. + *fp.patIdx() = op.value() + 4 // Loop back. + // Increment time-out counter. (StateSave() does it if count >= minCount) + m.tickCounter-- + if m.tickCounter <= 0 { + if err = m.incrementTime(*fp.inputIdx()); err != nil { + return err + } // Re-initializes fTickCounter + } + } else { + // We do have the minimum number of matches. + + // If there is no upper bound on the loop iterations, check that the input index + // is progressing, and stop the loop if it is not. + if maxCount == -1 { + lastInputIdx := fp.extra(initOp.value() + 1) + if *fp.inputIdx() == *lastInputIdx { + break + } + *lastInputIdx = *fp.inputIdx() + } + } + + // Loop Continuation: we will fall into the pattern following the loop + // (non-greedy, don't execute loop body first), but first do + // a state save to the top of the loop, so that a match failure + // in the following pattern will try another iteration of the loop. + fp, err = m.stateSave(*fp.inputIdx(), op.value()+4) + if err != nil { + return err + } + + case urxStoSp: + m.data[op.value()] = m.stack.len() + + case urxLdSp: + newStackSize := m.data[op.value()] + newFp := m.stack.offset(newStackSize) + if newFp.equals(fp) { + break + } + copy(newFp, fp) + fp = newFp + + m.stack.setSize(newStackSize) + case urxBackref: + groupStartIdx := *fp.extra(op.value()) + groupEndIdx := *fp.extra(op.value() + 1) + + if groupStartIdx < 0 { + // This capture group has not participated in the match thus far, + fp = m.stack.popFrame() // FAIL, no match. 
+ break + } + + success := true + for { + if groupStartIdx >= groupEndIdx { + success = true + break + } + + if *fp.inputIdx() >= m.activeLimit { + success = false + m.hitEnd = true + break + } + + captureGroupChar := charAt(inputText, groupStartIdx) + inputChar := charAt(inputText, *fp.inputIdx()) + groupStartIdx++ + *fp.inputIdx()++ + if inputChar != captureGroupChar { + success = false + break + } + } + + if !success { + fp = m.stack.popFrame() + } + case urxBackrefI: + groupStartIdx := *fp.extra(op.value()) + groupEndIdx := *fp.extra(op.value() + 1) + + if groupStartIdx < 0 { + // This capture group has not participated in the match thus far, + fp = m.stack.popFrame() // FAIL, no match. + break + } + + captureGroupItr := newCaseFoldIterator(m.input, groupStartIdx, groupEndIdx) + inputItr := newCaseFoldIterator(m.input, *fp.inputIdx(), m.activeLimit) + success := true + + for { + captureGroupChar := captureGroupItr.next() + if captureGroupChar == -1 { + success = true + break + } + inputChar := inputItr.next() + if inputChar == -1 { + success = false + m.hitEnd = true + break + } + if inputChar != captureGroupChar { + success = false + break + } + } + + if success && inputItr.inExpansion() { + // We otained a match by consuming part of a string obtained from + // case-folding a single code point of the input text. + // This does not count as an overall match. + success = false + } + + if success { + *fp.inputIdx() = inputItr.index + } else { + fp = m.stack.popFrame() + } + + case urxStoInpLoc: + *fp.extra(op.value()) = *fp.inputIdx() + + case urxJmpx: + instrOperandLoc := *fp.patIdx() + *fp.patIdx()++ + dataLoc := pat[instrOperandLoc].value() + + saveInputIdx := *fp.extra(dataLoc) + + if saveInputIdx < *fp.inputIdx() { + *fp.patIdx() = op.value() // JMP + } else { + fp = m.stack.popFrame() // FAIL, no progress in loop. 
+ } + + case urxLaStart: + m.data[op.value()] = m.stack.len() + m.data[op.value()+1] = *fp.inputIdx() + m.data[op.value()+2] = m.activeStart + m.data[op.value()+3] = m.activeLimit + m.activeStart = m.lookStart // Set the match region change for + m.activeLimit = m.lookLimit // transparent bounds. + + case urxLaEnd: + stackSize := m.stack.len() + newStackSize := m.data[op.value()] + if stackSize > newStackSize { + // Copy the current top frame back to the new (cut back) top frame. + // This makes the capture groups from within the look-ahead + // expression available. + newFp := m.stack.offset(newStackSize) + copy(newFp, fp) + fp = newFp + m.stack.setSize(newStackSize) + } + + *fp.inputIdx() = m.data[op.value()+1] + + m.activeStart = m.data[op.value()+2] + m.activeLimit = m.data[op.value()+3] + + case urcOnecharI: + // Case insensitive one char. The char from the pattern is already case folded. + // Input text is not, but case folding the input can not reduce two or more code + // points to one. + if *fp.inputIdx() < m.activeLimit { + c := charAt(inputText, *fp.inputIdx()) + if ucase.Fold(c) == op.value32() { + *fp.inputIdx()++ + break + } + } else { + m.hitEnd = true + } + + fp = m.stack.popFrame() + + case urxStringI: + // Case-insensitive test input against a literal string. + // Strings require two slots in the compiled pattern, one for the + // offset to the string text, and one for the length. + // The compiled string has already been case folded. 
+ patternString := litText[op.value():] + var patternStringIdx int + nextOp := pat[*fp.patIdx()] + *fp.patIdx()++ + patternStringLen := nextOp.value() + + success := true + + it := newCaseFoldIterator(inputText, *fp.inputIdx(), m.activeLimit) + for patternStringIdx < patternStringLen { + cText := it.next() + cPattern := patternString[patternStringIdx] + patternStringIdx++ + + if cText != cPattern { + success = false + if cText == -1 { + m.hitEnd = true + } + break + } + } + if it.inExpansion() { + success = false + } + + if success { + *fp.inputIdx() = it.index + } else { + fp = m.stack.popFrame() + } + + case urxLbStart: + // Entering a look-behind block. + // Save Stack Ptr, Input Pos and active input region. + // TODO: implement transparent bounds. Ticket #6067 + m.data[op.value()] = m.stack.len() + m.data[op.value()+1] = *fp.inputIdx() + // Save input string length, then reset to pin any matches to end at + // the current position. + m.data[op.value()+2] = m.activeStart + m.data[op.value()+3] = m.activeLimit + m.activeStart = m.regionStart + m.activeLimit = *fp.inputIdx() + // Init the variable containing the start index for attempted matches. + m.data[op.value()+4] = -1 + case urxLbCont: + // Positive Look-Behind, at top of loop checking for matches of LB expression + // at all possible input starting positions. + + // Fetch the min and max possible match lengths. They are the operands + // of this op in the pattern. + minML := pat[*fp.patIdx()] + *fp.patIdx()++ + maxML := pat[*fp.patIdx()] + *fp.patIdx()++ + + lbStartIdx := &m.data[op.value()+4] + if *lbStartIdx < 0 { + // First time through loop. + *lbStartIdx = *fp.inputIdx() - int(minML) + if *lbStartIdx > 0 { + *lbStartIdx = *fp.inputIdx() + } + } else { + // 2nd through nth time through the loop. + // Back up start position for match by one. 
+ *lbStartIdx-- + } + + if *lbStartIdx < 0 || *lbStartIdx < *fp.inputIdx()-int(maxML) { + // We have tried all potential match starting points without + // getting a match. Backtrack out, and out of the + // Look Behind altogether. + fp = m.stack.popFrame() + m.activeStart = m.data[op.value()+2] + m.activeLimit = m.data[op.value()+3] + break + } + + // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. + // (successful match will fall off the end of the loop.) + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-3) + if err != nil { + return err + } + *fp.inputIdx() = *lbStartIdx + + case urxLbEnd: + // End of a look-behind block, after a successful match. + if *fp.inputIdx() != m.activeLimit { + // The look-behind expression matched, but the match did not + // extend all the way to the point that we are looking behind from. + // FAIL out of here, which will take us back to the LB_CONT, which + // will retry the match starting at another position or fail + // the look-behind altogether, whichever is appropriate. + fp = m.stack.popFrame() + break + } + + // Look-behind match is good. Restore the orignal input string region, + // which had been truncated to pin the end of the lookbehind match to the + // position being looked-behind. + m.activeStart = m.data[op.value()+2] + m.activeLimit = m.data[op.value()+3] + case urxLbnCount: + // Negative Look-Behind, at top of loop checking for matches of LB expression + // at all possible input starting positions. + + // Fetch the extra parameters of this op. + minML := pat[*fp.patIdx()] + *fp.patIdx()++ + maxML := pat[*fp.patIdx()] + *fp.patIdx()++ + + continueLoc := pat[*fp.patIdx()].value() + *fp.patIdx()++ + + lbStartIdx := &m.data[op.value()+4] + + if *lbStartIdx < 0 { + // First time through loop. + *lbStartIdx = *fp.inputIdx() - int(minML) + if *lbStartIdx > 0 { + // move index to a code point boundary, if it's not on one already. 
+ *lbStartIdx = *fp.inputIdx() + } + } else { + // 2nd through nth time through the loop. + // Back up start position for match by one. + *lbStartIdx-- + } + + if *lbStartIdx < 0 || *lbStartIdx < *fp.inputIdx()-int(maxML) { + // We have tried all potential match starting points without + // getting a match, which means that the negative lookbehind as + // a whole has succeeded. Jump forward to the continue location + m.activeStart = m.data[op.value()+2] + m.activeLimit = m.data[op.value()+3] + *fp.patIdx() = continueLoc + break + } + + // Save state to this URX_LB_CONT op, so failure to match will repeat the loop. + // (successful match will cause a FAIL out of the loop altogether.) + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-4) + if err != nil { + return err + } + *fp.inputIdx() = *lbStartIdx + case urxLbnEnd: + // End of a negative look-behind block, after a successful match. + + if *fp.inputIdx() != m.activeLimit { + // The look-behind expression matched, but the match did not + // extend all the way to the point that we are looking behind from. + // FAIL out of here, which will take us back to the LB_CONT, which + // will retry the match starting at another position or succeed + // the look-behind altogether, whichever is appropriate. + fp = m.stack.popFrame() + break + } + + // Look-behind expression matched, which means look-behind test as + // a whole Fails + + // Restore the orignal input string length, which had been truncated + // inorder to pin the end of the lookbehind match + // to the position being looked-behind. + m.activeStart = m.data[op.value()+2] + m.activeLimit = m.data[op.value()+3] + + // Restore original stack position, discarding any state saved + // by the successful pattern match. + newStackSize := m.data[op.value()] + m.stack.setSize(newStackSize) + + // FAIL, which will take control back to someplace + // prior to entering the look-behind test. 
+ fp = m.stack.popFrame() + case urxLoopSrI: + // Loop Initialization for the optimized implementation of + // [some character set]* + // This op scans through all matching input. + // The following LOOP_C op emulates stack unwinding if the following pattern fails. + s := sets[op.value()] + + // Loop through input, until either the input is exhausted or + // we reach a character that is not a member of the set. + ix := *fp.inputIdx() + + for { + if ix >= m.activeLimit { + m.hitEnd = true + break + } + c := charAt(inputText, ix) + if !s.ContainsRune(c) { + break + } + ix++ + } + + // If there were no matching characters, skip over the loop altogether. + // The loop doesn't run at all, a * op always succeeds. + if ix == *fp.inputIdx() { + *fp.patIdx()++ // skip the URX_LOOP_C op. + break + } + + // Peek ahead in the compiled pattern, to the URX_LOOP_C that + // must follow. It's operand is the stack location + // that holds the starting input index for the match of this [set]* + loopcOp := pat[*fp.patIdx()] + stackLoc := loopcOp.value() + *fp.extra(stackLoc) = *fp.inputIdx() + *fp.inputIdx() = ix + + // Save State to the URX_LOOP_C op that follows this one, + // so that match failures in the following code will return to there. + // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + *fp.patIdx()++ + case urxLoopDotI: + // Loop Initialization for the optimized implementation of .* + // This op scans through all remaining input. + // The following LOOP_C op emulates stack unwinding if the following pattern fails. + + // Loop through input until the input is exhausted (we reach an end-of-line) + // In DOTALL mode, we can just go straight to the end of the input. + var ix int + if (op.value() & 1) == 1 { + // Dot-matches-All mode. Jump straight to the end of the string. + ix = m.activeLimit + m.hitEnd = true + } else { + // NOT DOT ALL mode. 
Line endings do not match '.' + // Scan forward until a line ending or end of input. + ix = *fp.inputIdx() + for { + if ix >= m.activeLimit { + m.hitEnd = true + break + } + c := charAt(inputText, ix) + if (c & 0x7f) <= 0x29 { // Fast filter of non-new-line-s + if (c == 0x0a) || // 0x0a is newline in both modes. + (((op.value() & 2) == 0) && // IF not UNIX_LINES mode + isLineTerminator(c)) { + // char is a line ending. Exit the scanning loop. + break + } + } + ix++ + } + } + + // If there were no matching characters, skip over the loop altogether. + // The loop doesn't run at all, a * op always succeeds. + if ix == *fp.inputIdx() { + *fp.patIdx()++ // skip the URX_LOOP_C op. + break + } + + // Peek ahead in the compiled pattern, to the URX_LOOP_C that + // must follow. It's operand is the stack location + // that holds the starting input index for the match of this .* + loopcOp := pat[*fp.patIdx()] + stackLoc := loopcOp.value() + *fp.extra(stackLoc) = *fp.inputIdx() + *fp.inputIdx() = ix + + // Save State to the URX_LOOP_C op that follows this one, + // so that match failures in the following code will return to there. + // Then bump the pattern idx so the LOOP_C is skipped on the way out of here. + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()) + if err != nil { + return err + } + *fp.patIdx()++ + + case urxLoopC: + backSearchIndex := *fp.extra(op.value()) + + if backSearchIndex == *fp.inputIdx() { + // We've backed up the input idx to the point that the loop started. + // The loop is done. Leave here without saving state. + // Subsequent failures won't come back here. + break + } + // Set up for the next iteration of the loop, with input index + // backed up by one from the last time through, + // and a state save to this instruction in case the following code fails again. + // (We're going backwards because this loop emulates stack unwinding, not + // the initial scan forward.) 
+ + prevC := charAt(inputText, *fp.inputIdx()-1) + *fp.inputIdx()-- + twoPrevC := charAt(inputText, *fp.inputIdx()-1) + + if prevC == 0x0a && + *fp.inputIdx() > backSearchIndex && + twoPrevC == 0x0d { + prevOp := pat[*fp.patIdx()-2] + if prevOp.typ() == urxLoopDotI { + // .*, stepping back over CRLF pair. + *fp.inputIdx()-- + } + } + + fp, err = m.stateSave(*fp.inputIdx(), *fp.patIdx()-1) + if err != nil { + return err + } + default: + // Trouble. The compiled pattern contains an entry with an + // unrecognized type tag. + panic("unreachable") + } + } + +breakFromLoop: + m.match = isMatch + if isMatch { + m.lastMatchEnd = m.matchEnd + m.matchStart = startIdx + m.matchEnd = *fp.inputIdx() + } + + if m.dumper != nil { + if isMatch { + fmt.Fprintf(m.dumper, "Match. start=%d end=%d\n\n", m.matchStart, m.matchEnd) + } else { + fmt.Fprintf(m.dumper, "No match\n\n") + } + } + + m.frame = fp // The active stack frame when the engine stopped. + // Contains the capture group results that we need to + // access later. 
// charAt returns the code point stored at position idx of str.
//
// Any idx outside [0, len(str)) yields -1 instead of panicking, so callers
// can probe one position before the start or past the end of the input and
// treat the result as an end-of-text sentinel.
func charAt(str []rune, idx int) rune {
	if idx < 0 || idx >= len(str) {
		return -1
	}
	return str[idx]
}
+ } else { + returnVal = fWordBreakItr->isBoundary((int32_t)pos); + } + #endif + return returnVal; + */ + return false +} + +func (m *Matcher) resetStack() stackFrame { + m.stack.reset() + frame, _ := m.stack.newFrame(0, nil, "") + frame.clearExtra() + return frame +} + +func (m *Matcher) stateSave(inputIdx, savePatIdx int) (stackFrame, error) { + // push storage for a new frame. + newFP, err := m.stack.newFrame(inputIdx, m.input, m.pattern.pattern) + if err != nil { + return nil, err + } + fp := m.stack.prevFromTop() + + // New stack frame = copy of old top frame. + copy(newFP, fp) + + m.tickCounter-- + if m.tickCounter <= 0 { + if err := m.incrementTime(*fp.inputIdx()); err != nil { + return nil, err + } + } + *fp.patIdx() = savePatIdx + return newFP, nil +} + +func (m *Matcher) incrementTime(inputIdx int) error { + m.tickCounter = timerInitialValue + m.time++ + if m.timeLimit > 0 && m.time >= m.timeLimit { + return &MatchError{ + Code: TimeOut, + Pattern: m.pattern.pattern, + Position: inputIdx, + Input: m.input, + } + } + return nil +} + +func (m *Matcher) isDecimalDigit(c rune) bool { + return uchar.IsDigit(c) +} + +func (m *Matcher) isHorizWS(c rune) bool { + return uchar.CharType(c) == uchar.SpaceSeparator || c == 9 +} + +func (m *Matcher) followingGCBoundary(pos int) int { + // TODO: implement + return pos + /* + // Note: this point will never be reached if break iteration is configured out. + // Regex patterns that would require this function will fail to compile. + + // If we haven't yet created a break iterator for this matcher, do it now. 
// Find searches the input for the next match of the pattern.
//
// The search starts at the end of the previous match (m.matchEnd), or at
// m.activeStart when that value is 0. If the current match is zero-length the
// start position is advanced by one so that repeated calls make progress.
// When no match is current and m.lastMatchEnd is non-negative, Find reports
// no match immediately instead of searching again.
//
// Candidate start positions are limited to those that leave at least
// m.pattern.minMatchLen positions before m.activeLimit. How candidates are
// picked depends on m.pattern.startType; each candidate is handed to MatchAt.
//
// It returns true when MatchAt reports a match. When the candidates are
// exhausted it returns false, and every such path except the startStart
// early-out also sets m.hitEnd. An error from MatchAt is returned to the
// caller; the startStart branch returns it together with the current value
// of m.match, all other branches together with false.
func (m *Matcher) Find() (bool, error) {
	startPos := m.matchEnd
	if startPos == 0 {
		startPos = m.activeStart
	}

	if m.match {
		// Save the position of any previous successful match.
		m.lastMatchEnd = m.matchEnd
		if m.matchStart == m.matchEnd {
			// Previous match had zero length. Move start position up one position
			// to avoid sending find() into a loop on zero-length matches.
			if startPos >= m.activeLimit {
				m.match = false
				m.hitEnd = true
				return false, nil
			}
			startPos++
		}
	} else {
		if m.lastMatchEnd >= 0 {
			// A previous find() failed to match. Don't try again.
			// (without this test, a pattern with a zero-length match
			// could match again at the end of an input string.)
			m.hitEnd = true
			return false, nil
		}
	}

	// Last start position that still leaves room for the shortest possible match.
	testStartLimit := m.activeLimit - int(m.pattern.minMatchLen)
	if startPos > testStartLimit {
		m.match = false
		m.hitEnd = true
		return false, nil
	}

	switch m.pattern.startType {
	case startNoInfo:
		// No optimization was found.
		// Try a match at each input position.
		for {
			err := m.MatchAt(startPos, false)
			if err != nil {
				return false, err
			}
			if m.match {
				return true, nil
			}
			if startPos >= testStartLimit {
				m.hitEnd = true
				return false, nil
			}
			startPos++
		}
	case startSet:
		// Match may start on any char from a pre-computed set.
		for {
			pos := startPos
			c := charAt(m.input, startPos)
			startPos++
			// c will be -1 (U_SENTINEL) at end of text, in which case we
			// skip this next block (so we don't have a negative array index)
			// and handle end of text in the following block.
			if c >= 0 && m.pattern.initialChars.ContainsRune(c) {
				err := m.MatchAt(pos, false)
				if err != nil {
					return false, err
				}
				if m.match {
					return true, nil
				}
			}

			if startPos > testStartLimit {
				m.match = false
				m.hitEnd = true
				return false, nil
			}
		}
	case startStart:
		// Matches are only possible at the start of the input string
		// (pattern begins with ^ or \A)
		if startPos > m.activeStart {
			m.match = false
			return false, nil
		}
		err := m.MatchAt(startPos, false)
		return m.match, err
	case startLine:
		// ch always holds the character immediately before startPos, so the
		// loops below can test whether startPos follows a line ending.
		var ch rune
		if startPos == m.anchorStart {
			// The anchor start is tried unconditionally before scanning for
			// line endings.
			err := m.MatchAt(startPos, false)
			if err != nil {
				return false, err
			}
			if m.match {
				return true, nil
			}
			ch = charAt(m.input, startPos)
			startPos++
		} else {
			ch = charAt(m.input, startPos-1)
		}

		if m.pattern.flags&UnixLines != 0 {
			// UnixLines: only 0x0a counts as a line ending.
			for {
				if ch == 0x0a {
					err := m.MatchAt(startPos, false)
					if err != nil {
						return false, err
					}
					if m.match {
						return true, nil
					}
				}
				if startPos >= testStartLimit {
					m.match = false
					m.hitEnd = true
					return false, nil
				}
				ch = charAt(m.input, startPos)
				startPos++
			}
		} else {
			for {
				if isLineTerminator(ch) {
					// Step over the 0x0a of a 0x0d 0x0a pair so the pair
					// counts as a single line ending.
					if ch == 0x0d && startPos < m.activeLimit && charAt(m.input, startPos) == 0x0a {
						startPos++
					}
					err := m.MatchAt(startPos, false)
					if err != nil {
						return false, err
					}
					if m.match {
						return true, nil
					}
				}
				if startPos >= testStartLimit {
					m.match = false
					m.hitEnd = true
					return false, nil
				}
				ch = charAt(m.input, startPos)
				startPos++
			}
		}
	case startChar, startString:
		// Match starts on exactly one char.
		theChar := m.pattern.initialChar
		for {
			pos := startPos
			c := charAt(m.input, startPos)
			startPos++
			if c == theChar {
				err := m.MatchAt(pos, false)
				if err != nil {
					return false, err
				}
				if m.match {
					return true, nil
				}
			}
			if startPos > testStartLimit {
				m.match = false
				m.hitEnd = true
				return false, nil
			}
		}
	default:
		panic("unreachable")
	}
}
// isLineTerminator reports whether c is one of the line terminating
// characters recognised by the engine: U+000A, U+000B, U+000C, U+000D,
// U+0085, U+2028 or U+2029. Every other value, including negative ones,
// yields false.
func isLineTerminator(c rune) bool {
	switch c {
	case 0x0a, 0x0b, 0x0c, 0x0d, 0x85, 0x2028, 0x2029:
		return true
	default:
		return false
	}
}
+ urxEnd + urxOnechar // Value field is the 21 bit unicode char to match + urxString // Value field is index of string start + urxStringLen // Value field is string length (code units) + urxStateSave // Value field is pattern position to push + urxNop + urxStartCapture // Value field is capture group number. + urxEndCapture // Value field is capture group number + urxStaticSetref // Value field is index of set in array of sets. + urxSetref // Value field is index of set in array of sets. + urxDotany + urxJmp // Value field is destination position in the pattern. + urxFail // Stop match operation, No match. + + urxJmpSav // Operand: JMP destination location + urxBackslashB // Value field: 0: \b 1: \B + urxBackslashG + urxJmpSavX // Conditional JMP_SAV, + // Used in (x)+, breaks loop on zero length match. + // Operand: Jmp destination. + urxBackslashX + urxBackslashZ // \z Unconditional end of line. + + urxDotanyAll // ., in the . matches any mode. + urxBackslashD // Value field: 0: \d 1: \D + urxCaret // Value field: 1: multi-line mode. + urxDollar // Also for \Z + + urxCtrInit // Counter Inits for {Interval} loops. + urxCtrInitNg // 2 kinds, normal and non-greedy. + // These are 4 word opcodes. See description. + // First Operand: Data loc of counter variable + // 2nd Operand: Pat loc of the URX_CTR_LOOPx + // at the end of the loop. + // 3rd Operand: Minimum count. + // 4th Operand: Max count, -1 for unbounded. + + urxDotanyUnix // '.' operator in UNIX_LINES mode, only \n marks end of line. + + utxCtrLoop // Loop Ops for {interval} loops. + urxCtrLoopNg // Also in three flavors. + // Operand is loc of corresponding CTR_INIT. + + urxCaretMUnix // '^' operator, test for start of line in multi-line + // plus UNIX_LINES mode. + + urxRelocOprnd // Operand value in multi-operand ops that refers + // back into compiled pattern code, and thus must + // be relocated when inserting/deleting ops in code. + + urxStoSp // Store the stack ptr. 
Operand is location within + // matcher data (not stack data) to store it. + urxLdSp // Load the stack pointer. Operand is location + // to load from. + urxBackref // Back Reference. Parameter is the index of the + // capture group variables in the state stack frame. + urxStoInpLoc // Store the input location. Operand is location + // within the matcher stack frame. + urxJmpx // Conditional JMP. + // First Operand: JMP target location. + // Second Operand: Data location containing an + // input position. If current input position == + // saved input position, FAIL rather than taking + // the JMP + urxLaStart // Starting a LookAround expression. + // Save InputPos, SP and active region in static data. + // Operand: Static data offset for the save + urxLaEnd // Ending a Lookaround expression. + // Restore InputPos and Stack to saved values. + // Operand: Static data offset for saved data. + urcOnecharI // Test for case-insensitive match of a literal character. + // Operand: the literal char. + urxStringI // Case insensitive string compare. + // First Operand: Index of start of string in string literals + // Second Operand (next word in compiled code): + // the length of the string. + urxBackrefI // Case insensitive back reference. + // Parameter is the index of the + // capture group variables in the state stack frame. + urxDollarM // $ in multi-line mode. + urxCaretM // ^ in multi-line mode. + urxLbStart // LookBehind Start. + // Parameter is data location + urxLbCont // LookBehind Continue. + // Param 0: the data location + // Param 1: The minimum length of the look-behind match + // Param 2: The max length of the look-behind match + urxLbEnd // LookBehind End. + // Parameter is the data location. + // Check that match ended at the right spot, + // Restore original input string len. 
+ urxLbnCount // Negative LookBehind Continue + // Param 0: the data location + // Param 1: The minimum length of the look-behind match + // Param 2: The max length of the look-behind match + // Param 3: The pattern loc following the look-behind block. + urxLbnEnd // Negative LookBehind end + // Parameter is the data location. + // Check that the match ended at the right spot. + urxStatSetrefN // Reference to a prebuilt set (e.g. \w), negated + // Operand is index of set in array of sets. + urxLoopSrI // Init a [set]* loop. + // Operand is the sets index in array of user sets. + urxLoopC // Continue a [set]* or OneChar* loop. + // Operand is a matcher static data location. + // Must always immediately follow LOOP_x_I instruction. + urxLoopDotI // .*, initialization of the optimized loop. + // Operand value: + // bit 0: + // 0: Normal (. doesn't match new-line) mode. + // 1: . matches new-line mode. + // bit 1: controls what new-lines are recognized by this operation. + // 0: All Unicode New-lines + // 1: UNIX_LINES, \u000a only. + urxBackslashBu // \b or \B in UREGEX_UWORD mode, using Unicode style + // word boundaries. + urxDollarD // $ end of input test, in UNIX_LINES mode. + urxDollarMd // $ end of input test, in MULTI_LINE and UNIX_LINES mode. + urxBackslashH // Value field: 0: \h 1: \H + urxBackslashR // Any line break sequence. + urxBackslashV // Value field: 0: \v 1: \V + + urxReservedOpN opcode = 255 // For multi-operand ops, negative operand values. +) + +// Keep this list of opcode names in sync with the above enum +// +// Used for debug printing only. 
+var urxOpcodeNames = []string{ + " ", + "BACKTRACK", + "END", + "ONECHAR", + "STRING", + "STRING_LEN", + "STATE_SAVE", + "NOP", + "START_CAPTURE", + "END_CAPTURE", + "URX_STATIC_SETREF", + "SETREF", + "DOTANY", + "JMP", + "FAIL", + "JMP_SAV", + "BACKSLASH_B", + "BACKSLASH_G", + "JMP_SAV_X", + "BACKSLASH_X", + "BACKSLASH_Z", + "DOTANY_ALL", + "BACKSLASH_D", + "CARET", + "DOLLAR", + "CTR_INIT", + "CTR_INIT_NG", + "DOTANY_UNIX", + "CTR_LOOP", + "CTR_LOOP_NG", + "URX_CARET_M_UNIX", + "RELOC_OPRND", + "STO_SP", + "LD_SP", + "BACKREF", + "STO_INP_LOC", + "JMPX", + "LA_START", + "LA_END", + "ONECHAR_I", + "STRING_I", + "BACKREF_I", + "DOLLAR_M", + "CARET_M", + "LB_START", + "LB_CONT", + "LB_END", + "LBN_CONT", + "LBN_END", + "STAT_SETREF_N", + "LOOP_SR_I", + "LOOP_C", + "LOOP_DOT_I", + "BACKSLASH_BU", + "DOLLAR_D", + "DOLLAR_MD", + "URX_BACKSLASH_H", + "URX_BACKSLASH_R", + "URX_BACKSLASH_V", +} + +type instruction int32 + +func (ins instruction) typ() opcode { + return opcode(uint32(ins) >> 24) +} + +func (ins instruction) value32() int32 { + return int32(ins) & 0xffffff +} + +func (ins instruction) value() int { + return int(ins.value32()) +} + +// Access to Unicode Sets composite character properties +// +// The sets are accessed by the match engine for things like \w (word boundary) +const ( + urxIswordSet = 1 + urxIsalnumSet = 2 + urxIsalphaSet = 3 + urxIsspaceSet = 4 + + urxGcNormal = iota + 1 // Sets for finding grapheme cluster boundaries. + urxGcExtend + urxGcControl + urxGcL + urxGcLv + urxGcLvt + urxGcV + urxGcT + + urxNegSet = 0x800000 // Flag bit to reverse sense of set + // membership test. 
+) + +type stack struct { + ary []int + frameSize int + stackLimit int +} + +type stackFrame []int + +func (f stackFrame) inputIdx() *int { + return &f[0] +} + +func (f stackFrame) patIdx() *int { + return &f[1] +} + +func (f stackFrame) extra(n int) *int { + return &f[2+n] +} + +func (f stackFrame) equals(f2 stackFrame) bool { + return &f[0] == &f2[0] +} + +func (s *stack) len() int { + return len(s.ary) +} + +func (s *stack) sp() int { + return len(s.ary) - s.frameSize +} + +func (s *stack) newFrame(inputIdx int, input []rune, pattern string) (stackFrame, error) { + if s.stackLimit != 0 && len(s.ary)+s.frameSize > s.stackLimit { + return nil, &MatchError{ + Code: StackOverflow, + Pattern: pattern, + Position: inputIdx, + Input: input, + } + } + s.ary = slices.Grow(s.ary, s.frameSize) + + f := s.ary[len(s.ary) : len(s.ary)+s.frameSize] + s.ary = s.ary[:len(s.ary)+s.frameSize] + return f, nil +} + +func (s *stack) prevFromTop() stackFrame { + return s.ary[len(s.ary)-2*s.frameSize:] +} + +func (s *stack) popFrame() stackFrame { + s.ary = s.ary[:len(s.ary)-s.frameSize] + return s.ary[len(s.ary)-s.frameSize:] +} + +func (s *stack) reset() { + s.ary = s.ary[:0] +} + +func (s *stack) offset(size int) stackFrame { + return s.ary[size-s.frameSize : size] +} + +func (s *stack) setSize(size int) { + s.ary = s.ary[:size] +} + +func (f stackFrame) clearExtra() { + for i := 2; i < len(f); i++ { + f[i] = -1 + } +} + +// number of UVector elements in the header +const restackframeHdrCount = 2 + +// Start-Of-Match type. Used by find() to quickly scan to positions where a +// +// match might start before firing up the full match engine. +type startOfMatch int8 + +const ( + startNoInfo startOfMatch = iota // No hint available. + startChar // Match starts with a literal code point. + startSet // Match starts with something matching a set. + startStart // Match starts at start of buffer only (^ or \A) + startLine // Match starts with ^ in multi-line mode. 
+	startString // Match starts with a literal string.
+)
+
+func (som startOfMatch) String() string {
+	switch som {
+	case startNoInfo:
+		return "START_NO_INFO"
+	case startChar:
+		return "START_CHAR"
+	case startSet:
+		return "START_SET"
+	case startStart:
+		return "START_START"
+	case startLine:
+		return "START_LINE"
+	case startString:
+		return "START_STRING"
+	default:
+		panic("unknown StartOfMatch")
+	}
+}
+
+type caseFoldIterator struct {
+	chars []rune
+	index int
+	limit int
+
+	foldChars []uint16
+}
+
+func (it *caseFoldIterator) next() rune {
+	if len(it.foldChars) == 0 {
+		// We are not in a string folding of an earlier character.
+		// Start handling the next char from the input UText.
+		if it.index >= it.limit {
+			return -1
+		}
+
+		originalC := it.chars[it.index]
+		it.index++
+
+		originalC, it.foldChars = ucase.FullFolding(originalC)
+		if len(it.foldChars) == 0 {
+			// input code point folds to a single code point, possibly itself.
+			return originalC
+		}
+	}
+
+	var res rune
+	res, it.foldChars = utf16.NextUnsafe(it.foldChars)
+	return res
+}
+
+func (it *caseFoldIterator) inExpansion() bool {
+	return len(it.foldChars) > 0
+}
+
+func newCaseFoldIterator(chars []rune, start, limit int) caseFoldIterator {
+	return caseFoldIterator{
+		chars: chars,
+		index: start,
+		limit: limit,
+	}
+}
diff --git a/go/mysql/icuregex/pattern.go b/go/mysql/icuregex/pattern.go
new file mode 100644
index 00000000000..f0823a213d4
--- /dev/null
+++ b/go/mysql/icuregex/pattern.go
@@ -0,0 +1,160 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package icuregex
+
+import (
+	"vitess.io/vitess/go/mysql/icuregex/internal/uset"
+)
+
+// Pattern is a regular expression together with the flags it was created
+// with. Obtain one from Compile, CompileString or MustCompileString, and
+// use Match (or NewMatcher) to run it against input.
+type Pattern struct {
+	pattern string
+	flags   RegexpFlag
+
+	compiledPat []instruction
+	literalText []rune
+
+	sets []*uset.UnicodeSet
+
+	minMatchLen int32
+	frameSize   int
+	dataSize    int
+
+	groupMap []int32
+
+	startType        startOfMatch
+	initialStringIdx int
+	initialStringLen int
+	initialChars     *uset.UnicodeSet
+	initialChar      rune
+	needsAltInput    bool
+
+	namedCaptureMap map[string]int
+}
+
+// NewPattern returns a Pattern that carries flags but has not been through
+// the compiler yet. Compile is the usual entry point; it calls NewPattern
+// and then compiles the expression into the result.
+func NewPattern(flags RegexpFlag) *Pattern {
+	return &Pattern{
+		flags:        flags,
+		initialChars: uset.New(),
+		// Slot zero of the vector of sets is reserved. Fill it here.
+		sets: []*uset.UnicodeSet{nil},
+	}
+}
+
+// MustCompileString is like CompileString but panics with the compile error
+// instead of returning it.
+func MustCompileString(in string, flags RegexpFlag) *Pattern {
+	pat, err := CompileString(in, flags)
+	if err != nil {
+		panic(err)
+	}
+	return pat
+}
+
+// Compile compiles the regular expression in, given as runes, with the given
+// flags. If compilation fails it returns a nil Pattern and the error reported
+// by the compiler.
+func Compile(in []rune, flags RegexpFlag) (*Pattern, error) {
+	pat := NewPattern(flags)
+	cmp := newCompiler(pat)
+	if err := cmp.compile(in); err != nil {
+		return nil, err
+	}
+	return pat, nil
+}
+
+// CompileString is Compile for a pattern held in a string; the string is
+// converted to runes first.
+func CompileString(in string, flags RegexpFlag) (*Pattern, error) {
+	return Compile([]rune(in), flags)
+}
+
+// Match returns a new Matcher for p whose input has been set to input.
+func (p *Pattern) Match(input string) *Matcher {
+	m := NewMatcher(p)
+	m.ResetString(input)
+	return m
+}
+
+// RegexpFlag is a set of option bits that can be combined with | and passed to
+// the Compile functions. The comments on the values below come from ICU.
+type RegexpFlag int32
+
+const (
+	/** Enable case insensitive matching. @stable ICU 2.4 */
+	CaseInsensitive RegexpFlag = 2
+
+	/** Allow white space and comments within patterns @stable ICU 2.4 */
+	Comments RegexpFlag = 4
+
+	/** If set, '.' matches line terminators, otherwise '.' matching stops at line end.
+	 * @stable ICU 2.4 */
+	DotAll RegexpFlag = 32
+
+	/** If set, treat the entire pattern as a literal string.
+	 * Metacharacters or escape sequences in the input sequence will be given
+	 * no special meaning.
+	 *
+	 * The flag UREGEX_CASE_INSENSITIVE retains its impact
+	 * on matching when used in conjunction with this flag.
+	 * The other flags become superfluous.
+	 *
+	 * @stable ICU 4.0
+	 */
+	Literal RegexpFlag = 16
+
+	/** Control behavior of "$" and "^"
+	 * If set, recognize line terminators within string,
+	 * otherwise, match only at start and end of input string.
+	 * @stable ICU 2.4 */
+	Multiline RegexpFlag = 8
+
+	/** Unix-only line endings.
+	 * When this mode is enabled, only \\u000a is recognized as a line ending
+	 * in the behavior of ., ^, and $.
+	 * @stable ICU 4.0
+	 */
+	UnixLines RegexpFlag = 1
+
+	/** Unicode word boundaries.
+	 * If set, \b uses the Unicode TR 29 definition of word boundaries.
+	 * Warning: Unicode word boundaries are quite different from
+	 * traditional regular expression word boundaries. See
+	 * http://unicode.org/reports/tr29/#Word_Boundaries
+	 * @stable ICU 2.8
+	 */
+	UWord RegexpFlag = 256
+
+	/** Error on Unrecognized backslash escapes.
+	 * If set, fail with an error on patterns that contain
+	 * backslash-escaped ASCII letters without a known special
+	 * meaning. If this flag is not set, these
+	 * escaped letters represent themselves.
+	 * @stable ICU 4.0
+	 */
+	ErrorOnUnknownEscapes RegexpFlag = 512
+)
diff --git a/go/mysql/icuregex/perl_test.go b/go/mysql/icuregex/perl_test.go
new file mode 100644
index 00000000000..0e7beda9fbd
--- /dev/null
+++ b/go/mysql/icuregex/perl_test.go
@@ -0,0 +1,245 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package icuregex
+
+import (
+	"bufio"
+	"os"
+	"strconv"
+	"strings"
+	"testing"
+)
+
+// TestPerl runs the regular expression cases in testdata/re_tests.txt.
+//
+// Each line of that file is a list of tab-separated fields:
+//
+//	0: the pattern, optionally wrapped in ' and followed by flag letters
+//	   (i, m and x select CaseInsensitive, Multiline and Comments)
+//	1: the string to search
+//	2: the expected outcome: y means a match is expected, c means the
+//	   pattern must fail to compile, and i means the case is skipped
+//	3: a Perl expression that is evaluated against the match
+//	4: the text that expression is expected to produce
+func TestPerl(t *testing.T) {
+	f, err := os.Open("testdata/re_tests.txt")
+	if err != nil {
+		t.Fatalf("failed to open test data: %v", err)
+	}
+	defer f.Close()
+
+	// Splits field 0 into an optional quote (1), the pattern (2) and the flags (3).
+	flagPat := MustCompileString(`('?)(.*)\1(.*)`, 0)
+	flagMat := NewMatcher(flagPat)
+
+	// Recognizes $-[n] and $+[n], the start and end offset of group n.
+	groupsPat := MustCompileString(`\$([+\-])\[(\d+)\]`, 0)
+	groupsMat := NewMatcher(groupsPat)
+
+	// Recognizes $n, the text of capture group n.
+	cgPat := MustCompileString(`\$(\d+)`, 0)
+	cgMat := NewMatcher(cgPat)
+
+	group := func(m *Matcher, idx int) string {
+		g, _ := m.Group(idx)
+		return g
+	}
+
+	lookingAt := func(m *Matcher) bool {
+		ok, err := m.LookingAt()
+		if err != nil {
+			t.Fatalf("failed to match with LookingAt(): %v", err)
+		}
+		return ok
+	}
+
+	// Placeholders that are expanded in patterns, subjects and expected results.
+	replacer := strings.NewReplacer(
+		`${bang}`, "!",
+		`${nulnul}`, "\x00\x00",
+		`${ffff}`, "\uffff",
+	)
+
+	scanner := bufio.NewScanner(f)
+	var lineno int
+
+	for scanner.Scan() {
+		lineno++
+		fields := strings.Split(scanner.Text(), "\t")
+		if len(fields) < 3 {
+			t.Fatalf("line %d: malformed test case: want at least 3 tab-separated fields, got %d", lineno, len(fields))
+		}
+
+		flagMat.ResetString(fields[0])
+		ok, _ := flagMat.Matches()
+		if !ok {
+			t.Fatalf("could not match pattern+flags (line %d)", lineno)
+		}
+
+		pattern, _ := flagMat.Group(2)
+		pattern = replacer.Replace(pattern)
+
+		flagStr, _ := flagMat.Group(3)
+		var flags RegexpFlag
+		if strings.IndexByte(flagStr, 'i') >= 0 {
+			flags |= CaseInsensitive
+		}
+		if strings.IndexByte(flagStr, 'm') >= 0 {
+			flags |= Multiline
+		}
+		if strings.IndexByte(flagStr, 'x') >= 0 {
+			flags |= Comments
+		}
+
+		testPat, err := CompileString(pattern, flags)
+		if err != nil {
+			if cerr, ok := err.(*CompileError); ok && cerr.Code == Unimplemented {
+				continue
+			}
+			if strings.IndexByte(fields[2], 'c') == -1 && strings.IndexByte(fields[2], 'i') == -1 {
+				t.Errorf("line %d: ICU error %q", lineno, err)
+			}
+			continue
+		}
+
+		if strings.IndexByte(fields[2], 'i') >= 0 {
+			continue
+		}
+		if strings.IndexByte(fields[2], 'c') >= 0 {
+			t.Errorf("line %d: expected error", lineno)
+			continue
+		}
+
+		matchString := fields[1]
+		matchString = replacer.Replace(matchString)
+		matchString = strings.ReplaceAll(matchString, `\n`, "\n")
+
+		testMat := testPat.Match(matchString)
+		found, _ := testMat.Find()
+		expected := strings.IndexByte(fields[2], 'y') >= 0
+
+		if expected != found {
+			t.Errorf("line %d: expected %v, found %v", lineno, expected, found)
+			continue
+		}
+
+		if !found {
+			continue
+		}
+
+		if len(fields) < 4 {
+			t.Fatalf("line %d: malformed test case: missing the Perl expression field", lineno)
+		}
+
+		// Evaluate the Perl expression in field 3 against the match.
+		var result []byte
+		var perlExpr = fields[3]
+
+		for len(perlExpr) > 0 {
+			groupsMat.ResetString(perlExpr)
+			cgMat.ResetString(perlExpr)
+
+			switch {
+			case strings.HasPrefix(perlExpr, "$&"):
+				result = append(result, group(testMat, 0)...)
+				perlExpr = perlExpr[2:]
+
+			case lookingAt(groupsMat):
+				groupNum, err := strconv.ParseInt(group(groupsMat, 2), 10, 32)
+				if err != nil {
+					t.Fatalf("failed to parse Perl pattern: %v", err)
+				}
+
+				var matchPosition int
+				if group(groupsMat, 1) == "+" {
+					matchPosition = testMat.EndForGroup(int(groupNum))
+				} else {
+					matchPosition = testMat.StartForGroup(int(groupNum))
+				}
+				if matchPosition != -1 {
+					result = strconv.AppendInt(result, int64(matchPosition), 10)
+				}
+
+				perlExpr = perlExpr[groupsMat.EndForGroup(0):]
+
+			case lookingAt(cgMat):
+				groupNum, err := strconv.ParseInt(group(cgMat, 1), 10, 32)
+				if err != nil {
+					t.Fatalf("failed to parse Perl pattern: %v", err)
+				}
+				result = append(result, group(testMat, int(groupNum))...)
+				perlExpr = perlExpr[cgMat.EndForGroup(0):]
+
+			case strings.HasPrefix(perlExpr, "@-"):
+				for i := 0; i <= testMat.GroupCount(); i++ {
+					if i > 0 {
+						result = append(result, ' ')
+					}
+					result = strconv.AppendInt(result, int64(testMat.StartForGroup(i)), 10)
+				}
+				perlExpr = perlExpr[2:]
+
+			case strings.HasPrefix(perlExpr, "@+"):
+				for i := 0; i <= testMat.GroupCount(); i++ {
+					if i > 0 {
+						result = append(result, ' ')
+					}
+					result = strconv.AppendInt(result, int64(testMat.EndForGroup(i)), 10)
+				}
+				perlExpr = perlExpr[2:]
+
+			case strings.HasPrefix(perlExpr, "\\"):
+				if len(perlExpr) > 1 {
+					perlExpr = perlExpr[1:]
+				}
+				c := perlExpr[0]
+				switch c {
+				case 'n':
+					c = '\n'
+				}
+				result = append(result, c)
+				perlExpr = perlExpr[1:]
+
+			default:
+				result = append(result, perlExpr[0])
+				perlExpr = perlExpr[1:]
+			}
+		}
+
+		var expectedS string
+		if len(fields) > 4 {
+			expectedS = fields[4]
+			expectedS = replacer.Replace(expectedS)
+			expectedS = strings.ReplaceAll(expectedS, `\n`, "\n")
+		}
+
+		if expectedS != string(result) {
+			t.Errorf("line %d: Incorrect Perl expression results for %s\nwant: %q\ngot: %q", lineno, pattern, expectedS, result)
+		}
+	}
+
+	// Scan also returns false when reading fails, so a read error must be
+	// reported here; otherwise a truncated run would look like a pass.
+	if err := scanner.Err(); err != nil {
+		t.Fatalf("failed to read test data after line %d: %v", lineno, err)
+	}
+}
diff --git a/go/mysql/icuregex/sets.go b/go/mysql/icuregex/sets.go
new file mode
100644
index 00000000000..0f745b3374d
--- /dev/null
+++ b/go/mysql/icuregex/sets.go
@@ -0,0 +1,119 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package icuregex
+
+import (
+	"vitess.io/vitess/go/mysql/icuregex/internal/uprops"
+	"vitess.io/vitess/go/mysql/icuregex/internal/uset"
+)
+
+// staticPropertySets holds the frozen sets that init builds. It is indexed by
+// the urx* set constants; init leaves the slots it does not assign nil.
+var staticPropertySets [13]*uset.UnicodeSet
+
+// init fills staticPropertySets. The order matters: the urxGcNormal set is
+// computed from the control, L, V and T sets, so those are built first.
+func init() {
+	// fromPattern parses a single property pattern into a new, unfrozen set.
+	fromPattern := func(pattern string) *uset.UnicodeSet {
+		return uprops.MustNewUnicodeSetFomPattern(pattern, 0)
+	}
+	// unionOf returns a new, unfrozen set holding the union of the given patterns.
+	unionOf := func(patterns ...string) *uset.UnicodeSet {
+		union := uset.New()
+		for _, pattern := range patterns {
+			union.AddAll(fromPattern(pattern))
+		}
+		return union
+	}
+
+	word := unionOf(`\p{Alphabetic}`, `\p{M}`, `\p{Nd}`, `\p{Pc}`)
+	word.AddRune(0x200c)
+	word.AddRune(0x200d)
+	staticPropertySets[urxIswordSet] = word.Freeze()
+
+	staticPropertySets[urxIsspaceSet] = fromPattern(`\p{Whitespace}`).Freeze()
+
+	staticPropertySets[urxGcExtend] = fromPattern(`\p{Grapheme_Extend}`).Freeze()
+
+	control := unionOf(`[:Zl:]`, `[:Zp:]`, `[:Cc:]`, `[:Cf:]`)
+	control.RemoveAll(fromPattern(`[:Grapheme_Extend:]`))
+	staticPropertySets[urxGcControl] = control.Freeze()
+
+	staticPropertySets[urxGcL] = fromPattern(`\p{Hangul_Syllable_Type=L}`).Freeze()
+	staticPropertySets[urxGcLv] = fromPattern(`\p{Hangul_Syllable_Type=LV}`).Freeze()
+	staticPropertySets[urxGcLvt] = fromPattern(`\p{Hangul_Syllable_Type=LVT}`).Freeze()
+	staticPropertySets[urxGcV] = fromPattern(`\p{Hangul_Syllable_Type=V}`).Freeze()
+	staticPropertySets[urxGcT] = fromPattern(`\p{Hangul_Syllable_Type=T}`).Freeze()
+
+	// Start from the complement of the empty set and remove the range
+	// 0xac00-0xd7a4 along with the control, L, V and T sets built above.
+	normal := uset.New()
+	normal.Complement()
+	normal.RemoveRuneRange(0xac00, 0xd7a4)
+	normal.RemoveAll(staticPropertySets[urxGcControl])
+	normal.RemoveAll(staticPropertySets[urxGcL])
+	normal.RemoveAll(staticPropertySets[urxGcV])
+	normal.RemoveAll(staticPropertySets[urxGcT])
+	staticPropertySets[urxGcNormal] = normal.Freeze()
+}
+
+// staticSetUnescape is a frozen set built by adding the string "acefnrtuUx"
+// to an empty set.
+var staticSetUnescape = func() *uset.UnicodeSet {
+	set := uset.New()
+	set.AddString("acefnrtuUx")
+	return set.Freeze()
+}()
+
+// The ruleSet* constants name the entries of staticRuleSet in order, starting
+// at 128; ruleSetCount is the number of entries.
+const (
+	ruleSetDigitChar   = 128
+	ruleSetASCIILetter = 129
+	ruleSetRuleChar    = 130
+	ruleSetCount       = ruleSetRuleChar - ruleSetDigitChar + 1
+)
+
+// staticRuleSet holds one frozen set per ruleSet* constant.
+var staticRuleSet = [ruleSetCount]*uset.UnicodeSet{
+	// ruleSetDigitChar: the range '0'-'9'.
+	func() *uset.UnicodeSet {
+		digits := uset.New()
+		digits.AddRuneRange('0', '9')
+		return digits.Freeze()
+	}(),
+	// ruleSetASCIILetter: the ranges 'A'-'Z' and 'a'-'z'.
+	func() *uset.UnicodeSet {
+		letters := uset.New()
+		letters.AddRuneRange('A', 'Z')
+		letters.AddRuneRange('a', 'z')
+		return letters.Freeze()
+	}(),
+	// ruleSetRuleChar: the complement of the set built from "*?+[(){}^$|\\.".
+	func() *uset.UnicodeSet {
+		ruleChars := uset.New()
+		ruleChars.AddString("*?+[(){}^$|\\.")
+		ruleChars.Complement()
+		return ruleChars.Freeze()
+	}(),
+}
diff --git a/go/mysql/icuregex/sets_test.go b/go/mysql/icuregex/sets_test.go
new file mode 100644
index 00000000000..d33552732f2
--- /dev/null
+++ b/go/mysql/icuregex/sets_test.go
@@ -0,0 +1,66 @@
+/*
+© 2016 and later: Unicode, Inc. and others.
+Copyright (C) 2004-2015, International Business Machines Corporation and others.
+Copyright 2023 The Vitess Authors.
+
+This file contains code derived from the Unicode Project's ICU library.
+License & terms of use for the original code: http://www.unicode.org/copyright.html
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package icuregex
+
+import (
+	"testing"
+)
+
+func TestStaticSetContents(t *testing.T) {
+	// Number of codepoints in each static set as of ICU69-1, keyed by the set's index in
+	// staticPropertySets. Every mismatch is reported, not just the first one found.
+	// This table must be re-created when updating Unicode versions.
+	expectedSetSizes := map[int]int{
+		1:  134564,
+		4:  25,
+		5:  1102451,
+		6:  1979,
+		7:  131,
+		8:  125,
+		9:  399,
+		10: 10773,
+		11: 95,
+		12: 137,
+	}
+
+	for setid, expected := range expectedSetSizes {
+		if got := staticPropertySets[setid].Len(); got != expected {
+			t.Errorf("static set [%d] has wrong size: got %d, expected %d", setid, got, expected)
+		}
+	}
+}
+
+func TestStaticFreeze(t *testing.T) {
+	for _, s := range staticPropertySets {
+		if err := s.FreezeCheck_(); err != nil {
+			t.Error(err)
+		}
+	}
+	for _, s := range staticRuleSet {
+		if err := s.FreezeCheck_(); err != nil {
+			t.Error(err)
+		}
+	}
+	if err := staticSetUnescape.FreezeCheck_(); err != nil {
+		t.Error(err)
+	}
+}
diff --git a/go/mysql/icuregex/testdata/re_tests.txt b/go/mysql/icuregex/testdata/re_tests.txt
new file mode 100644
index 00000000000..c18b638f9b3
--- /dev/null
+++ b/go/mysql/icuregex/testdata/re_tests.txt
@@ -0,0 +1,923 @@
+abc abc y $& abc +abc abc y $-[0] 0 +abc abc y $+[0] 3 +abc xbc n - - +abc axc n - - +abc abx n - - +abc xabcy y $& abc +abc xabcy y $-[0] 1 +abc xabcy y $+[0] 4 +abc ababc y $& abc +abc ababc y $-[0] 2 +abc ababc y $+[0] 5 +ab*c abc y $& abc +ab*c abc y $-[0] 0 +ab*c abc y $+[0] 3 +ab*bc abc y $& abc +ab*bc abc y $-[0] 0 +ab*bc abc y $+[0] 3 +ab*bc abbc y $& abbc +ab*bc abbc y $-[0] 0 +ab*bc abbc y $+[0] 4 +ab*bc abbbbc y $& abbbbc +ab*bc abbbbc y $-[0] 0 +ab*bc abbbbc y $+[0] 6 +.{1} abbbbc y $& a +.{1} abbbbc y $-[0] 0 +.{1} abbbbc y $+[0] 1 +.{3,4} abbbbc y $& abbb +.{3,4} abbbbc y $-[0] 0 +.{3,4} abbbbc y $+[0] 4 +ab{0,}bc abbbbc y $& abbbbc +ab{0,}bc abbbbc y $-[0] 0 +ab{0,}bc abbbbc y $+[0] 6 +ab+bc abbc y $& abbc +ab+bc abbc y $-[0] 0 +ab+bc abbc y $+[0] 4 +ab+bc abc n - - +ab+bc abq n - - +ab{1,}bc abq n - - +ab+bc abbbbc y $& abbbbc +ab+bc abbbbc y $-[0] 0 +ab+bc abbbbc y $+[0] 6 +ab{1,}bc abbbbc y $& abbbbc +ab{1,}bc abbbbc y $-[0] 0 +ab{1,}bc abbbbc y $+[0] 6 +ab{1,3}bc abbbbc y $& abbbbc +ab{1,3}bc abbbbc y $-[0] 0 +ab{1,3}bc abbbbc y $+[0] 6
+ab{3,4}bc abbbbc y $& abbbbc +ab{3,4}bc abbbbc y $-[0] 0 +ab{3,4}bc abbbbc y $+[0] 6 +ab{4,5}bc abbbbc n - - +ab?bc abbc y $& abbc +ab?bc abc y $& abc +ab{0,1}bc abc y $& abc +ab?bc abbbbc n - - +ab?c abc y $& abc +ab{0,1}c abc y $& abc +^abc$ abc y $& abc +^abc$ abcc n - - +^abc abcc y $& abc +^abc$ aabc n - - +abc$ aabc y $& abc +abc$ aabcd n - - +^ abc y $& +$ abc y $& +a.c abc y $& abc +a.c axc y $& axc +a.*c axyzc y $& axyzc +a.*c axyzd n - - +a[bc]d abc n - - +a[bc]d abd y $& abd +a[b-d]e abd n - - +a[b-d]e ace y $& ace +a[b-d] aac y $& ac +a[-b] a- y $& a- +a[b-] a- y $& a- +a[b-a] - c - Invalid [] range "b-a" +a[]b - ci - Unmatched [ +a[ - c - Unmatched [ +a] a] y $& a] +a[]]b a]b y $& a]b +a[^bc]d aed y $& aed +a[^bc]d abd n - - +a[^-b]c adc y $& adc +a[^-b]c a-c n - - +a[^]b]c a]c n - - +a[^]b]c adc y $& adc +\ba\b a- y - - +\ba\b -a y - - +\ba\b -a- y - - +\by\b xy n - - +\by\b yz n - - +\by\b xyz n - - +\Ba\B a- n - - +\Ba\B -a n - - +\Ba\B -a- n - - +\By\b xy y - - +\By\b xy y $-[0] 1 +\By\b xy y $+[0] 2 +\By\b xy y - - +\by\B yz y - - +\By\B xyz y - - +\w a y - - +\w - n - - +\W a n - - +\W - y - - +a\sb a b y - - +a\sb a-b n - - +a\Sb a b n - - +a\Sb a-b y - - +\d 1 y - - +\d - n - - +\D 1 n - - +\D - y - - +[\w] a y - - +[\w] - n - - +[\W] a n - - +[\W] - y - - +a[\s]b a b y - - +a[\s]b a-b n - - +a[\S]b a b n - - +a[\S]b a-b y - - +[\d] 1 y - - +[\d] - n - - +[\D] 1 n - - +[\D] - y - - +ab|cd abc y $& ab +ab|cd abcd y $& ab +()ef def y $&-$1 ef- +()ef def y $-[0] 1 +()ef def y $+[0] 3 +()ef def y $-[1] 1 +()ef def y $+[1] 1 +*a - c - Quantifier follows nothing +(*)b - c - Quantifier follows nothing +$b b n - - +a\ - c - Search pattern not terminated +a\(b a(b y $&-$1 a(b- +a\(*b ab y $& ab +a\(*b a((b y $& a((b +a\\b a\b y $& a\b +abc) - c - Unmatched ) +(abc - c - Unmatched ( +((a)) abc y $&-$1-$2 a-a-a +((a)) abc y $-[0]-$-[1]-$-[2] 0-0-0 +((a)) abc y $+[0]-$+[1]-$+[2] 1-1-1 +((a)) abc by @- 0 0 0 +((a)) abc by @+ 1 1 1 +(a)b(c) abc y $&-$1-$2 
abc-a-c +(a)b(c) abc y $-[0]-$-[1]-$-[2] 0-0-2 +(a)b(c) abc y $+[0]-$+[1]-$+[2] 3-1-3 +a+b+c aabbabc y $& abc +a{1,}b{1,}c aabbabc y $& abc +a** - c - Nested quantifiers +a.+?c abcabc y $& abc +(a+|b)* ab y $&-$1 ab-b +(a+|b)* ab y $-[0] 0 +(a+|b)* ab y $+[0] 2 +(a+|b)* ab y $-[1] 1 +(a+|b)* ab y $+[1] 2 +(a+|b){0,} ab y $&-$1 ab-b +(a+|b)+ ab y $&-$1 ab-b +(a+|b){1,} ab y $&-$1 ab-b +(a+|b)? ab y $&-$1 a-a +(a+|b){0,1} ab y $&-$1 a-a +)( - c - Unmatched ) +[^ab]* cde y $& cde +abc n - - +a* y $& +([abc])*d abbbcd y $&-$1 abbbcd-c +([abc])*bcd abcd y $&-$1 abcd-a +a|b|c|d|e e y $& e +(a|b|c|d|e)f ef y $&-$1 ef-e +(a|b|c|d|e)f ef y $-[0] 0 +(a|b|c|d|e)f ef y $+[0] 2 +(a|b|c|d|e)f ef y $-[1] 0 +(a|b|c|d|e)f ef y $+[1] 1 +abcd*efg abcdefg y $& abcdefg +ab* xabyabbbz y $& ab +ab* xayabbbz y $& a +(ab|cd)e abcde y $&-$1 cde-cd +[abhgefdc]ij hij y $& hij +^(ab|cd)e abcde n x$1y xy +(abc|)ef abcdef y $&-$1 ef- +(a|b)c*d abcd y $&-$1 bcd-b +(ab|ab*)bc abc y $&-$1 abc-a +a([bc]*)c* abc y $&-$1 abc-bc +a([bc]*)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c*d) abcd y $-[0] 0 +a([bc]*)(c*d) abcd y $+[0] 4 +a([bc]*)(c*d) abcd y $-[1] 1 +a([bc]*)(c*d) abcd y $+[1] 3 +a([bc]*)(c*d) abcd y $-[2] 3 +a([bc]*)(c*d) abcd y $+[2] 4 +a([bc]+)(c*d) abcd y $&-$1-$2 abcd-bc-d +a([bc]*)(c+d) abcd y $&-$1-$2 abcd-b-cd +a([bc]*)(c+d) abcd y $-[0] 0 +a([bc]*)(c+d) abcd y $+[0] 4 +a([bc]*)(c+d) abcd y $-[1] 1 +a([bc]*)(c+d) abcd y $+[1] 2 +a([bc]*)(c+d) abcd y $-[2] 2 +a([bc]*)(c+d) abcd y $+[2] 4 +a[bcd]*dcdcde adcdcde y $& adcdcde +a[bcd]+dcdcde adcdcde n - - +(ab|a)b*c abc y $&-$1 abc-ab +(ab|a)b*c abc y $-[0] 0 +(ab|a)b*c abc y $+[0] 3 +(ab|a)b*c abc y $-[1] 0 +(ab|a)b*c abc y $+[1] 2 +((a)(b)c)(d) abcd y $1-$2-$3-$4 abc-a-b-d +((a)(b)c)(d) abcd y $-[0] 0 +((a)(b)c)(d) abcd y $+[0] 4 +((a)(b)c)(d) abcd y $-[1] 0 +((a)(b)c)(d) abcd y $+[1] 3 +((a)(b)c)(d) abcd y $-[2] 0 +((a)(b)c)(d) abcd y $+[2] 1 +((a)(b)c)(d) abcd y $-[3] 1 +((a)(b)c)(d) abcd y $+[3] 2 +((a)(b)c)(d) abcd y $-[4] 3 
+((a)(b)c)(d) abcd y $+[4] 4 +[a-zA-Z_][a-zA-Z0-9_]* alpha y $& alpha +^a(bc+|b[eh])g|.h$ abh y $&-$1 bh- +(bc+d$|ef*g.|h?i(j|k)) effgz y $&-$1-$2 effgz-effgz- +(bc+d$|ef*g.|h?i(j|k)) ij y $&-$1-$2 ij-ij-j +(bc+d$|ef*g.|h?i(j|k)) effg n - - +(bc+d$|ef*g.|h?i(j|k)) bcdd n - - +(bc+d$|ef*g.|h?i(j|k)) reffgz y $&-$1-$2 effgz-effgz- +((((((((((a)))))))))) a y $10 a +((((((((((a)))))))))) a y $-[0] 0 +((((((((((a)))))))))) a y $+[0] 1 +((((((((((a)))))))))) a y $-[10] 0 +((((((((((a)))))))))) a y $+[10] 1 +((((((((((a))))))))))\10 aa y $& aa +((((((((((a))))))))))${bang} aa n - - +((((((((((a))))))))))${bang} a! y $& a! +(((((((((a))))))))) a y $& a +multiple words of text uh-uh n - - +multiple words multiple words, yeah y $& multiple words +(.*)c(.*) abcde y $&-$1-$2 abcde-ab-de +\((.*), (.*)\) (a, b) y ($2, $1) (b, a) +[k] ab n - - +abcd abcd y $&-\$&-\\$& abcd-$&-\abcd +a(bc)d abcd y $1-\$1-\\$1 bc-$1-\bc +a[-]?c ac y $& ac +(abc)\1 abcabc y $1 abc +([a-c]*)\1 abcabc y $1 abc +\1 - c - Reference to nonexistent group +\2 - c - Reference to nonexistent group +(a)|\1 a y - - +(a)|\1 x n - - +(a)|\2 - c - Reference to nonexistent group +(([a-c])b*?\2)* ababbbcbc y $&-$1-$2 ababb-bb-b +(([a-c])b*?\2){3} ababbbcbc y $&-$1-$2 ababbbcbc-cbc-c +((\3|b)\2(a)x)+ aaxabxbaxbbx n - - +((\3|b)\2(a)x)+ aaaxabaxbaaxbbax y $&-$1-$2-$3 bbax-bbax-b-a +((\3|b)\2(a)){2,} bbaababbabaaaaabbaaaabba y $&-$1-$2-$3 bbaaaabba-bba-b-a +(a)|(b) b y $-[0] 0 +(a)|(b) b y $+[0] 1 +(a)|(b) b y x$-[1] x +(a)|(b) b y x$+[1] x +(a)|(b) b y $-[2] 0 +(a)|(b) b y $+[2] 1 +'abc'i ABC y $& ABC +'abc'i XBC n - - +'abc'i AXC n - - +'abc'i ABX n - - +'abc'i XABCY y $& ABC +'abc'i ABABC y $& ABC +'ab*c'i ABC y $& ABC +'ab*bc'i ABC y $& ABC +'ab*bc'i ABBC y $& ABBC +'ab*?bc'i ABBBBC y $& ABBBBC +'ab{0,}?bc'i ABBBBC y $& ABBBBC +'ab+?bc'i ABBC y $& ABBC +'ab+bc'i ABC n - - +'ab+bc'i ABQ n - - +'ab{1,}bc'i ABQ n - - +'ab+bc'i ABBBBC y $& ABBBBC +'ab{1,}?bc'i ABBBBC y $& ABBBBC +'ab{1,3}?bc'i ABBBBC y $& ABBBBC 
+'ab{3,4}?bc'i ABBBBC y $& ABBBBC +'ab{4,5}?bc'i ABBBBC n - - +'ab??bc'i ABBC y $& ABBC +'ab??bc'i ABC y $& ABC +'ab{0,1}?bc'i ABC y $& ABC +'ab??bc'i ABBBBC n - - +'ab??c'i ABC y $& ABC +'ab{0,1}?c'i ABC y $& ABC +'^abc$'i ABC y $& ABC +'^abc$'i ABCC n - - +'^abc'i ABCC y $& ABC +'^abc$'i AABC n - - +'abc$'i AABC y $& ABC +'^'i ABC y $& +'$'i ABC y $& +'a.c'i ABC y $& ABC +'a.c'i AXC y $& AXC +'a.*?c'i AXYZC y $& AXYZC +'a.*c'i AXYZD n - - +'a[bc]d'i ABC n - - +'a[bc]d'i ABD y $& ABD +'a[b-d]e'i ABD n - - +'a[b-d]e'i ACE y $& ACE +'a[b-d]'i AAC y $& AC +'a[-b]'i A- y $& A- +'a[b-]'i A- y $& A- +'a[b-a]'i - c - Invalid [] range "b-a" +'a[]b'i - ci - Unmatched [ +'a['i - c - Unmatched [ +'a]'i A] y $& A] +'a[]]b'i A]B y $& A]B +'a[^bc]d'i AED y $& AED +'a[^bc]d'i ABD n - - +'a[^-b]c'i ADC y $& ADC +'a[^-b]c'i A-C n - - +'a[^]b]c'i A]C n - - +'a[^]b]c'i ADC y $& ADC +'ab|cd'i ABC y $& AB +'ab|cd'i ABCD y $& AB +'()ef'i DEF y $&-$1 EF- +'*a'i - c - Quantifier follows nothing +'(*)b'i - c - Quantifier follows nothing +'$b'i B n - - +'a\'i - c - Search pattern not terminated +'a\(b'i A(B y $&-$1 A(B- +'a\(*b'i AB y $& AB +'a\(*b'i A((B y $& A((B +'a\\b'i A\B y $& A\B +'abc)'i - c - Unmatched ) +'(abc'i - c - Unmatched ( +'((a))'i ABC y $&-$1-$2 A-A-A +'(a)b(c)'i ABC y $&-$1-$2 ABC-A-C +'a+b+c'i AABBABC y $& ABC +'a{1,}b{1,}c'i AABBABC y $& ABC +'a**'i - c - Nested quantifiers +'a.+?c'i ABCABC y $& ABC +'a.*?c'i ABCABC y $& ABC +'a.{0,5}?c'i ABCABC y $& ABC +'(a+|b)*'i AB y $&-$1 AB-B +'(a+|b){0,}'i AB y $&-$1 AB-B +'(a+|b)+'i AB y $&-$1 AB-B +'(a+|b){1,}'i AB y $&-$1 AB-B +'(a+|b)?'i AB y $&-$1 A-A +'(a+|b){0,1}'i AB y $&-$1 A-A +'(a+|b){0,1}?'i AB y $&-$1 - +')('i - c - Unmatched ) +'[^ab]*'i CDE y $& CDE +'abc'i n - - +'a*'i y $& +'([abc])*d'i ABBBCD y $&-$1 ABBBCD-C +'([abc])*bcd'i ABCD y $&-$1 ABCD-A +'a|b|c|d|e'i E y $& E +'(a|b|c|d|e)f'i EF y $&-$1 EF-E +'abcd*efg'i ABCDEFG y $& ABCDEFG +'ab*'i XABYABBBZ y $& AB +'ab*'i XAYABBBZ y $& A +'(ab|cd)e'i ABCDE y $&-$1 
CDE-CD +'[abhgefdc]ij'i HIJ y $& HIJ +'^(ab|cd)e'i ABCDE n x$1y XY +'(abc|)ef'i ABCDEF y $&-$1 EF- +'(a|b)c*d'i ABCD y $&-$1 BCD-B +'(ab|ab*)bc'i ABC y $&-$1 ABC-A +'a([bc]*)c*'i ABC y $&-$1 ABC-BC +'a([bc]*)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]+)(c*d)'i ABCD y $&-$1-$2 ABCD-BC-D +'a([bc]*)(c+d)'i ABCD y $&-$1-$2 ABCD-B-CD +'a[bcd]*dcdcde'i ADCDCDE y $& ADCDCDE +'a[bcd]+dcdcde'i ADCDCDE n - - +'(ab|a)b*c'i ABC y $&-$1 ABC-AB +'((a)(b)c)(d)'i ABCD y $1-$2-$3-$4 ABC-A-B-D +'[a-zA-Z_][a-zA-Z0-9_]*'i ALPHA y $& ALPHA +'^a(bc+|b[eh])g|.h$'i ABH y $&-$1 BH- +'(bc+d$|ef*g.|h?i(j|k))'i EFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'(bc+d$|ef*g.|h?i(j|k))'i IJ y $&-$1-$2 IJ-IJ-J +'(bc+d$|ef*g.|h?i(j|k))'i EFFG n - - +'(bc+d$|ef*g.|h?i(j|k))'i BCDD n - - +'(bc+d$|ef*g.|h?i(j|k))'i REFFGZ y $&-$1-$2 EFFGZ-EFFGZ- +'((((((((((a))))))))))'i A y $10 A +'((((((((((a))))))))))\10'i AA y $& AA +'((((((((((a))))))))))${bang}'i AA n - - +'((((((((((a))))))))))${bang}'i A! y $& A! +'(((((((((a)))))))))'i A y $& A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))'i A y $1 A +'(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))'i C y $1 C +'multiple words of text'i UH-UH n - - +'multiple words'i MULTIPLE WORDS, YEAH y $& MULTIPLE WORDS +'(.*)c(.*)'i ABCDE y $&-$1-$2 ABCDE-AB-DE +'\((.*), (.*)\)'i (A, B) y ($2, $1) (B, A) +'[k]'i AB n - - +'abcd'i ABCD y $&-\$&-\\$& ABCD-$&-\ABCD +'a(bc)d'i ABCD y $1-\$1-\\$1 BC-$1-\BC +'a[-]?c'i AC y $& AC +'(abc)\1'i ABCABC y $1 ABC +'([a-c]*)\1'i ABCABC y $1 ABC +a(?!b). abad y $& ad +a(?=d). abad y $& ad +a(?=c|d). abad y $& ad +a(?:b|c|d)(.) ace y $1 e +a(?:b|c|d)*(.) ace y $1 e +a(?:b|c|d)+?(.) ace y $1 e +a(?:b|c|d)+?(.) acdbcdbe y $1 d +a(?:b|c|d)+(.) acdbcdbe y $1 e +a(?:b|c|d){2}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}(.) acdbcdbe y $1 b +a(?:b|c|d){4,5}?(.) acdbcdbe y $1 d +((foo)|(bar))* foobar y $1-$2-$3 bar-foo-bar +:(?: - c - Sequence (? incomplete +a(?:b|c|d){6,7}(.) acdbcdbe y $1 e +a(?:b|c|d){6,7}?(.) acdbcdbe y $1 e +a(?:b|c|d){5,6}(.) 
acdbcdbe y $1 e +a(?:b|c|d){5,6}?(.) acdbcdbe y $1 b +a(?:b|c|d){5,7}(.) acdbcdbe y $1 e +a(?:b|c|d){5,7}?(.) acdbcdbe y $1 b +a(?:b|(c|e){1,2}?|d)+?(.) ace y $1$2 ce +^(.+)?B AB y $1 A +^([^a-z])|(\^)$ . y $1 . +^[<>]& <&OUT y $& <& +^(a\1?){4}$ aaaaaaaaaa y $1 aaaa +^(a\1?){4}$ aaaaaaaaa n - - +^(a\1?){4}$ aaaaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaa y $1 aaaa +^(a(?(1)\1)){4}$ aaaaaaaaa n - - +^(a(?(1)\1)){4}$ aaaaaaaaaaa n - - +((a{4})+) aaaaaaaaa y $1 aaaaaaaa +(((aa){2})+) aaaaaaaaaa y $1 aaaaaaaa +(((a{2}){2})+) aaaaaaaaaa y $1 aaaaaaaa +(?:(f)(o)(o)|(b)(a)(r))* foobar y $1:$2:$3:$4:$5:$6 f:o:o:b:a:r +(?<=a)b ab y $& b +(?<=a)b cb n - - +(?<=a)b b n - - +(?a+)ab aaab n - - +(?>a+)b aaab y - - +([[:]+) a:[b]: yi $1 :[ Java and ICU dont escape [[xyz +([[=]+) a=[b]= yi $1 =[ Java and ICU dont escape [[xyz +([[.]+) a.[b]. yi $1 .[ Java and ICU dont escape [[xyz +[a[:xyz: - c - Unmatched [ +[a[:xyz:] - c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc yi $& abc Java and ICU embedded [ is nested set +([a[:xyz:]b]+) pbaq c - POSIX class [:xyz:] unknown +[a[:]b[:c] abc iy $& abc Java and ICU embedded [ is nested set +([[:alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:alnum:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- ${nulnul} +([[:cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul} +([[:digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:graph:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd +([[:print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 __-- +([[:space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 +([[:word:]]+) ABcd01Xy__-- ${nulnul}${ffff} yi $1 ABcd01Xy__ +([[:upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01 +([[:^alpha:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 01 +([[:^alnum:]]+) ABcd01Xy__-- 
${nulnul}${ffff} y $1 __-- ${nulnul}${ffff} +([[:^ascii:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${ffff} +([[:^cntrl:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^digit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd +([[:^lower:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 AB +([[:^print:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ${nulnul}${ffff} +([[:^punct:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy +([[:^space:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 ABcd01Xy__-- +([[:^word:]]+) ABcd01Xy__-- ${nulnul}${ffff} yi $1 -- ${nulnul}${ffff} +([[:^upper:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 cd01 +([[:^xdigit:]]+) ABcd01Xy__-- ${nulnul}${ffff} y $1 Xy__-- ${nulnul}${ffff} +[[:foo:]] - c - POSIX class [:foo:] unknown +[[:^foo:]] - c - POSIX class [:^foo:] unknown +((?>a+)b) aaab y $1 aaab +(?>(a+))b aaab y $1 aaa +((?>[^()]+)|\([^()]*\))+ ((abc(ade)ufh()()x y $& abc(ade)ufh()()x +(?<=x+)y - c - Variable length lookbehind not implemented +a{37,17} - c - Can't do {n,m} with n > m +\Z a\nb\n y $-[0] 3 +\z a\nb\n y $-[0] 4 +$ a\nb\n y $-[0] 3 +\Z b\na\n y $-[0] 3 +\z b\na\n y $-[0] 4 +$ b\na\n y $-[0] 3 +\Z b\na y $-[0] 3 +\z b\na y $-[0] 3 +$ b\na y $-[0] 3 +'\Z'm a\nb\n y $-[0] 3 +'\z'm a\nb\n y $-[0] 4 +'$'m a\nb\n y $-[0] 1 +'\Z'm b\na\n y $-[0] 3 +'\z'm b\na\n y $-[0] 4 +'$'m b\na\n y $-[0] 1 +'\Z'm b\na y $-[0] 3 +'\z'm b\na y $-[0] 3 +'$'m b\na y $-[0] 1 +a\Z a\nb\n n - - +a\z a\nb\n n - - +a$ a\nb\n n - - +a\Z b\na\n y $-[0] 2 +a\z b\na\n n - - +a$ b\na\n y $-[0] 2 +a\Z b\na y $-[0] 2 +a\z b\na y $-[0] 2 +a$ b\na y $-[0] 2 +'a\Z'm a\nb\n n - - +'a\z'm a\nb\n n - - +'a$'m a\nb\n y $-[0] 0 +'a\Z'm b\na\n y $-[0] 2 +'a\z'm b\na\n n - - +'a$'m b\na\n y $-[0] 2 +'a\Z'm b\na y $-[0] 2 +'a\z'm b\na y $-[0] 2 +'a$'m b\na y $-[0] 2 +aa\Z aa\nb\n n - - +aa\z aa\nb\n n - - +aa$ aa\nb\n n - - +aa\Z b\naa\n y $-[0] 2 +aa\z b\naa\n n - - +aa$ b\naa\n y $-[0] 2 +aa\Z b\naa y $-[0] 2 +aa\z b\naa y $-[0] 2 +aa$ b\naa y $-[0] 2 +'aa\Z'm aa\nb\n n - - +'aa\z'm aa\nb\n n - - 
+'aa$'m aa\nb\n y $-[0] 0 +'aa\Z'm b\naa\n y $-[0] 2 +'aa\z'm b\naa\n n - - +'aa$'m b\naa\n y $-[0] 2 +'aa\Z'm b\naa y $-[0] 2 +'aa\z'm b\naa y $-[0] 2 +'aa$'m b\naa y $-[0] 2 +aa\Z ac\nb\n n - - +aa\z ac\nb\n n - - +aa$ ac\nb\n n - - +aa\Z b\nac\n n - - +aa\z b\nac\n n - - +aa$ b\nac\n n - - +aa\Z b\nac n - - +aa\z b\nac n - - +aa$ b\nac n - - +'aa\Z'm ac\nb\n n - - +'aa\z'm ac\nb\n n - - +'aa$'m ac\nb\n n - - +'aa\Z'm b\nac\n n - - +'aa\z'm b\nac\n n - - +'aa$'m b\nac\n n - - +'aa\Z'm b\nac n - - +'aa\z'm b\nac n - - +'aa$'m b\nac n - - +aa\Z ca\nb\n n - - +aa\z ca\nb\n n - - +aa$ ca\nb\n n - - +aa\Z b\nca\n n - - +aa\z b\nca\n n - - +aa$ b\nca\n n - - +aa\Z b\nca n - - +aa\z b\nca n - - +aa$ b\nca n - - +'aa\Z'm ca\nb\n n - - +'aa\z'm ca\nb\n n - - +'aa$'m ca\nb\n n - - +'aa\Z'm b\nca\n n - - +'aa\z'm b\nca\n n - - +'aa$'m b\nca\n n - - +'aa\Z'm b\nca n - - +'aa\z'm b\nca n - - +'aa$'m b\nca n - - +ab\Z ab\nb\n n - - +ab\z ab\nb\n n - - +ab$ ab\nb\n n - - +ab\Z b\nab\n y $-[0] 2 +ab\z b\nab\n n - - +ab$ b\nab\n y $-[0] 2 +ab\Z b\nab y $-[0] 2 +ab\z b\nab y $-[0] 2 +ab$ b\nab y $-[0] 2 +'ab\Z'm ab\nb\n n - - +'ab\z'm ab\nb\n n - - +'ab$'m ab\nb\n y $-[0] 0 +'ab\Z'm b\nab\n y $-[0] 2 +'ab\z'm b\nab\n n - - +'ab$'m b\nab\n y $-[0] 2 +'ab\Z'm b\nab y $-[0] 2 +'ab\z'm b\nab y $-[0] 2 +'ab$'m b\nab y $-[0] 2 +ab\Z ac\nb\n n - - +ab\z ac\nb\n n - - +ab$ ac\nb\n n - - +ab\Z b\nac\n n - - +ab\z b\nac\n n - - +ab$ b\nac\n n - - +ab\Z b\nac n - - +ab\z b\nac n - - +ab$ b\nac n - - +'ab\Z'm ac\nb\n n - - +'ab\z'm ac\nb\n n - - +'ab$'m ac\nb\n n - - +'ab\Z'm b\nac\n n - - +'ab\z'm b\nac\n n - - +'ab$'m b\nac\n n - - +'ab\Z'm b\nac n - - +'ab\z'm b\nac n - - +'ab$'m b\nac n - - +ab\Z ca\nb\n n - - +ab\z ca\nb\n n - - +ab$ ca\nb\n n - - +ab\Z b\nca\n n - - +ab\z b\nca\n n - - +ab$ b\nca\n n - - +ab\Z b\nca n - - +ab\z b\nca n - - +ab$ b\nca n - - +'ab\Z'm ca\nb\n n - - +'ab\z'm ca\nb\n n - - +'ab$'m ca\nb\n n - - +'ab\Z'm b\nca\n n - - +'ab\z'm b\nca\n n - - +'ab$'m b\nca\n n 
- - +'ab\Z'm b\nca n - - +'ab\z'm b\nca n - - +'ab$'m b\nca n - - +abb\Z abb\nb\n n - - +abb\z abb\nb\n n - - +abb$ abb\nb\n n - - +abb\Z b\nabb\n y $-[0] 2 +abb\z b\nabb\n n - - +abb$ b\nabb\n y $-[0] 2 +abb\Z b\nabb y $-[0] 2 +abb\z b\nabb y $-[0] 2 +abb$ b\nabb y $-[0] 2 +'abb\Z'm abb\nb\n n - - +'abb\z'm abb\nb\n n - - +'abb$'m abb\nb\n y $-[0] 0 +'abb\Z'm b\nabb\n y $-[0] 2 +'abb\z'm b\nabb\n n - - +'abb$'m b\nabb\n y $-[0] 2 +'abb\Z'm b\nabb y $-[0] 2 +'abb\z'm b\nabb y $-[0] 2 +'abb$'m b\nabb y $-[0] 2 +abb\Z ac\nb\n n - - +abb\z ac\nb\n n - - +abb$ ac\nb\n n - - +abb\Z b\nac\n n - - +abb\z b\nac\n n - - +abb$ b\nac\n n - - +abb\Z b\nac n - - +abb\z b\nac n - - +abb$ b\nac n - - +'abb\Z'm ac\nb\n n - - +'abb\z'm ac\nb\n n - - +'abb$'m ac\nb\n n - - +'abb\Z'm b\nac\n n - - +'abb\z'm b\nac\n n - - +'abb$'m b\nac\n n - - +'abb\Z'm b\nac n - - +'abb\z'm b\nac n - - +'abb$'m b\nac n - - +abb\Z ca\nb\n n - - +abb\z ca\nb\n n - - +abb$ ca\nb\n n - - +abb\Z b\nca\n n - - +abb\z b\nca\n n - - +abb$ b\nca\n n - - +abb\Z b\nca n - - +abb\z b\nca n - - +abb$ b\nca n - - +'abb\Z'm ca\nb\n n - - +'abb\z'm ca\nb\n n - - +'abb$'m ca\nb\n n - - +'abb\Z'm b\nca\n n - - +'abb\z'm b\nca\n n - - +'abb$'m b\nca\n n - - +'abb\Z'm b\nca n - - +'abb\z'm b\nca n - - +'abb$'m b\nca n - - +(^|x)(c) ca y $2 c +a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - - +a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2 +round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz +'((?x:.) )' x y $1- x - +'((?-x:.) )'x x y $1- x- +foo.bart foo.bart y - - +'^d[x][x][x]'m abcd\ndxxx y - - +.X(.+)+X bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - # TODO: ICU doesn't optimize on trailing literals in pattern. 
+.X(.+)+XX bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.XX(.+)+X bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+X bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+XX bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.XX(.+)+X bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.XX(.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.X(.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.X(.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.XX(.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X](.+)+[X] bbbbXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X](.+)+[X][X] bbbbXcXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X][X](.+)+[X] bbbbXXcXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa yi - - +.[X](.+)+[X] bbbbXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X](.+)+[X][X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +.[X][X](.+)+[X] bbbbXXXaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa ni - - +tt+$ xxxtt y - - +([a-\d]+) za-9z yi $1 a-9 +([\d-z]+) a0-za y $1 0-z +([\d-\s]+) a0- z y $1 0- +([a-[:digit:]]+) za-9z y $1 a-9 +([[:digit:]-z]+) =0-z= y $1 0-z +([[:digit:]-[:alpha:]]+) =0-z= iy $1 0-z Set difference in ICU +\GX.*X aaaXbX n - - +(\d+\.\d+) 3.1415926 y $1 3.1415926 +(\ba.{0,10}br) have a web browser y $1 a web br +'\.c(pp|xx|c)?$'i Changes n - - +'\.c(pp|xx|c)?$'i IO.c y - - +'(\.c(pp|xx|c)?$)'i IO.c y $1 .c +^([a-z]:) C:/ n - - +'^\S\s+aa$'m \nx aa y - - +(^|a)b ab y - - +^([ab]*?)(b)?(c)$ abac y -$2- -- +(\w)?(abc)\1b abcab n - - +^(?:.,){2}c a,b,c y - - +^(.,){2}c a,b,c y $1 b, +^(?:[^,]*,){2}c a,b,c y - - +^([^,]*,){2}c a,b,c y $1 b, +^([^,]*,){3}d aaa,b,c,d y $1 c, +^([^,]*,){3,}d aaa,b,c,d y $1 c, +^([^,]*,){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3}d aaa,b,c,d y $1 c, +^([^,]{1,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,3},){0,3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3}d aaa,b,c,d y $1 c, +^([^,]{1,},){3,}d aaa,b,c,d y $1 c, +^([^,]{1,},){0,3}d 
aaa,b,c,d y $1 c, +^([^,]{0,3},){3}d aaa,b,c,d y $1 c, +^([^,]{0,3},){3,}d aaa,b,c,d y $1 c, +^([^,]{0,3},){0,3}d aaa,b,c,d y $1 c, +(?i) y - - +'(?!\A)x'm a\nxb\n y - - +^(a(b)?)+$ aba yi -$1-$2- -a-- Java disagrees. Not clear who is right. +'^.{9}abc.*\n'm 123\nabcabcabcabc\n y - - +^(a)?a$ a y -$1- -- +^(a)?(?(1)a|b)+$ a n - - +^(a\1?)(a\1?)(a\2?)(a\3?)$ aaaaaa y $1,$2,$3,$4 a,aa,a,aa +^(a\1?){4}$ aaaaaa y $1 aa +^(0+)?(?:x(1))? x1 y - - +^([0-9a-fA-F]+)(?:x([0-9a-fA-F]+)?)(?:x([0-9a-fA-F]+))? 012cxx0190 y - - +^(b+?|a){1,2}c bbbac y $1 a +^(b+?|a){1,2}c bbbbac y $1 a +\((\w\. \w+)\) cd. (A. Tw) y -$1- -A. Tw- +((?:aaaa|bbbb)cccc)? aaaacccc y - - +((?:aaaa|bbbb)cccc)? bbbbcccc y - - +(a)?(a)+ a y $1:$2 :a - +(ab)?(ab)+ ab y $1:$2 :ab - +(abc)?(abc)+ abc y $1:$2 :abc - +'b\s^'m a\nb\n n - - +\ba a y - - +^(a(??{"(?!)"})|(a)(?{1}))b ab yi $2 a # [ID 20010811.006] +ab(?i)cd AbCd n - - # [ID 20010809.023] +ab(?i)cd abCd y - - +(A|B)*(?(1)(CD)|(CD)) CD y $2-$3 -CD +(A|B)*(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +(A|B)*?(?(1)(CD)|(CD)) CD y $2-$3 -CD # [ID 20010803.016] +(A|B)*?(?(1)(CD)|(CD)) ABCD y $2-$3 CD- +'^(o)(?!.*\1)'i Oo n - - +(.*)\d+\1 abc12bc y $1 bc +(?m:(foo\s*$)) foo\n bar y $1 foo +(.*)c abcd y $1 ab +(.*)(?=c) abcd y $1 ab +(.*)(?=c)c abcd yB $1 ab +(.*)(?=b|c) abcd y $1 ab +(.*)(?=b|c)c abcd y $1 ab +(.*)(?=c|b) abcd y $1 ab +(.*)(?=c|b)c abcd y $1 ab +(.*)(?=[bc]) abcd y $1 ab +(.*)(?=[bc])c abcd yB $1 ab +(.*)(?<=b) abcd y $1 ab +(.*)(?<=b)c abcd y $1 ab +(.*)(?<=b|c) abcd y $1 abc +(.*)(?<=b|c)c abcd y $1 ab +(.*)(?<=c|b) abcd y $1 abc +(.*)(?<=c|b)c abcd y $1 ab +(.*)(?<=[bc]) abcd y $1 abc +(.*)(?<=[bc])c abcd y $1 ab +(.*?)c abcd y $1 ab +(.*?)(?=c) abcd y $1 ab +(.*?)(?=c)c abcd yB $1 ab +(.*?)(?=b|c) abcd y $1 a +(.*?)(?=b|c)c abcd y $1 ab +(.*?)(?=c|b) abcd y $1 a +(.*?)(?=c|b)c abcd y $1 ab +(.*?)(?=[bc]) abcd y $1 a +(.*?)(?=[bc])c abcd yB $1 ab +(.*?)(?<=b) abcd y $1 ab +(.*?)(?<=b)c abcd y $1 ab +(.*?)(?<=b|c) abcd y $1 ab +(.*?)(?<=b|c)c abcd y 
$1 ab +(.*?)(?<=c|b) abcd y $1 ab +(.*?)(?<=c|b)c abcd y $1 ab +(.*?)(?<=[bc]) abcd y $1 ab +(.*?)(?<=[bc])c abcd y $1 ab +2(]*)?$\1 2 y $& 2 +(??{}) x yi - - diff --git a/go/mysql/icuregex/testdata/regextst.txt b/go/mysql/icuregex/testdata/regextst.txt new file mode 100644 index 00000000000..8d5d2c34a8e --- /dev/null +++ b/go/mysql/icuregex/testdata/regextst.txt @@ -0,0 +1,2793 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2001-2015 International Business Machines +# Corporation and others. All Rights Reserved. +# +# file: +# +# ICU regular expression test cases. +# +# format: one test case per line, +# = [# comment] +# = "" +# = "" +# the quotes on the pattern and match string can be " or ' or / +# = text, with the start and end of each +# capture group tagged with .... The overall match, +# if any, is group 0, as in <0>matched text +# A region can be specified with ... tags. +# Standard ICU unescape will be applied, allowing \u, \U, etc. to appear. +# +# = any combination of +# i case insensitive match +# x free spacing and comments +# s dot-matches-all mode +# m multi-line mode. +# ($ and ^ match at embedded new-lines) +# D Unix Lines mode (only recognize 0x0a as new-line) +# Q UREGEX_LITERAL flag. Entire pattern is literal string. +# v If icu configured without break iteration, this +# regex test pattern should not compile. +# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag +# d dump the compiled pattern +# t trace operation of match engine. +# 2-9 a digit between 2 and 9, specifies the number of +# times to execute find(). The expected results are +# for the last find() in the sequence. +# G Only check match / no match. Do not check capture groups. +# E Pattern compilation error expected +# L Use LookingAt() rather than find() +# M Use matches() rather than find(). +# +# a Use non-Anchoring Bounds. +# b Use Transparent Bounds. 
+# The a and b options only make a difference if +# a region has been specified in the string. +# z|Z hitEnd was expected(z) or not expected (Z). +# With neither, hitEnd is not checked. +# y|Y Require End expected(y) or not expected (Y). +# +# White space must be present between the flags and the match string. +# + +# Look-ahead expressions +# +"(?!0{5})(\d{5})" "<0><1>00001zzzz" +"(?!0{5})(\d{5})z" "<0><1>00001zzzz" +"(?!0{5})(\d{5})(?!y)" "<0><1>00001zzzz" +"abc(?=def)" "<0>abcdef" +"(.*)(?=c)" "<0><1>abcdef" + +"(?:.*)(?=c)" "abcdef" +"(?:.*)(?=c)" b "<0>abcdef" # transparent bounds +"(?:.*)(?=c)" bM "<0>abcdef" # transparent bounds + +"(?:.*)(?=(c))" b "<0>ab<1>cdef" # Capture in look-ahead +"(?=(.)\1\1)\1" "abcc<0><1>dddefg" # Backrefs to look-ahead capture + +".(?!\p{L})" "abc<0>d " # Negated look-ahead +".(?!(\p{L}))" "abc<0>d " # Negated look-ahead, no capture + # visible outside of look-ahead +"and(?=roid)" L "<0>android" +"and(?=roid)" M "android" +"and(?=roid)" bM "<0>android" + +"and(?!roid)" L "<0>androix" +"and(?!roid)" L "android" + +"and(?!roid)" M "<0>android" # Opaque bounds +"and(?!roid)" bM "android" +"and(?!roid)" bM "<0>androix" + +# +# Negated Lookahead, various regions and region transparency +# +"abc(?!def)" "<0>abcxyz" +"abc(?!def)" "abcdef" +"abc(?!def)" "<0>abcdef" +"abc(?!def)" b "abcdef" +"abc(?!def)" b "<0>abcxyz" + +# +# Nested Lookahead / Behind +# +"one(?=(?:(?!).)*)" "<0>one stuff" +"one(?=(?:(?!).)*)" "one " + +# More nesting lookaround: pattern matches "qq" when not preceded by 'a' and followed by 'z' +"(?qqc" +"(?qqc" +"(?A<0>jk<2>B" +"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." "ajkB" +"(?=(?<=(\p{Lu})(?=..(\p{Lu})))).." 
"Ajkb" + +# Nested lookaround cases from bug ICU-20564 +"(?<=(?<=((?=)){0}+))" "<0>abc" +"(?<=c(?<=c((?=c)){1}+))" "c<0><1>cc" + +# +# Anchoring Bounds +# +"^def$" "abc<0>defghi" # anchoring (default) bounds +"^def$" a "abcdefghi" # non-anchoring bounds +"^def" a "<0>defghi" # non-anchoring bounds +"def$" a "abc<0>def" # non-anchoring bounds + +"^.*$" m "<0>line 1\n line 2" +"^.*$" m2 "line 1\n<0> line 2" +"^.*$" m3 "line 1\n line 2" +"^.*$" m "li<0>ne 1\n line 2" # anchoring bounds +"^.*$" m2 "line 1\n line 2" # anchoring bounds +"^.*$" am "line 1\n line 2" # non-anchoring bounds +"^.*$" am "li\n<0>ne \n1\n line 2" # non-anchoring bounds + +# +# HitEnd and RequireEnd for new-lines just before end-of-input +# +"xyz$" yz "<0>xyz\n" +"xyz$" yz "<0>xyz\x{d}\x{a}" + +"xyz$" myz "<0>xyz" # multi-line mode +"xyz$" mYZ "<0>xyz\n" +"xyz$" mYZ "<0>xyz\r\n" +"xyz$" mYZ "<0>xyz\x{85}abcd" + +"xyz$" Yz "xyz\nx" +"xyz$" Yz "xyza" +"xyz$" yz "<0>xyz" + +# +# HitEnd +# +"abcd" Lz "a" +"abcd" Lz "ab" +"abcd" Lz "abc" +"abcd" LZ "<0>abcd" +"abcd" LZ "<0>abcde" +"abcd" LZ "abcx" +"abcd" LZ "abx" +"abcd" Lzi "a" +"abcd" Lzi "ab" +"abcd" Lzi "abc" +"abcd" LZi "<0>abcd" +"abcd" LZi "<0>abcde" +"abcd" LZi "abcx" +"abcd" LZi "abx" + +# +# All Unicode line endings recognized. +# 0a, 0b, 0c, 0d, 0x85, 0x2028, 0x2029 +# Multi-line and non-multiline mode take different paths, so repeated tests. 
+# +"^def$" mYZ "abc\x{a}<0>def\x{a}ghi" +"^def$" mYZ "abc\x{b}<0>def\x{b}ghi" +"^def$" mYZ "abc\x{c}<0>def\x{c}ghi" +"^def$" mYZ "abc\x{d}<0>def\x{d}ghi" +"^def$" mYZ "abc\x{85}<0>def\x{85}ghi" +"^def$" mYZ "abc\x{2028}<0>def\x{2028}ghi" +"^def$" mYZ "abc\x{2029}<0>def\x{2029}ghi" +"^def$" mYZ "abc\r\n<0>def\r\nghi" + +"^def$" yz "<0>def\x{a}" +"^def$" yz "<0>def\x{b}" +"^def$" yz "<0>def\x{c}" +"^def$" yz "<0>def\x{d}" +"^def$" yz "<0>def\x{85}" +"^def$" yz "<0>def\x{2028}" +"^def$" yz "<0>def\x{2029}" +"^def$" yz "<0>def\r\n" +"^def$" yz "<0>def" + + +# "^def$" "<0>def\x{2028" #TODO: should be an error of some sort. + +# +# UNIX_LINES mode +# +"abc$" D "<0>abc\n" +"abc$" D "abc\r" +"abc$" D "abc\u0085" +"a.b" D "<0>a\rb" +"a.b" D "a\nb" +"(?d)abc$" "<0>abc\n" +"(?d)abc$" "abc\r" +"abc$" mD "<0>abc\ndef" +"abc$" mD "abc\rdef" + +".*def" L "abc\r def xyz" # Normal mode, LookingAt() stops at \r +".*def" DL "<0>abc\r def xyz" # Unix Lines mode, \r not line end. +".*def" DL "abc\n def xyz" + +"(?d)a.b" "a\nb" +"(?d)a.b" "<0>a\rb" + +"^abc" m "xyz\r<0>abc" +"^abc" Dm "xyz\rabc" +"^abc" Dm "xyz\n<0>abc" + + + +# Capturing parens +".(..)." "<0>a<1>bcd" + ".*\A( +hello)" "<0><1> hello" +"(hello)|(goodbye)" "<0><1>hello" +"(hello)|(goodbye)" "<0><2>goodbye" +"abc( +( inner(X?) +) xyz)" "leading cruft <0>abc<1> <2> inner<3> xyz cruft" +"\s*([ixsmdt]*)([:letter:]*)" "<0> <1>d<2> " +"(a|b)c*d" "a<0><1>bcd" + +# Non-capturing parens (?: stuff). Groups, but does not capture. +"(?:abc)*(tail)" "<0>abcabcabc<1>tail" + +# Non-greedy *? quantifier +".*?(abc)" "<0> abx <1>abc abc abc abc" +".*(abc)" "<0> abx abc abc abc <1>abc" + +"((?:abc |xyz )*?)abc " "<0><1>xyz abc abc abc " +"((?:abc |xyz )*)abc " "<0><1>xyz abc abc abc " + +# Non-greedy +? quantifier +"(a+?)(a*)" "<0><1>a<2>aaaaaaaaaaaa" +"(a+)(a*)" "<0><1>aaaaaaaaaaaaa<2>" + +"((ab)+?)((ab)*)" "<0><1><2>ab<3>ababababab<4>ab" +"((ab)+)((ab)*)" "<0><1>abababababab<2>ab<3>" + +# Non-greedy ?? 
quantifier +"(ab)(ab)??(ab)??(ab)??(ab)??c" "<0><1>ab<4>ab<5>abc" + +# Unicode Properties as naked elements in a pattern +"\p{Lu}+" "here we go ... <0>ABC and no more." +"(\p{L}+)(\P{L}*?) (\p{Zs}*)" "7999<0><1>letters<2>4949%^&*( <3> " + +# \w and \W +"\w+" " $%^&*( <0>hello123%^&*(" +"\W+" "<0> $%^&*( hello123%^&*(" + +# \A match at beginning of input only. + ".*\Ahello" "<0>hello hello" + ".*hello" "<0>hello hello" +".*\Ahello" "stuff\nhello" # don't match after embedded new-line. + +# \b \B +# +".*?\b(.).*" "<0> $%^&*( <1>hello123%^&*()gxx" +"\ba\b" "-<0>a" +"\by\b" "xy" +"[ \b]" "<0>b" # in a set, \b is a literal b. + +# Finds first chars of up to 5 words +"(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?(?:.*?\b(\w))?" "<0><1>Tthe <2>qick <3>brown <4>fox" + +"H.*?((?:\B.)+)" "<0>H<1>ello " +".*?((?:\B.)+).*?((?:\B.)+).*?((?:\B.)+)" "<0>H<1>ello <2> g<3>oodbye " + +"(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?(?:.*?\b(.))?.*" "<0> \u0301 \u0301<1>A\u0302BC\u0303\u0304<2> \u0305 \u0306<3>X\u0307Y\u0308" + + +# +# Unicode word boundary mode +# +"(?w).*?\b" v "<0>hello, world" +"(?w).*?(\b.+?\b).*" v "<0><1> 123.45 " +"(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45 " +".*?(\b.+?\b).*" "<0> <1>123.45 " +"(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45 " +"(?w:.*?(\b.+?\b).*)" v "<0><1>don't " +"(?w:.+?(\b\S.+?\b).*)" v "<0> <1>don't " +"(?w:(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?)(\b.+?).*)" v "<0><1>.<2> <3>,<4>:<5>$<6>37,000.50<7> " + +# +# Unicode word boundaries with Regions +# +"(?w).*?\b" v "abc<0>defghi" +"(?w).*?\b" v2 "abcdef<0>ghi" +"(?w).*?\b" v3 "abcdefghi" +#"(?w).*?\b" vb "abc<0>defghi" # TODO: bug. Ticket 6073 +#"(?w).*?\b" vb2 "abcdefghi" + + + +# . does not match new-lines +"." "\u000a\u000d\u0085\u000c\u000b\u2028\u2029<0>X\u000aY" +"A." 
"A\u000a "# no match + +# \d for decimal digits +"\d*" "<0>0123456789\u0660\u06F9\u0969\u0A66\u17E2\uFF10\U0001D7CE\U0001D7FFnon-digits" +"\D+" "<0>non digits" +"\D*(\d*)(\D*)" "<0>non-digits<1>3456666<2>more non digits" + +# \Q...\E quote mode +"hel\Qlo, worl\Ed" "<0>hello, world" +"\Q$*^^(*)?\A\E(a*)" "<0>$*^^(*)?\\A<1>aaaaaaaaaaaaaaa" +"[abc\Q]\r\E]+" "<0>aaaccc]]]\\\\\\\r..." # \Q ... \E escape in a [set] + +# UREGEX_LITERAL - entire pattern is a literal string, no escapes recognized. +# Note that data strings in test cases still get escape processing. +"abc\an\r\E\\abcd\u0031bye" Q "lead<0>abc\\an\\r\\E\\\\abcd\\u0031byeextra" +"case insensitive \\ (l)iteral" Qi "stuff!! <0>cAsE InSenSiTiVE \\\\ (L)ITeral" + +# \S and \s space characters +"\s+" "not_space<0> \t \r \n \u3000 \u2004 \u2028 \u2029xyz" +"(\S+).*?(\S+).*" "<0><1>Not-spaces <2>more-non-spaces " + +# \X consume one Grapheme Cluster. +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>A<2>B<3> <4>\r\n" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>A\u0301<2>\n<3>\u0305<4>a\u0302\u0303\u0304" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\u1161\u11a8<2>\u115f\u11a2\u11f9" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\uac01<2>\uac02<3>\uac03\u11b0" +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\u1100\u1101\uac02\u0301<2>\u1100" +# Regional indicator pairs are grapheme clusters +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\U0001f1e6\U0001f1e8<2>\U0001f1ea\U0001f1ff" +# Grapheme Break rule 9b: Prepend x +"(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?(\X)?" v "<0><1>\U000111C2x" + +# Grapheme clusters that straddle a match region. Matching is pinned to the region limits, +# giving boundaries inside grapheme clusters +"(\X)?(\X)?(\X)?" 
v "a\u0301<0><1>\u0301\u0301<2>z\u0302\u0302\u0302" +# Same as previous test case, but without the region limits. +"(\X)?(\X)?(\X)?" v "<0><1>a\u0301\u0301\u0301<2>z\u0302\u0302\u0302" + +# ^ matches only at beginning of line +".*^(Hello)" "<0><1>Hello Hello Hello Hello Goodbye" +".*(Hello)" "<0>Hello Hello Hello <1>Hello Goodbye" +".*^(Hello)" " Hello Hello Hello Hello Goodbye"# No Match + +# $ matches only at end of line, or before a newline preceding the end of line +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)$" z "Hello Goodbye> Goodbye Goodbye "# No Match + +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)$" zy "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" +".*?(Goodbye)$" z "Hello Goodbye Goodbye Goodbye\n\n"# No Match + +# \Z matches at end of input, like $ with default flags. +".*?(Goodbye)\Z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)" ZY "<0>Hello <1>Goodbye Goodbye Goodbye" +".*?(Goodbye)\Z" z "Hello Goodbye> Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match + +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\n" +".*?(Goodbye)\Z" "<0>Hello Goodbye Goodbye <1>Goodbye\r\n" +".*?(Goodbye)\Z" "Hello Goodbye Goodbye Goodbye\n\n"# No Match + +# \z matches only at the end of string. +# no special treatment of new lines. +# no dependencies on flag settings. +".*?(Goodbye)\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye" +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye "# No Match +"here$" z "here\nthe end"# No Match + +".*?(Goodbye)\z" z "Hello Goodbye Goodbye Goodbye\n"# No Match +".*?(Goodbye)\n\z" zy "<0>Hello Goodbye Goodbye <1>Goodbye\n" +"abc\z|def" ZY "abc<0>def" + +# (?# comment) doesn't muck up pattern +"Hello (?# this is a comment) world" " <0>Hello world..." 
+ +# Check some implementation corner cases base on the way literal strings are compiled. +"A" "<0>A" +"AB" "<0>ABABABAB" +"AB+" "<0>ABBBA" +"AB+" "<0>ABABAB" +"ABC+" "<0>ABCABC" +"ABC+" "<0>ABCCCCABC" +"(?:ABC)+" "<0>ABCABCABCD" +"(?:ABC)DEF+" "<0>ABCDEFFFD" +"AB\.C\eD\u0666E" "<0>AB.C\u001BD\u0666EF" +"ab\Bde" "<0>abde" + +# loop breaking +"(a?)*" "<0><1>xyz" +"(a?)+" "<0><1>xyz" +"^(?:a?b?)*$" "a--" +"(x?)*xyz" "<0>xx<1>xyz" # Sligthtly weird, but correct. The "last" time through (x?), + # it matches the empty string. + +# Set expressions, basic operators and escapes work +# +"[\d]+" "<0>0123abc/.," +"[^\d]+" "0123<0>abc/.," +"[\D]+" "0123<0>abc/.," +"[^\D]+" "<0>0123abc/.," + +"[\s]+" "<0> \tabc/.," +"[^\s]+" " \t<0>abc/.," +"[\S]+" " \t<0>abc/.," +"[^\S]+" "<0> \tabc/.," + +"[\w]+" "<0>abc123 .,;" +"[^\w]+" "abc123<0> .,;" +"[\W]+" "abc123<0> .,;" +"[^\W]+" "<0>abc123 .,;" + +"[\z]+" "abc<0>zzzdef" # \z has no special meaning +"[^\z]+" "<0>abczzzdef" +"[\^]+" "abc<0>^^" +"[^\^]+" "<0>abc^^" + +"[\u0041c]+" "<0>AcAcdef" +"[\U00010002]+" "<0>\ud800\udc02\U00010003" +"[^\U00010002]+" "<0>Hello\x{10002}" +"[\x61b]+" "<0>ababcde" +#"[\x6z]+" "\x06" #TODO: single hex digits should fail +"[\x{9}\x{75}\x{6d6}\x{6ba6}\x{6146B}\x{10ffe3}]+" "<0>\u0009\u0075\u06d6\u6ba6\U0006146B\U0010ffe3abc" + +"[\N{LATIN CAPITAL LETTER TONE SIX}ab\N{VARIATION SELECTOR-70} ]+" "x<0> \u0184\U000E0135 abc" +"[\N{LATIN SMALL LETTER C}-\N{LATIN SMALL LETTER F}]+" "ab<0>cdefghi" + + + +# +# [set expressions], check the precedence of '-', '&', '--', '&&' +# '-' and '&', for compatibility with ICU UnicodeSet, have the same +# precedence as the implicit Union between adjacent items. +# '--' and '&&', for compatibility with Java, have lower precedence than +# the implicit Union operations. '--' and '&&' themselves +# have the same precedence, and group left to right. 
+# +"[[a-m]-[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]-[f-w]p]+" "dep<0>fgwxyz" + +"[[a-m]--[f-w]p]+" "<0>depfgwxyz" +"[^[a-m]--[f-w]p]+" "de<0>pfgwxyz" + +"[[a-m]&[e-s]w]+" "<0>efmwadnst" +"[^[a-m]&[e-s]w]+" "efmw<0>adnst" + +"[[a-m]&[e-s]]+" "<0>efmadnst" + + + +# {min,max} iteration qualifier +"A{3}BC" "<0>AAABC" + +"(ABC){2,3}AB" "no matchAB" +"(ABC){2,3}AB" "ABCAB" +"(ABC){2,3}AB" "<0>ABC<1>ABCAB" +"(ABC){2,3}AB" "<0>ABCABC<1>ABCAB" +"(ABC){2,3}AB" "<0>ABCABC<1>ABCABCAB" + +"(ABC){2}AB" "ABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCABCAB" +"(ABC){2}AB" "<0>ABC<1>ABCABCABCAB" + +"(ABC){2,}AB" "ABCAB" +"(ABC){2,}AB" "<0>ABC<1>ABCAB" +"(ABC){2,}AB" "<0>ABCABC<1>ABCAB" +"(ABC){2,}AB" "<0>ABCABCABC<1>ABCAB" + +"X{0,0}ABC" "<0>ABC" +"X{0,1}ABC" "<0>ABC" + +"(?:Hello(!{1,3}) there){1}" "Hello there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>! there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>!! there" +"(?:Hello(!{1,3}) there){1}" "<0>Hello<1>!!! there" +"(?:Hello(!{1,3}) there){1}" "Hello!!!! there" + +# Nongreedy {min,max}? intervals +"(ABC){2,3}?AB" "no matchAB" +"(ABC){2,3}?AB" "ABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCABCAB" +"(ABC){2,3}?AB" "<0>ABC<1>ABCABCABCAB" +"(ABC){2,3}?AX" "<0>ABCABC<1>ABCAX" +"(ABC){2,3}?AX" "ABC<0>ABCABC<1>ABCAX" + +# Possessive {min,max}+ intervals +"(ABC){2,3}+ABC" "ABCABCABC" +"(ABC){1,2}+ABC" "<0>ABC<1>ABCABC" +"(?:(.)\1){2,5}+." "<0>aabbcc<1>ddex" + + +# Atomic Grouping +"(?>.*)abc" "abcabcabc" # no match. .* consumed entire string. 
+"(?>(abc{2,4}?))(c*)" "<0><1>abcc<2>cccddd" +"(\.\d\d(?>[1-9]?))\d+" "1.625" +"(\.\d\d(?>[1-9]?))\d+" "1<0><1>.6250" + +# Possessive *+ +"(abc)*+a" "abcabcabc" +"(abc)*+a" "<0>abc<1>abcab" +"(a*b)*+a" "<0><1>aaaabaaaa" + +# Possessive ?+ +"c?+ddd" "<0>cddd" +"c?+cddd" "cddd" +"c?cddd" "<0>cddd" + +# Back Reference +"(?:ab(..)cd\1)*" "<0>ab23cd23ab<1>wwcdwwabxxcdyy" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>c" +"ab(?:c|(d?))(\1)" "<0>ab<1>d<2>d" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>e" +"ab(?:c|(d?))(\1)" "<0>ab<1><2>" + +# Back References that hit/don't hit end +"(abcd) \1" z "abcd abc" +"(abcd) \1" Z "<0><1>abcd abcd" +"(abcd) \1" Z "<0><1>abcd abcd " + +# Case Insensitive back references that hit/don't hit end. +"(abcd) \1" zi "abcd abc" +"(abcd) \1" Zi "<0><1>abcd ABCD" +"(abcd) \1" Zi "<0><1>abcd ABCD " + +# Back references that hit/don't hit boundary limits. + +"(abcd) \1" z "abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " +"(abcd) \1" Z "<0><1>abcd abcd " + +"(abcd) \1" zi "abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " +"(abcd) \1" Zi "<0><1>abcd abcd " + +# Back reference that fails match near the end of input without actually hitting the end. +"(abcd) \1" ZL "abcd abd" +"(abcd) \1" ZLi "abcd abd" + +# Back reference to a zero-length match. They are always a successful match. +"ab(x?)cd(\1)ef" "<0>ab<1>cd<2>ef" +"ab(x?)cd(\1)ef" i "<0>ab<1>cd<2>ef" + +# Back refs to capture groups that didn't participate in the match. +"ab(?:(c)|(d))\1" "abde" +"ab(?:(c)|(d))\1" "<0>ab<1>cce" +"ab(?:(c)|(d))\1" i "abde" +"ab(?:(c)|(d))\1" i "<0>ab<1>cce" + +# Named back references +"(?abcd)\k" "<0><1>abcdabcd" +"(no)?(?abcd)\k" "<0><2>abcdabcd" + +"(?...)" E " " # backref names are ascii letters & numbers only" +"(?<1a>...)" E " " # backref names must begin with a letter" +"(?.)(?.)" E " " # Repeated names are illegal. 
+ + +# Case Insensitive +"aBc" i "<0>ABC" +"a[^bc]d" i "ABD" +'((((((((((a))))))))))\10' i "<0><1><2><3><4><5><6><7><8><9><10>AA" + +"(?:(?i)a)b" "<0>Ab" +"ab(?i)cd" "<0>abCd" +"ab$cd" "abcd" + +"ssl" i "abc<0>ßlxyz" +"ssl" i "abc<0>ẞlxyz" +"FIND" i "can <0>find ?" # fi ligature, \ufb01 +"find" i "can <0>FIND ?" +"ῧ" i "xxx<0>ῧxxx" # Composed char (match string) decomposes when case-folded (pattern) + +# White space handling +"a b" "ab" +"abc " "abc" +"abc " "<0>abc " +"ab[cd e]z" "<0>ab z" +"ab\ c" "<0>ab c " +"ab c" "<0>ab c " +"ab c" x "ab c " +"ab\ c" x "<0>ab c " + +# +# Pattern Flags +# +"(?u)abc" "<0>abc" +"(?-u)abc" "<0>abc" + +# +# \c escapes (Control-whatever) +# +"\cA" "<0>\u0001" +"\ca" "<0>\u0001" +"\c\x" "<0>\u001cx" + + +#Multi-line mode +'b\s^' m "a\nb\n" +"(?m)^abc$" "abc \n abc\n<0>abc\nabc" +"(?m)^abc$" 2 "abc \n abc\nabc\n<0>abc" +"^abc$" 2 "abc \n abc\nabc\nabc" + +# Empty and full range +"[\u0000-\U0010ffff]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" +"[^\u0000-\U0010ffff]" "abc\u0000\uffff\U00010000\U0010ffffzz" +"[^a--a]+" "<0>abc\u0000\uffff\U00010000\U0010ffffzz" + +# Free-spacing mode +"a b c # this is a comment" x "<0>abc " +'^a (?#xxx) (?#yyy) {3}c' x "<0>aaac" +"a b c [x y z]" x "abc " +"a b c [x y z]" x "a b c " +"a b c [x y z]" x "<0>abcxyz" +"a b c [x y z]" x "<0>abcyyz" + +# +# Look Behind +# +"(?<=a)b" "a<0>b" +"(.*)(?<=[bc])" "<0><1>abcd" +"(?<=(abc))def" "<1>abc<0>def" # lookbehind precedes main match. +"(?<=ab|abc)xyz" "abwxyz" # ab matches, but not far enough. 
+"(?<=abc)cde" "abcde" +"(?<=abc|ab)cde" "ab<0>cde" +"(?<=abc|ab)cde" "abc<0>cde" + +"(?<=bc?c?c?)cd" "ab<0>cd" +"(?<=bc?c?c?)cd" "abc<0>cd" +"(?<=bc?c?c?)cd" "abcc<0>cd" +"(?<=bc?c?c?)cd" "abccc<0>cd" +"(?<=bc?c?c?)cd" "abcccccd" +"(?<=bc?c?c?)c+d" "ab<0>cccccd" + +".*(?<=: ?)(\w*)" "<0>1:one 2: two 3:<1>three " + +# +# Named Characters +# +"a\N{LATIN SMALL LETTER B}c" "<0>abc" +"a\N{LATIN SMALL LETTER B}c" i "<0>abc" +"a\N{LATIN SMALL LETTER B}c" i "<0>aBc" +"a\N{LATIN SMALL LETTER B}c" "aBc" + +"\N{FULL STOP}*" "<0>...abc" + +"$" "abc<0>" + +# +# Optimizations of .* at end of patterns +# +"abc.*" "<0>abcdef" +"abc.*$" "<0>abcdef" +"abc(.*)" "<0>abc<1>def" +"abc(.*)" "<0>abc<1>" +"abc.*" "<0>abc\ndef" +"abc.*" s "<0>abc\ndef" +"abc.*$" s "<0>abc\ndef" +"abc.*$" "abc\ndef" +"abc.*$" m "<0>abc\ndef" +"abc.*\Z" m "abc\ndef" +"abc.*\Z" sm "<0>abc\ndef" + +"abc*" "<0>abcccd" +"abc*$" "<0>abccc" +"ab(?:ab[xyz]\s)*" "<0>ababy abx abc" + +"(?:(abc)|a)(?:bc)+" "<0>abc" +"(?:(abc)|a)(?:bc)*" "<0><1>abc" +"^[+\-]?[0-9]*\.?[0-9]*" "<0>123.456" + +"ab.+yz" "<0>abc12345xyzttt" +"ab.+yz" s "<0>abc12345xyzttt" + +"ab.+yz" "abc123\n45xyzttt" +"ab.+yz" s "<0>abc12\n345xyzttt" + +"ab[0-9]+yz" "---abyz+++" +"ab[0-9]+yz" "---<0>ab1yz+++" +"ab[0-9]+yz" "---<0>ab12yz+++" +"ab[0-9]+yz" "---<0>ab123456yz+++" + +"ab([0-9]+|[A-Z]+)yz" "---abyz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>1yz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>12yz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>Ayz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>AByz+++" +"ab([0-9]+|[A-Z]+)yz" "---<0>ab<1>ABCDEyz+++" + +# +# Hex format \x escaping +# +"ab\x63" "<0>abc" +"ab\x09w" "<0>ab\u0009w" +"ab\xabcdc" "<0>ab\u00abcdc" +"ab\x{abcd}c" "<0>ab\uabcdc" +"ab\x{101234}c" "<0>ab\U00101234c" +"abα" "<0>abα" + +# +# Octal Escaping. This conforms to Java conventions, not Perl. +"\0101\00\03\073\0154\01442" "<0>A\u0000\u0003\u003b\u006c\u0064\u0032" +"\0776" "<0>\u003f\u0036" # overflow, the 6 is literal. 
+"\0376xyz" "<0>\u00fexyz" +"\08" E "<0>\u00008" +"\0" E "x" + +# +# \u Surrogate Pairs +# +"\ud800\udc00" "<0>\U00010000" +"\ud800\udc00*" "<0>\U00010000\U00010000\U00010000\U00010001" +# TODO (Vitess): The next case has invalid UTF-8, so it's not supported right now for testing. It likely works in practice though! +# "\ud800\ud800\udc00" "<0>\ud800\U00010000\U00010000\U00010000\U00010001" +"(\ud800)(\udc00)" "\U00010000" +"\U00010001+" "<0>\U00010001\U00010001\udc01" + +# +# hitEnd with find() +# +"abc" Z "aa<0>abc abcab" +"abc" 2Z "aaabc <0>abcab" +"abc" 3z "aa>abc abcab" + +# +# \ escaping +# +"abc\jkl" "<0>abcjkl" # escape of a non-special letter is just itself. +"abc[ \j]kl" "<0>abcjkl" + +# +# \R all newline sequences. +# +"abc\Rxyz" "<0>abc\u000axyzgh" +"abc\Rxyz" "<0>abc\u000bxyzgh" +"abc\Rxyz" "<0>abc\u000cxyzgh" +"abc\Rxyz" "<0>abc\u000dxyzgh" +"abc\Rxyz" "<0>abc\u0085xyzgh" +"abc\Rxyz" "<0>abc\u2028xyzgh" +"abc\Rxyz" "<0>abc\u2029xyzgh" +"abc\Rxyz" "<0>abc\u000d\u000axyzgh" + +"abc\R\nxyz" "abc\u000d\u000axyzgh" # \R cannot match only the CR from a CR/LF sequence. +"abc\r\nxyz" "<0>abc\u000d\u000axyzgh" + +"abc\Rxyz" "abc\u0009xyz" # Assorted non-matches. +"abc\Rxyz" "abc\u000exyz" +"abc\Rxyz" "abc\u202axyz" + +# \v \V single character new line sequences. 
+ +"abc\vxyz" "<0>abc\u000axyzgh" +"abc\vxyz" "<0>abc\u000bxyzgh" +"abc\vxyz" "<0>abc\u000cxyzgh" +"abc\vxyz" "<0>abc\u000dxyzgh" +"abc\vxyz" "<0>abc\u0085xyzgh" +"abc\vxyz" "<0>abc\u2028xyzgh" +"abc\vxyz" "<0>abc\u2029xyzgh" +"abc\vxyz" "abc\u000d\u000axyzgh" +"abc\vxyz" "abc?xyzgh" + +"abc[\v]xyz" "<0>abc\u000axyzgh" +"abc[\v]xyz" "<0>abc\u000bxyzgh" +"abc[\v]xyz" "<0>abc\u000cxyzgh" +"abc[\v]xyz" "<0>abc\u000dxyzgh" +"abc[\v]xyz" "<0>abc\u0085xyzgh" +"abc[\v]xyz" "<0>abc\u2028xyzgh" +"abc[\v]xyz" "<0>abc\u2029xyzgh" +"abc[\v]xyz" "abc\u000d\u000axyzgh" +"abc[\v]xyz" "abc?xyzgh" + +"abc\Vxyz" "abc\u000axyzgh" +"abc\Vxyz" "abc\u000bxyzgh" +"abc\Vxyz" "abc\u000cxyzgh" +"abc\Vxyz" "abc\u000dxyzgh" +"abc\Vxyz" "abc\u0085xyzgh" +"abc\Vxyz" "abc\u2028xyzgh" +"abc\Vxyz" "abc\u2029xyzgh" +"abc\Vxyz" "abc\u000d\u000axyzgh" +"abc\Vxyz" "<0>abc?xyzgh" + +# \h \H horizontal white space. Defined as gc=space_separator plus ascii tab + +"abc\hxyz" "<0>abc xyzgh" +"abc\Hxyz" "abc xyzgh" +"abc\hxyz" "<0>abc\u2003xyzgh" +"abc\Hxyz" "abc\u2003xyzgh" +"abc\hxyz" "<0>abc\u0009xyzgh" +"abc\Hxyz" "abc\u0009xyzgh" +"abc\hxyz" "abc?xyzgh" +"abc\Hxyz" "<0>abc?xyzgh" + +"abc[\h]xyz" "<0>abc xyzgh" +"abc[\H]xyz" "abc xyzgh" +"abc[\h]xyz" "<0>abc\u2003xyzgh" +"abc[\H]xyz" "abc\u2003xyzgh" +"abc[\h]xyz" "<0>abc\u0009xyzgh" +"abc[\H]xyz" "abc\u0009xyzgh" +"abc[\h]xyz" "abc?xyzgh" +"abc[\H]xyz" "<0>abc?xyzgh" + + +# +# Bug xxxx +# +"(?:\-|(\-?\d+\d\d\d))?(?:\-|\-(\d\d))?(?:\-|\-(\d\d))?(T)?(?:(\d\d):(\d\d):(\d\d)(\.\d+)?)?(?:(?:((?:\+|\-)\d\d):(\d\d))|(Z))?" 
MG "<0>-1234-21-31T41:51:61.789+71:81" + + +# +# A random, complex, meaningless pattern that should at least compile +# +"(?![^\\G)(?![^|\]\070\ne\{\t\[\053\?\\\x51\a\075\0023-\[&&[|\022-\xEA\00-\u41C2&&[^|a-\xCC&&[^\037\uECB3\u3D9A\x31\|\[^\016\r\{\,\uA29D\034\02[\02-\[|\t\056\uF599\x62\e\<\032\uF0AC\0026\0205Q\|\\\06\0164[|\057-\u7A98&&[\061-g|\|\0276\n\042\011\e\xE8\x64B\04\u6D0EDW^\p{Lower}]]]]?)(?<=[^\n\\\t\u8E13\,\0114\u656E\xA5\]&&[\03-\026|\uF39D\01\{i\u3BC2\u14FE]])(?<=[^|\uAE62\054H\|\}&&^\p{Space}])(?sxx)(?<=[\f\006\a\r\xB4]{1,5})|(?x-xd:^{5}+)()" "<0>abc" + + +# +# Bug 3225 + +"1|9" "<0>1" +"1|9" "<0>9" +"1*|9" "<0>1" +"1*|9" "<0>9" + +"(?:a|ac)d" "<0>acd" +"a|ac" "<0>ac" + +# +# Bug 3320 +# +"(a([^ ]+)){0,} (c)" "<0><1>a<2>b <3>c " +"(a([^ ]+))* (c)" "<0><1>a<2>b <3>c " + +# +# Bug 3436 +# +"(.*?) *$" "<0><1>test " + +# +# Bug 4034 +# +"\D" "<0>ABC\u00ffDEF" +"\d" "ABC\u00ffDEF" +"\D" "<0>\u00ffDEF" +"\d" "\u00ffDEF" +"\D" "123<0>\u00ffDEF" +"\D" "<0>\u0100DEF" +"\D" "123<0>\u0100DEF" + +# +#bug 4024, new line sequence handling +# +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"(?m)$" "AA<0>\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)$" 2 "AA\u000d\u000aBB<0>\u000d\u000aCC\u000d\u000a" +"(?m)$" 3 "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"(?m)$" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"(?m)$" 5 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "AA\u000d\u000aBB\u000d\u000aCC<0>\u000d\u000a" +"$" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a<0>" +"$" 3 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +"$" "\u000a\u0000a<0>\u000a" +"$" 2 "\u000a\u0000a\u000a<0>" +"$" 3 "\u000a\u0000a\u000a" + +"$" "<0>" +"$" 2 "" + +"$" "<0>\u000a" +"$" 2 "\u000a<0>" +"$" 3 "\u000a" + +"^" "<0>" +"^" 2 "" + +"\Z" "<0>" +"\Z" 2 "" +"\Z" 2 "\u000a<0>" +"\Z" 
"<0>\u000d\u000a" +"\Z" 2 "\u000d\u000a<0>" + + +# No matching ^ at interior new-lines if not in multi-line mode. +"^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"^" 2 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Dot-matches-any mode, and stopping at new-lines if off. +# +"." "<0>123\u000aXYZ" +"." 2 "1<0>23\u000aXYZ" +"." 3 "12<0>3\u000aXYZ" +"." 4 "123\u000a<0>XYZ" # . doesn't match newlines +"." 4 "123\u000b<0>XYZ" +"." 4 "123\u000c<0>XYZ" +"." 4 "123\u000d<0>XYZ" +"." 4 "123\u000d\u000a<0>XYZ" +"." 4 "123\u0085<0>XYZ" +"." 4 "123\u2028<0>XYZ" +"." 4 "123\u2029<0>XYZ" +"." 4s "123<0>\u000aXYZ" # . matches any +"." 4s "123<0>\u000bXYZ" +"." 4s "123<0>\u000cXYZ" +"." 4s "123<0>\u000dXYZ" +"." 4s "123<0>\u000d\u000aXYZ" +"." 4s "123<0>\u0085XYZ" +"." 4s "123<0>\u2028XYZ" +"." 4s "123<0>\u2029XYZ" +".{6}" "123\u000a\u000dXYZ" +".{6}" s "<0>123\u000a\u000dXY" + + +# +# Ranges +# +".*" "abc<0>defghi" +"a" "aaa<0>aaaaaa" +"a" 2 "aaaa<0>aaaaa" +"a" 3 "aaaaa<0>aaaa" +"a" 4 "aaaaaaaaa" +"a" "aaa<0>aaaaaa" + +# +# [set] parsing, systematically run through all of the parser states. +# +# +"[def]+" "abc<0>ddeeffghi" # set-open +"[^def]+" "<0>abcdefghi" +"[:digit:]+" "abc<0>123def" +"[:^digit:]+" "<0>abc123def" +"[\u005edef]+" "abc<0>de^fghi" + +"[]]+" "abc<0>]]][def" # set-open2 +"[^]]+" "<0>abc]]][def" + +"[:Lu:]+" "abc<0>ABCdef" # set-posix +"[:Lu]+" "abc<0>uL::Lu" +"[:^Lu]+" "abc<0>uL:^:Lu" +"[:]+" "abc<0>:::def" +"[:whats this:]" E " " +"[--]+" dE "-------" + +"[[nested]]+" "xyz[<0>nnetsteed]abc" #set-start +"[\x{41}]+" "CB<0>AAZYX" +"[\[\]\\]+" "&*<0>[]\\..." 
+"[*({<]+" "^&<0>{{(<<*)))" + + +"[-def]+" "abc<0>def-ef-dxyz" # set-start-dash +"[abc[--def]]" E " " + +"[x[&def]]+" "abc<0>def&ghi" # set-start-amp +"[&& is bad at start]" E " " + +"[abc" E " " # set-after-lit +"[def]]" "abcdef" +"[def]]" "abcde<0>f]]" + +"[[def][ghi]]+" "abc]<0>defghi[xyz" # set-after-set +"[[def]ghi]+" "abc]<0>defghi[xyz" +"[[[[[[[[[[[abc]" E " " +"[[abc]\p{Lu}]+" "def<0>abcABCxyz" + +"[d-f]+" "abc<0>defghi" # set-after-range +"[d-f[x-z]]+" "abc<0>defxyzzzgw" +"[\s\d]+" "abc<0> 123def" +"[d-f\d]+" "abc<0>def123ghi" +"[d-fr-t]+" "abc<0>defrstuvw" + +"[abc--]" E " " # set-after-op +"[[def]&&]" E " " +"[-abcd---]+" "<0>abc--" #[-abcd]--[-] +"[&abcd&&&ac]+" "b<0>ac&&cad" #[&abcd]&&[&ac] + +"[[abcd]&[ac]]+" "b<0>acacd" # set-set-amp +"[[abcd]&&[ac]]+" "b<0>acacd" +"[[abcd]&&ac]+" "b<0>acacd" +"[[abcd]&ac]+" "<0>bacacd&&&" + +"[abcd&[ac]]+" "<0>bacacd&&&" #set-lit-amp +"[abcd&&[ac]]+" "b<0>acacd" +"[abcd&&ac]+" "b<0>acacd" + +"[[abcd]-[ac]]+" "a<0>bdbdc" # set-set-dash +"[[abcd]--[ac]]+" "a<0>bdbdc" +"[[abcd]--ac]+" "a<0>bdbdc" +"[[abcd]-ac]+" "<0>bacacd---" + +"[a-d--[b-c]]+" "b<0>adadc" # set-range-dash +"[a-d--b-c]+" "b<0>adadc" +"[a-d-[b-c]]+" "<0>bad-adc" +"[a-d-b-c]+" "<0>bad-adc" +"[\w--[b-c]]+" "b<0>adadc" +"[\w--b-c]+" "b<0>adadc" +"[\w-[b-c]]+" "<0>bad-adc" +"[\w-b-c]+" "<0>bad-adc" + +"[a-d&&[b-c]]+" "a<0>bcbcd" # set-range-amp +"[a-d&&b-c]+" "a<0>bcbcd" +"[a-d&[b-c]]+" "<0>abc&bcd" +"[a-d&b-c]+" "<0>abc&bcd" + +"[abcd--bc]+" "b<0>addac" # set-lit-dash +"[abcd--[bc]]+" "b<0>addac" +"[abcd-[bc]]+" "<0>bad--dacxyz" +"[abcd-]+" "<0>bad--dacxyz" + +"[abcd-\s]+" E "xyz<0>abcd --xyz" # set-lit-dash-esc +"[abcd-\N{LATIN SMALL LETTER G}]+" "xyz-<0>abcdefghij-" +"[bcd-\{]+" "a<0>bcdefyz{|}" + +"[\p{Ll}]+" "ABC<0>abc^&*&" # set-escape +"[\P{Ll}]+" "abc<0>ABC^&*&xyz" +"[\N{LATIN SMALL LETTER Q}]+" "mnop<0>qqqrst" +"[\sa]+" "cb<0>a a (*&" +"[\S]+" " <0>hello " +"[\w]+" " <0>hello_world! 
" +"[\W]+" "a<0> *$%#,hello " +"[\d]+" "abc<0>123def" +"[\D]+" "123<0>abc567" +"[\$\#]+" "123<0>$#$#\\" + +# +# Try each of the Java compatibility properties. +# These are checked here, while normal Unicode properties aren't, because +# these Java compatibility properties are implemented directly by regexp, while other +# properties are handled by ICU's Property and UnicodeSet APIs. +# +# These tests are only to verify that the names are recognized and the +# implementation isn't dead. They are not intended to verify that the +# function definitions are 100% correct. +# +"[:InBasic Latin:]+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"[:^InBasic Latin:]+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InBasicLatin}+" "ΓΔΕΖΗΘ<0>hello, world.ニヌネノハバパ" +"\P{InBasicLatin}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InGreek}+" "<0>ΓΔΕΖΗΘhello, world.ニヌネノハバパ" +"\p{InCombining Marks for Symbols}" "<0>\u20d0" +"\p{Incombiningmarksforsymbols}" "<0>\u20d0" + + +"\p{javaDefined}+" "\uffff<0>abcd\U00045678" +"\p{javaDigit}+" "abc<0>1234xyz" +"\p{javaIdentifierIgnorable}+" "abc<0>\u0000\u000e\u009fxyz" +"\p{javaISOControl}+" "abc<0>\u0000\u000d\u0083xyz" +"\p{javaJavaIdentifierPart}+" "#@!<0>abc123_$;" +"\p{javaJavaIdentifierStart}+" "123\u0301<0>abc$_%^&" +"\p{javaLetter}+" "123<0>abcDEF&*()(" +"\p{javaLetterOrDigit}+" "$%^&*<0>123abcகஙசஜஞ☺♘♚☔☎♬⚄⚡" +"\p{javaLowerCase}+" "ABC<0>def&^%#:=" +"\p{javaMirrored}+" "ab$%<0>(){}[]xyz" +"\p{javaSpaceChar}+" "abc<0> \u00a0\u2028!@#" +"\p{javaSupplementaryCodePoint}+" "abc\uffff<0>\U00010000\U0010ffff\u0000" +"\p{javaTitleCase}+" "abCE<0>Džῌᾨ123" +"\p{javaUnicodeIdentifierStart}+" "123<0>abcⅣ%^&&*" +"\p{javaUnicodeIdentifierPart}+" "%&&^<0>abc123\u0301\u0002..." 
+"\p{javaUpperCase}+" "abc<0>ABC123" +"\p{javaValidCodePoint}+" "<0>\u0000abc\ud800 unpaired \udfff |\U0010ffff" +"\p{javaWhitespace}+" "abc\u00a0\u2007\u202f<0> \u0009\u001c\u001f\u202842" +"\p{all}+" "<0>123\u0000\U0010ffff" +"\P{all}+" "123\u0000\U0010ffff" + +# [:word:] is implemented directly by regexp. Not a java compat property, but PCRE and others. + +"[:word:]+" ".??$<0>abc123ΓΔΕΖΗ_%%%" +"\P{WORD}+" "<0>.??$abc123ΓΔΕΖΗ_%%%" + +# +# Errors on unrecognized ASCII letter escape sequences. +# +"[abc\Y]+" "<0>abcY" +"[abc\Y]+" eE "<0>abcY" + +"(?:a|b|c|\Y)+" "<0>abcY" +"(?:a|b|c|\Y)+" eE "<0>abcY" + +"\Q\Y\E" e "<0>\\Y" + +# +# Reported problem +# +"[a-\w]" E "x" + +# +# Bug 4045 +# +"A*" "<0>AAAA" +"A*" 2 "AAAA<0>" +"A*" 3 "AAAA" +"A*" 4 "AAAA" +"A*" 5 "AAAA" +"A*" 6 "AAAA" +"A*" "<0>" +"A*" 2 "" +"A*" 3 "" +"A*" 4 "" +"A*" 5 "" + +# +# Bug 4046 +# +"(?m)^" "<0>AA\u000dBB\u000dCC\u000d" +"(?m)^" 2 "AA\u000d<0>BB\u000dCC\u000d" +"(?m)^" 3 "AA\u000dBB\u000d<0>CC\u000d" +"(?m)^" 4 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 5 "AA\u000dBB\u000dCC\u000d" +"(?m)^" 6 "AA\u000dBB\u000dCC\u000d" + +"(?m)^" "<0>AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" +"(?m)^" 2 "AA\u000d\u000a<0>BB\u000d\u000aCC\u000d\u000a" +"(?m)^" 3 "AA\u000d\u000aBB\u000d\u000a<0>CC\u000d\u000a" +"(?m)^" 4 "AA\u000d\u000aBB\u000d\u000aCC\u000d\u000a" + +# +# Bug 4059 +# +"\w+" "<0>イチロー" +"\b....\b." "<0>イチロー?" + + +# +# Bug 4058 ICU Unicode Set patterns have an odd feature - +# A $ as the last character before the close bracket means match +# a \uffff, which means off the end of the string in transliterators. +# Didn't make sense for regular expressions, and is now fixed. +# +"[\$](P|C|D);" "<0>$<1>P;" +"[$](P|C|D);" "<0>$<1>P;" +"[$$](P|C|D);" "<0>$<1>P;" + +# +# bug 4888 Flag settings lost in some cases. 
+# +"((a){2})|(#)" is "no" +"((a){2})|(#)" is "<0><1>a<2>a#" +"((a){2})|(#)" is "a<0><3>#" + +"((a|b){2})|c" is "<0>c" +"((a|b){2})|c" is "<0>C" +"((a|b){2})|c" s "C" + +# +# bug 5617 ZWJ \u200d shouldn't cause word boundaries +# +".+?\b" "<0> \u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 2 " <0>\u0935\u0915\u094D\u200D\u0924\u0947 " +".+?\b" 3 " \u0935\u0915\u094D\u200D\u0924\u0947 " + +# +# bug 5386 "^.*$" should match empty input +# +"^.*$" "<0>" +"^.*$" m "<0>" +"^.*$" "<0>\n" +"(?s)^.*$" "<0>\n" + +# +# bug 5386 Empty pattern and empty input should match. +# +"" "<0>abc" +"" "<0>" + +# +# bug 5386 Range upper and lower bounds can be equal +# +"[a-a]" "<0>a" + +# +# bug 5386 $* should not fail, should match empty string. +# +"$*" "<0>abc" + +# +# bug 5386 \Q ... \E escaping problem +# +"[a-z\Q-$\E]+" "QE<0>abc-def$." + +# More reported 5386 Java compatibility failures +# +"[^]*abb]*" "<0>kkkk" +"\xa" "huh" # Java would like to be warned. +"^.*$" "<0>" + +# +# bug 5386 Empty left alternation should produce a zero length match. +# +"|a" "<0>a" +"$|ab" "<0>ab" +"$|ba" "ab<0>" + +# +# bug 5386 Java compatibility for set expressions +# +"[a-z&&[cde]]+" "ab<0>cdefg" + +# +# bug 6019 matches() needs to backtrack and check for a longer match if the +# first match(es) found don't match the entire input. +# +"a?|b" "<0>b" +"a?|b" M "<0>b" +"a?|.*?u|stuff|d" M "<0>stuff" +"a?|.*?(u)|stuff|d" M "<0>stuff<1>u" +"a+?" "<0>aaaaaaaaaaaaa" +"a+?" M "<0>aaaaaaaaaaaaa" + +# +# Bug 7724. Expression to validate zip codes. +# +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "<0><1>94040<2>-3344" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "94040-0000" +"(?!0{5})(\d{5})(?!-?0{4})(-?\d{4})?" "00000-3344" + +# +# Bug 8666. Assertion failure on match, bad operand to JMP_SAV_X opcode. +# +"((.??)+|A)*" "<0><1><2>AAAAABBBBBCCCCCDDDDEEEEE" + +# +# Bug 8826. Incorrect results with case insensitive matches. +# +"AS(X)" i "aßx" +"AS.*" i "aßx" # Expansion of sharp s can't split between pattern terms.
+"ASßS" i "<0>aßß" # All one literal string, does match. +"ASß{1}S" i "aßß" # Pattern with terms, no match. +"aßx" i "<0>assx" +"aßx" i "<0>ASSX" +"aßx" i "<0>aßx" +"ASS(.)" i "<0>aß<1>x" + +# Case Insensitive, probe some corner cases. +"ass+" i "aß" # Second 's' in pattern is qualified, can't combine with first. +"as+" i "aß" +"aßs" i "as" # Can't match half of a ß +"aß+" i "<0>asssssssss" +"aß+" i "<0>assßSssSSSs" +"a(ß?)+" i "<0>assssssss<1>s" +"a(ß?)+" i "<0>a<1>zzzzzzzzs" + +"\U00010400" i "<0>\U00010428" # case folded supplemental code point. + +"sstuff" i "<0>ßtuff" # exercise optimizations on what chars can start a match. +"sstuff" i "s<0>ßtuff" # exercise optimizations on what chars can start a match. +"ßtuff" i "s<0>sstuff" +"ßtuff" i "s<0>Sstuff" + +"a(..)\1" i "<0>A<1>bcBCdef" +"(ß)\1" i "aa<0><1>ssßzz" # Case insensitive back reference +"..(.)\1" i "<0>aa<1>ßss" +"ab(..)\1" i "xx<0>ab<1>ssßss" + +" (ss) ((\1.*)|(.*))" i "<0> <1>ss <2><4>sß" # The back reference 'ss' must not match in 'sß' + +# Bug 9057 +# \u200c and \u200d should be word characters. +# +"\w+" " <0>abc\u200cdef\u200dghi " +"\w+" i " <0>abc\u200cdef\u200dghi " +"[\w]+" " <0>abc\u200cdef\u200dghi " +"[\w]+" i " <0>abc\u200cdef\u200dghi " + +# Bug 9283 +# uregex_open fails for look-behind assertion + case-insensitive + +"(ab)?(?<=ab)cd|ef" i "<0><1>abcd" + +# Bug 9719 Loop breaking on (zero length match){3,} (unlimited upper bound). +# + +"(?:abc){1,}abc" "<0>abcabcabcabcabc" +"(?:2*){2,}?a2\z" "<0>2a2" +"(?:2*){2,}?a2\z" "2a3" +"(?:x?+){3,}+yz" "w<0>yz" +"(2*){2,}?a2\\z" "2a3" +"(2*){2,}?a2\\z" "<0>2<1>a2\\z" +"(2*){2,}?a2\z" "<0>2<1>a2" + + +# Bug 10024 +# Incorrect (unbounded) longest match length with {1, 20} style quantifiers. +# Unbounded match is disallowed in look-behind expressions. +# Max match length is used to limit where to check for look-behind matches. 
+ +"(?<=a{1,5})bc" "aaaa<0>bcdef" +"(?<=(?:aa){3,20})bc" "aaaaaa<0>bcdef" +"(?jkl" +"(?<=a{11})bc" "aaaaaaaaaaa<0>bc" +"(?<=a{11})bc" "aaaaaaaaaabc" +"(?<=a{1,})bc" E "aaaa<0>bcdef" # U_REGEX_LOOK_BEHIND_LIMIT error. +"(?<=(?:){11})bc" "<0>bc" # Empty (?:) expression. + +# Bug 10835 +# Match Start Set not being correctly computed for case insensitive patterns. +# (Test here is to dump the compiled pattern & manually check the start set.) + +"(private|secret|confidential|classified|restricted)" i "hmm, <0><1>Classified stuff" +"(private|secret|confidential|classified|restricted)" "hmm, Classified stuff" + +# Bug 10844 + +"^([\w\d:]+)$" "<0><1>DiesIst1Beispiel:text" +"^([\w\d:]+)$" i "<0><1>DiesIst1Beispiel:text" +"^(\w+\d\w+:\w+)$" "<0><1>DiesIst1Beispiel:text" +"^(\w+\d\w+:\w+)$" i "<0><1>DiesIst1Beispiel:text" + +# Bug 11049 +# Edge cases in find() when pattern match begins with set of code points +# and the match begins at the end of the string. + +"A|B|C" "hello <0>A" +"A|B|C" "hello \U00011234" +"A|B|\U00012345" "hello <0>\U00012345" +"A|B|\U00010000" "hello \ud800" + +# Bug 11369 +# Incorrect optimization of patterns with a zero length quantifier {0} + +"(.|b)(|b){0}\$(?#xxx){3}(?>\D*)" "AAAAABBBBBCCCCCDDDDEEEEE" +"(|b)ab(c)" "<0><1>ab<2>c" +"(|b){0}a{3}(D*)" "<0>aaa<2>" +"(|b){0,1}a{3}(D*)" "<0><1>aaa<2>" +"((|b){0})a{3}(D*)" "<0><1>aaa<3>" + +# Bug 11370 +# Max match length computation of look-behind expression gives result that is too big to fit in the +# 24 bit operand portion of the compiled code. Expressions should fail to compile. +# (Look-behind match length must be bounded. This case is treated as unbounded, an error.) + +"(?pre<1>\ud800post\ud800 fin" +"pre(.)post\1" i "pre\ud800post\ud800\udc00" # case insensitive backrefs take a different code path +"pre(.)post\1" i "<0>pre<1>\ud800post\ud800 fin" + +# Bug 11554 +# +# Maximum match length computation was assuming UTF-16. +# Used in look-behind matches to constrain how far back to look.
+ +"(?<=a\x{100000})spam" "***a\x{100000}<0>spam**" +"(?<=aą)spam" "**aą<0>spam**" +"(?<=ąabc)spam" "**ąabc<0>spam**" + +"(?<=a\x{100000})spam" "***a\x{100001}spam**" +"(?<=aą)spam" "**bąspam**" +"(?<=ąabc)spam" "**ąabxspam**" + +# with negative look-behind + +"(?spam**" +"(?spam**" +"(?spam**" + +# Bug #12930 +# +# Minimum Match Length computation, int32_t overflow on an empty set in the pattern. +# The empty set, with no match possible, has a min match length of INT32_MAX. +# Was incremented subsequently. Caused assertion failure on pattern compile. + +"[^\u0000-\U0010ffff]bc?" "bc no match" +"[^\u0000-\U0010ffff]?bc?" "<0>bc has a match" + +# Bug #12160 Hit End behavior after find fails to find. +# To match Java, should be true if find fails to find. +# +"abc" Z "<0>abc abc abc xyz" +"abc" Z2 "abc <0>abc abc xyz" +"abc" Z3 "abc abc <0>abc xyz" +"abc" z4 "abc abc abc xyz" + +# Bug #13844 Verify that non-standard Java property names are recognized. +"[\p{IsAlphabetic}]" " <0>A" +"[\P{IsAlphabetic}]" "A<0> " +"[\p{IsIdeographic}]" "A<0>〆" +"[\P{IsIdeographic}]" "〆<0>A" +"[\p{IsLetter}]" " <0>A" +"[\P{IsLetter}]" "A<0> " +"[\p{Letter}]" " <0>A" +"[\p{IsLowercase}]" "A<0>a" +"[\P{IsLowercase}]" "a<0>A" +"[\p{IsUppercase}]" "a<0>A" +"[\P{IsUppercase}]" "A<0>a" +"[\p{IsTitlecase}]" "D<0>Dz" +"[\P{IsTitlecase}]" "Dz<0>D" +"[\p{IsPunctuation}]" " <0>&" +"[\P{IsPunctuation}]" "&<0> " +"[\p{IsControl}]" " <0>\x{82}" +"[\P{IsControl}]" "\x{82}<0> " +"[\p{IsWhite_Space}]" "x<0> " +"[\P{IsWhite_Space}]" " <0>x" +"[\p{IsDigit}]" " <0>4" +"[\P{IsDigit}]" "4<0> " +"[\p{IsHex_Digit}]" " <0>F" +"[\P{IsHex_Digit}]" "F<0> " +"[\p{IsJoin_Control}]" " <0>\x{200d}" +"[\P{IsJoin_Control}]" "\x{200d}<0> " +"[\p{IsNoncharacter_Code_Point}]" "A<0>\x{5fffe}" +"[\p{IsAssigned}]" "\x{10ffff}<0>a" +"[\P{IsAssigned}]" "a<0>\x{10ffff}" + +"[\p{InBasic Latin}]" "〆<0>A" +"[\p{InBasicLatin}]" "〆<0>A" +"[\p{InBasic-Latin}]" "〆<0>A" # ICU accepts '-'; Java does not. 
+"[\p{InBasic_Latin}]" "〆<0>A" +"[\p{Inbasiclatin}]" "〆<0>A" +"[\p{inbasiclatin}]" E "〆<0>A" # "In" must be cased as shown. Property name part is case insensitive. +"[\p{InCombining_Marks_for_Symbols}]" "a<0>\x{20DD}" # COMBINING ENCLOSING CIRCLE + +"[\p{all}]*" "<0>\x{00}abc\x{10ffff}" +"[\p{javaBadProperty}]" E "whatever" +"[\p{IsBadProperty}]" E "whatever" +"[\p{InBadBlock}]" E "whatever" +"[\p{In}]" E "whatever" +"[\p{Is}]" E "whatever" +"[\p{java}]" "x<0>ꦉ" # Note: "java" is a valid script code. + +"[\p{javaLowerCase}]+" "A<0>a" +"[\p{javaLowerCase}]+" i "<0>Aa" +"[\P{javaLowerCase}]+" "<0>Aa" +"[\P{javaLowerCase}]+" i "Aa" # No Match because case fold of the set happens first, then negation. + # JDK is not case insensitive w named properties, even though + # the insensitive match flag is set. A JDK bug? + +"[a-z]+" i "<0>Aa" # Matches JDK behavior. +"[^a-z]+" i "Aa" # (no match) which is JDK behavior. Case fold first, then negation. + +# Bug 20385. Assertion failure while compiling a negative look-behind expression consisting of a set with +# no contents. Meaning the [set] can never match. There is no syntax to directly express +# an empty set, so generate it by negating (^) a set of all code points. +# Also check empty sets in other contexts. + +"(?abc" + +"(?abc" +"x(?xabc" +"x(?xabc" +"x(?xabc" + +"[^\u0000-\U0010ffff]" "a" +"[^[^\u0000-\U0010ffff]]" "<0>a" + +"This is a string with (?:one |two |three )endings" "<0>This is a string with two endings" + +# Bug ICU-20544. Similar to 20385, above. Assertion failure with a negative look-behind assertion containing +# a set with no contents. Look-behind pattern includes more than just the empty set. + +"(?abc" # note: first 'ⰿ' is \u2c3f, hence empty set. +"(?abc" +"(?<=[^[^]]†)" "abc" # Problem also exists w positive look-behind + +# Bug ICU-20391. Crash in computation of minimum match length with nested look-around patterns. 
+# +"(?<=(?<=((?=)){0}+)" E "aaa" +"(?<=(?<=((?=)){0}+))" "<0>" +"(?<=c(?<=b((?=a)){1}+))" "aaa" +"abc(?=de(?=f))...g" "<0>abcdefg" +"abc(?=de(?=f))...g" "abcdxfg" + +# Bug ICU-20618 Assertion failure with nested look-around expressions. +# +"(?<=(?<=b?(?=a)))" "hello, world." + +# Bug ICU-20939 +# Incorrect word \b boundaries w UTF-8 input and non-ASCII text +# +"(?w)\b" v2 "äää<0> äää" + +# Bug ICU-21492 Assertion failure with nested look-around expressions. +# +"(?<=(?:(?<=(?:(?<=(?:(?<=)){2})){3})){4}" E "<0>" # orig failure from bug report, w mismatched parens. +"(?:(?<=(?:(?<=)){2}))" "<0>" # Simplified case, with a valid pattern. + +# Random debugging, Temporary +# + +# +# Regexps from http://www.regexlib.com +# +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>G1 1AA" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>EH10 2QQ" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" G "<0>SW1 1ZZ" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "G111 1AA" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "X10 WW" +"^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$" "DDD 5WW" +#"^[\w\-]+(?:\.[\w\-]+)*@(?:[\w\-]+\.)+[a-zA-Z]{2,7}$" dG "<0>joe.tillis@unit.army.mil" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" G "<0>jack_rabbit@slims.com" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" G "<0>foo99@foo.co.uk" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" "find_the_mistake.@foo.org" # TODO: \w in pattern +#"^[\w-]+(?:\.[\w-]+)*@(?:[\w-]+\.)+[a-zA-Z]{2,7}$" ".prefix.@some.net" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>asmith@mactec.com" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>foo12@foo.edu" 
+"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" G "<0>bob.smith@foo.tv" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "joe" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "@foo.com" +"^([a-zA-Z0-9_\-\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([a-zA-Z0-9\-]+\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\]?)$" "a@a" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>4/1/2001" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>12/12/2001" +"^\d{1,2}\/\d{1,2}\/\d{4}$" G "<0>55/5/3434" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "1/1/01" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "12 Jan 01" +"^\d{1,2}\/\d{1,2}\/\d{4}$" "1-1-2001" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>01.1.02" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>11-30-2001" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>2/29/2000" 
+"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "02/29/01" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "13/01/2002" +"^(?:(?:(?:0?[13578]|1[02])(\/|-|\.)31)\1|(?:(?:0?[1,3-9]|1[0-2])(\/|-|\.)(?:29|30)\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:0?2(\/|-|\.)29\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:(?:0?[1-9])|(?:1[0-2]))(\/|-|\.)(?:0?[1-9]|1\d|2[0-8])\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "11/00/02" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>127.0.0.1" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>255.255.255.0" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" G "<0>192.168.0.1" 
+"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "1200.5.4.3" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "abc.def.ghi.jkl" +"^(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])$" "255.foo.bar.1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>COM1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>AUX" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" G "<0>LPT1" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "image.jpg" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "index.html" +"(AUX|PRN|NUL|COM\d|LPT\d)+\s*$" "readme.txt" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>29/02/1972" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>5-9-98" 
+"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" G "<0>10-11-2002" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "29/02/2003" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "12/13/2002" +"^(?:(?:31(\/|-|\.)(?:0?[13578]|1[02]))\1|(?:(?:29|30)(\/|-|\.)(?:0?[1,3-9]|1[0-2])\2))(?:(?:1[6-9]|[2-9]\d)?\d{2})$|^(?:29(\/|-|\.)0?2\3(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00))))$|^(?:0?[1-9]|1\d|2[0-8])(\/|-|\.)(?:(?:0?[1-9])|(?:1[0-2]))\4(?:(?:1[6-9]|[2-9]\d)?\d{2})$" "1-1-1500" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" G "<0>user=foo,bar,quux;group=manager,admin;level=100;" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" G "<0>group=nobody;level=24;" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" "user=foo" +"^(user=([a-z0-9]+,)*(([a-z0-9]+){1});)?(group=([a-z0-9]+,)*(([a-z0-9]+){1});)?(level=[0-9]+;)?$" "blahh" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>(+44)(0)20-12341234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>02012341234" 
+"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" G "<0>+44 (0) 1234-1234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" "(44+)020-12341234" +"^(\(?\+?[0-9]*\)?)?[0-9_\- \(\)]*$" "12341234(+020)" +"\b(\w+)\s+\1\b" G "<0>Tell the the preacher" +"\b(\w+)\s+\1\b" G "<0>some some" +"\b(\w+)\s+\1\b" G "<0>hubba hubba" +"\b(\w+)\s+\1\b" "once an annual report" +"\b(\w+)\s+\1\b" "mandate dated submissions" +"\b(\w+)\s+\1\b" "Hubba hubba" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>+31235256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>+31(0)235256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" G "<0>023-5256677" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "+3123525667788999" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "3123525667788" +"(^\+[0-9]{2}|^\+[0-9]{2}\(0\)|^\(\+[0-9]{2}\)\(0\)|^00[0-9]{2}|^0)([0-9]{9}$|[0-9\-\s]{10}$)" "232-2566778" +"^[-+]?\d*\.?\d*$" G "<0>123" +"^[-+]?\d*\.?\d*$" G "<0>+3.14159" +"^[-+]?\d*\.?\d*$" G "<0>-3.14159" +"^[-+]?\d*\.?\d*$" "abc" +"^[-+]?\d*\.?\d*$" "3.4.5" +"^[-+]?\d*\.?\d*$" "$99.95" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>$1,234.50" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>$0.70" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" G "<0>.7" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" "$0,123.50" +"^\$?([1-9]{1}[0-9]{0,2}(\,[0-9]{3})*(\.[0-9]{0,2})?|[1-9]{1}[0-9]{0,}(\.[0-9]{0,2})?|0(\.[0-9]{0,2})?|(\.[0-9]{1,2})?)$" "$00.5" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G 
"<0>AB123456D" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G "<0>AB123456F" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" G "<0>AB123456M" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" "AB123456E" +"^[A-Z]{2}[0-9]{6}[A-DFM]{1}$" "ab123456d" +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" G "<0>http://regxlib.com/Default.aspx" # TODO: \w in pattern +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" G "<0>http://electronics.cnet.com/electronics/0-6342366-8-8994967-1.html" # TODO: \w in pattern +#"(http|ftp|https):\/\/[\w]+(.[\w]+)([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?" "www.yahoo.com" # TODO: \w in pattern +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034 AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" G "<0>2034 ak" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" "2034 AK" +"^[0-9]{4}\s{0,1}[a-zA-Z]{2}$" "321321 AKSSAA" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>4/5/91" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>04/5/1991" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" G "<0>4/05/89" +"((\d{2})|(\d))\/((\d{2})|(\d))\/((\d{4})|(\d{2}))" "4/5/1" +#"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>01/01/2001 " #TODO - \s in pattern. 
+"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>01-01-2001:" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" G "<0>(1-1-01)" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "13/1/2001" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "1-32-2001" +"(^|\s|\()((([1-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-]((2[0-9]){1}|(3[01]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:])|(^|\s|\()((([0-9]){1}|([0][1-9]){1}|([1][012]){1}){1}[\/-](([11-31]){1}|([01][1-9]){1}|([1-9]){1}){1}[\/-](((19|20)([0-9][0-9]){1}|([0-9][0-9]){1})){1}(([\s|\)|:|$|\>])){1}){1}){1}){1}" "1-1-1801" +"^\d{3}\s?\d{3}$" G "<0>400 099" +"^\d{3}\s?\d{3}$" G "<0>400099" +"^\d{3}\s?\d{3}$" G "<0>400050" +"^\d{3}\s?\d{3}$" 
"2345678" +"^\d{3}\s?\d{3}$" "12345" +"^\d{3}\s?\d{3}$" "asdf" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>(111) 222-3333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>1112223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" G "<0>111-222-3333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11122223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11112223333" +"^\D?(\d{3})\D?\D?(\d{3})\D?(\d{4})$" "11122233333" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>#00ccff" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>#039" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" G "<0>ffffcc" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "blue" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "0x000000" +"^#?([a-f]|[A-F]|[0-9]){3}(([a-f]|[A-F]|[0-9]){3})?$" "#ff000" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>01:23:45:67:89:ab" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>01:23:45:67:89:AB" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" G "<0>fE:dC:bA:98:76:54" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:67:89:ab:cd" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:67:89:Az" +"^([0-9a-fA-F][0-9a-fA-F]:){5}([0-9a-fA-F][0-9a-fA-F])$" "01:23:45:56:" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>http://www.blah.com/~joe" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>ftp://ftp.blah.co.uk:2828/blah%20blah.gif" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" G "<0>https://blah.gov/blah-blah.as" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "www.blah.com" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" 
"http://www.blah.com/I have spaces!" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*$" "ftp://blah_underscore/[nope]" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>12/01/2002 12:32:10" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "32/12/2002" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "12/13/2001" +"^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2})$|^(([0-2]\d|[3][0-1])\/([0]\d|[1][0-2])\/[2][0]\d{2}\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "12/02/06" +"^[0-9](\.[0-9]+)?$" G "<0>1.2345" +"^[0-9](\.[0-9]+)?$" G "<0>0.00001" +"^[0-9](\.[0-9]+)?$" G "<0>7" +"^[0-9](\.[0-9]+)?$" "12.2" +"^[0-9](\.[0-9]+)?$" "1.10.1" +"^[0-9](\.[0-9]+)?$" "15.98" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>III" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>xiv" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" G "<0>MCMLXLIX" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "iiV" 
+"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "MCCM" +"^(?:[mM]{1,3})?(?:(?:[cC][dDmM])|(?:[dD]?(?:[cC]{1,3})?))?[lL]?(([xX])(?:\2{1,2}|[lL]|[cC])?)?((([iI])((\5{1,2})|[vV]|[xX]|[lL])?)|([vV]?([iI]{1,3})?))?$" "XXXX" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>123" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>-123.35" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" G "<0>-123.35e-2" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "abc" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "123.32e" +"^[-+]?[0-9]+[.]?[0-9]*([eE][-+]?[0-9]+)?$" "123.32.3" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>T.F. Johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>John O'Neil" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" G "<0>Mary-Kate Johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "sam_johnson" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "Joe--Bob Jones" +"^[a-zA-Z]+(([\'\,\.\- ][a-zA-Z ])?[a-zA-Z]*)*$" "dfjsd0rd" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>1200" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>1645" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" G "<0>2359" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "2400" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "asbc" +"^(20|21|22|23|[0-1]\d)[0-5]\d$" "12:45" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G '<0>' +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G "<0>" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ G "<0>" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ "= img.jpg" +/<[^>]*\n?.*=("|')?(.*\.jpg)("|')?.*\n?[^<]*>/ "img.jpg" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>78754" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>78754-1234" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" G "<0>G3H 6A3" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "78754-12aA" +"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "7875A" 
+"^(\d{5}-\d{4}|\d{5})$|^([a-zA-Z]\d[a-zA-Z] \d[a-zA-Z]\d)$" "g3h6a3" +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob@somewhere.com" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob.jones@[1.1.1.1]" +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" G "<0>bob@a.b.c.d.info" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob@com" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob.jones@some.where" # TODO: \w in pattern +#"^([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))$" "bob@1.1.1.123" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" G "<0>bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" "ab@cd.ef" # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" ""bob A. jones " # TODO: \w in pattern +#"^(([-\w \.]+)|(""[-\w \.]+"") )?<([\w\-\.]+)@((\[([0-9]{1,3}\.){3}[0-9]{1,3}\])|(([\w\-]+\.)+)([a-zA-Z]{2,4}))>$" "bob A. 
jones " # TODO: \w in pattern +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>SW112LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>SW11 2LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" G "<0>CR05LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "12CR0LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "12CR 0LE" +"^[A-Za-z]{1,2}[0-9A-Za-z]{1,2}[ ]?[0-9]{0,1}[A-Za-z]{2}$" "SWLE05" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2099-12-31T23:59:59" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2002/02/09 16:30:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>2000-01-01T00:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2000-13-31T00:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2002/02/33 24:00:00" +"20\d{2}(-|\/)((0[1-9])|(1[0-2]))(-|\/)((0[1-9])|([1-2][0-9])|(3[0-1]))(T|\s)(([0-1][0-9])|(2[0-3])):([0-5][0-9]):([0-5][0-9])" "2000-01-01 60:00:00" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011567812345678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011 5678 1234 5678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" G "<0>6011-5678-1234-5678" +"^((?:4\d{3})|(?:5[1-5]\d{2})|(?:6011)|(?:3[68]\d{2})|(?:30[012345]\d))[ -]?(\d{4})[ -]?(\d{4})[ -]?(\d{4}|3[4,7]\d{13})$" "1234567890123456" 
+"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>01/01/2001" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>02/29/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" G "<0>12/31/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "1/1/02" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "02/30/2002" +"^((((0[13578])|(1[02]))[\/]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\/]?(([0-2][0-9])|(30)))|(02[\/]?[0-2][0-9]))[\/]?\d{4}$" "1/25/2002" +#"^(?=[^\&])(?:(?[^:/?#]+):)?(?://(?[^/?#]*))?(?[^?#]*)(?:\?(?[^#]*))?(?:#(?.*))?" G "<0>http://regexlib.com/REDetails.aspx?regexp_id=x#Details" # out of context, can't work stand-alone +#"^(?=[^\&])(?:(?[^:/?#]+):)?(?://(?[^/?#]*))?(?[^?#]*)(?:\?(?[^#]*))?(?:#(?.*))?" "&" # out of context, can't work stand-alone +"^[-+]?\d+(\.\d+)?$" G "<0>123" +"^[-+]?\d+(\.\d+)?$" G "<0>-123.45" +"^[-+]?\d+(\.\d+)?$" G "<0>+123.56" +"^[-+]?\d+(\.\d+)?$" "123x" +"^[-+]?\d+(\.\d+)?$" ".123" +"^[-+]?\d+(\.\d+)?$" "-123." 
+"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234-1234-1234-1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234 1234 1234 1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" G "<0>1234123412341234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "Visa" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "1234" +"^(\d{4}[- ]){3}\d{4}|\d{16}$" "123-1234-12345" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>6011-1111-1111-1111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>5423-1111-1111-1111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" G "<0>341111111111111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "4111-111-111-111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "3411-1111-1111-111" +"^((4\d{3})|(5[1-5]\d{2})|(6011))-?\d{4}-?\d{4}-?\d{4}|3[4,7]\d{13}$" "Visa" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "<0>4D28C5AD-6482-41CD-B84E-4573F384BB5C" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "<0>B1E1282C-A35C-4D5A-BF8B-7A3A51D9E388" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" G "91036A4A-A0F4-43F0-8CD" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "{B1E1282C-A35C-4D3A-BF8B-7A3A51D9E388}" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "AAAAAAAAAAAAAAAAA" +"^[A-Z0-9]{8}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{4}-[A-Z0-9]{12}$" "B;E1282C-A35C-4D3A-BF8B-7A3A51D9E38" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>4111-1234-1234-1234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>6011123412341234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" G "<0>3711-123456-12345" 
+"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "1234567890123456" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "4111-123-1234-1234" +"(^(4|5)\d{3}-?\d{4}-?\d{4}-?\d{4}|(4|5)\d{15})|(^(6011)-?\d{4}-?\d{4}-?\d{4}|(6011)-?\d{12})|(^((3\d{3}))-\d{6}-\d{5}|^((3\d{14})))" "412-1234-1234-1234" +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' G '<0>[link="http://www.yahoo.com"]Yahoo[/link]' #named capture +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' "[link]http://www.yahoo.com[/link]" #named capture +#'\[link="(?((.|\n)*?))"\](?((.|\n)*?))\[\/link\]' "[link=http://www.yahoo.com]Yahoo[/link]" #named capture +"^[a-zA-Z0-9]+$" G "<0>10a" +"^[a-zA-Z0-9]+$" G "<0>ABC" +"^[a-zA-Z0-9]+$" G "<0>A3fg" +"^[a-zA-Z0-9]+$" "45.3" +"^[a-zA-Z0-9]+$" "this or that" +"^[a-zA-Z0-9]+$" "$23" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" G "<0>(123) 456-7890" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" G "<0>123-456-7890" +"((\(\d{3}\) ?)|(\d{3}-))?\d{3}-\d{4}" "1234567890" +"^[a-zA-Z]\w{3,14}$" G "<0>abcd" +"^[a-zA-Z]\w{3,14}$" G "<0>aBc45DSD_sdf" +"^[a-zA-Z]\w{3,14}$" G "<0>password" +"^[a-zA-Z]\w{3,14}$" "afv" +"^[a-zA-Z]\w{3,14}$" "1234" +"^[a-zA-Z]\w{3,14}$" "reallylongpassword" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>G1 1AA " +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>GIR 0AA" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" G "<0>SW1 1ZZ" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? [0-9][A-Z]{2,}|GIR 0AA$" "BT01 3RT" +"^[A-Z]{1,2}[1-9][0-9]?[A-Z]? 
[0-9][A-Z]{2,}|GIR 0AA$" "G111 1AA" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>03-6106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>036106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" G "<0>02-5523344" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "00-6106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "03-0106666" +"^0[23489]{1}(\-)?[^0\D]{1}\d{6}$" "02-55812346" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>050-346634" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>058633633" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" G "<0>064-228226" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "059-336622" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "064-022663" +"^0(5[012345678]|6[47]){1}(\-)?[^0\D]{1}\d{5}$" "0545454545" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>AA11 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>AA1A 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" G "<0>A11-1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "111 AAA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "1AAA 1AA" +"^([A-Z]{1,2}[0-9]{1,2}|[A-Z]{3}|[A-Z]{1,2}[0-9][A-Z])( |-)[0-9][A-Z]{2}" "A1AA 1AA" +"@{2}((\S)+)@{2}" G "<0>@@test@@" +"@{2}((\S)+)@{2}" G "<0>@@name@@" +"@{2}((\S)+)@{2}" G "<0>@@2342@@" +"@{2}((\S)+)@{2}" "@test@" +"@{2}((\S)+)@{2}" "@@na me@@" +"@{2}((\S)+)@{2}" "@@ name@@" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>00:00" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>13:59" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" G "<0>23:59" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" "24:00" +"([0-1][0-9]|2[0-3]):[0-5][0-9]" "23:60" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>23" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>-17.e23" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" G "<0>+.23e+2" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "+.e2" 
+"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "23.17.5" +"^[+-]?([0-9]*\.?[0-9]+|[0-9]+\.?[0-9]*)([eE][+-]?[0-9]+)?$" "10e2.0" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>email@email.com" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>My Name" +"^([1-zA-Z0-1@.\s ]{1,255})$" G "<0>asdf12df" +"^([1-zA-Z0-1@.\s ]{1,255})$" "‘,\*&$<>" +"^([1-zA-Z0-1@.\s ]{1,255})$" "1001' string" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>12/2002" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>11/1900" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" G "<0>02/1977" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "1/1977" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "00/000" +"^((0[1-9])|(1[0-2]))\/(\d{4})$" "15/2002" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(0 34 56) 34 56 67" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(03 45) 5 67 67" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" G "<0>(0 45) 2 33 45-45" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(2345) 34 34" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(0 56) 456 456" +"^\(\d{1,2}(\s\d{1,2}){1,2}\)\s(\d{1,2}(\s\d{1,2}){1,2})((-(\d{1,4})){0,1})$" "(3 45) 2 34-45678" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>Genesis 3:3-4,6" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>II Sam 2:11,2" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" G "<0>2 Tim 3:16" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" "Genesis chap 3, verse 3" +"(?:\d|I{1,3})?\s?\w{2,}\.?\s*\d{1,}\:\d{1,}-?,?\d{0,2}(?:,\d{0,2}){0,2}" "2nd Samuel 2" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[IMG]http://bleh.jpg[/IMG]" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[ImG]bleh[/imG]" +"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" G "<0>[img]ftp://login:pass@bleh.gif[/img]" 
+"(\[[Ii][Mm][Gg]\])(\S+?)(\[\/[Ii][Mm][Gg]\])" '' +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>10/03/1979" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>1-1-02" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" G "<0>01.1.2003" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "10/03/197" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "01-02-003" +"^([0-9]{1,2})[./-]+([0-9]{1,2})[./-]+([0-9]{2}|[0-9]{4})$" "01 02 03" +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" G "<0>12345" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" G "<0>12345-6789" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "00000" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "00000-0000" # No Conditionals? +#"^(?(^00000(|-0000))|(\d{5}(|-\d{4})))$" "a4650-465s" # No Conditionals? +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>01" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>12" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" G "<0>31" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "123" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "32" +"^((0?[1-9])|((1|2)[0-9])|30|31)$" "abc" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>1.222.333.1234" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>1-223-123-1232" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" G "<0>12223334444" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "1.1.123123.123" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "12-1322-112-31" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?(\d{3}([\s\-./\\])?\d{4}|[a-zA-Z0-9]{7})$" "11231321131" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? 
{1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>DN3 6GB" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>SW42 4RG" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" G "<0>GIR 0AA" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "SEW4 5TY" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "AA2C 4FG" +"^([A-PR-UWYZ0-9][A-HK-Y0-9][AEHMNPRTVXY0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2}|GIR 0AA)$" "AA2 4CV" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>asD1" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>asDF1234" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" G "<0>ASPgo123" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "asdf" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "1234" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z]).{4,8}$" "ASDF12345" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1.222.333.1234" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1-223-123-1232" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" G "<0>1-888-425-DELL" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "1.1.123123.123" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "12-1322-112-31" +"^([0-1]([\s\-./\\])?)?(\(?[2-9]\d{2}\)?|[2-9]\d{3})([\s\-./\\])?([0-9]{3}([\s\-./\\])?[0-9]{4}|[a-zA-Z0-9]{7}|([0-9]{3}[-][a-zA-Z0-9]{4}))" "1-800-CALL-DEL" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>09:00" 
+"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>9:00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" G "<0>11:35" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "13:00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "9.00" +"^(([0]?[1-9]|1[0-2])(:)([0-5][0-9]))$" "6:60" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>1" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>108" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" G "<0>255" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" "01" +"^([1-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])$" "256" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>01/01/2001" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>1/01/2001" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" G "<0>2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "2/30/2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "13/23/2002" +"^((((0[13578])|([13578])|(1[02]))[\/](([1-9])|([0-2][0-9])|(3[01])))|(((0[469])|([469])|(11))[\/](([1-9])|([0-2][0-9])|(30)))|((2|02)[\/](([1-9])|([0-2][0-9]))))[\/]\d{4}$|^\d{4}$" "12345" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>SP939393H" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>PX123456D" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" G "<0>SW355667G" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" "12SP9393H" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" "S3P93930D" +"^[A-Za-z]{2}[0-9]{6}[A-Za-z]{1}$" 
"11223344SP00ddSS" +"(^0[78][2347][0-9]{7})" G "<0>0834128458" +"(^0[78][2347][0-9]{7})" G "<0>0749526308" +"(^0[78][2347][0-9]{7})" "0861212308" +"(^0[78][2347][0-9]{7})" "0892549851" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>C1406HHA" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>A4126AAB" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" G "<0>c1406hha" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "c1406HHA" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "4126" +"^([A-HJ-TP-Z]{1}\d{4}[A-Z]{3}|[a-z]{1}\d{4}[a-hj-tp-z]{3})$" "C1406hha" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>66.129.71.120" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>207.46.230.218" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" G "<0>64.58.76.225" 
+"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "10.0.5.4" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "192.168.0.1" +"^(((25[0-5]|2[0-4][0-9]|19[0-1]|19[3-9]|18[0-9]|17[0-1]|17[3-9]|1[0-6][0-9]|1[1-9]|[2-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9]))|(192\.(25[0-5]|2[0-4][0-9]|16[0-7]|169|1[0-5][0-9]|1[7-9][0-9]|[1-9][0-9]|[0-9]))|(172\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|1[0-5]|3[2-9]|[4-9][0-9]|[0-9])))\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])\.(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])$" "my ip address" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo-foo.com.au" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" G "<0>foo@foo.foo.info" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@.com" +"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@foo..com" 
+"^([\w\d\-\.]+)@{1}(([\w\d\-]{1,67})|([\w\d\-]+\.[\w\d\-]{1,67}))\.(([a-zA-Z\d]{2,4})(\.[a-zA-Z\d]{2})?)$" "foo@me@.com" +"/\*[\d\D]*?\*/" G "<0>/* my comment */" +"/\*[\d\D]*?\*/" G "<0>/* my multiline comment */" +"/\*[\d\D]*?\*/" G "<0>/* my nested comment */" +"/\*[\d\D]*?\*/" "*/ anything here /*" +"/\*[\d\D]*?\*/" "anything between 2 separate comments" +"/\*[\d\D]*?\*/" "\* *\\" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my comment */" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my multiline comment */" +"/\*[\p{N}\P{N}]*?\*/" G "<0>/* my nested comment */" +"/\*[\p{N}\P{N}]*?\*/" "*/ anything here /*" +"/\*[\p{N}\P{N}]*?\*/" "anything between 2 separate comments" +"/\*[\p{N}\P{N}]*?\*/" "\* *\\" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>1/31/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>04-30-02" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" G "<0>12-01/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "2/31/2002" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "13/0/02" +"((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((\d{4})|(\d{2}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((\d{4}|\d{2})))" "Jan 1, 2001" 
+'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>blah@[10.0.0.1]" +'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' G "<0>a@b.c" +'^(([^<>;()\[\]\\.,;:@"]+(\.[^<>()\[\]\\.,;:@"]+)*)|(".+"))@((([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))\.)*(([a-z]([-a-z0-9]*[a-z0-9])?)|(#[0-9]+)|(\[((([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\.){3}(([01]?[0-9]{0,2})|(2(([0-4][0-9])|(5[0-5]))))\]))$' "non@match@." 
+"^\d{9}[\d|X]$" G "<0>1234123412" +"^\d{9}[\d|X]$" G "<0>123412341X" +"^\d{9}[\d|X]$" "not an isbn" +"^\d{9}(\d|X)$" G "<0>1234123412" +"^\d{9}(\d|X)$" G "<0>123412341X" +"^\d{9}(\d|X)$" "not an isbn" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>01/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>1/1/1999" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" G "<0>10/20/2080" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "13/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "1/1/1800" +"^(([1-9])|(0[1-9])|(1[0-2]))\/(([0-9])|([0-2][0-9])|(3[0-1]))\/(([0-9][0-9])|([1-2][0,9][0-9][0-9]))$" "10/32/2080" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>0.25" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>.75" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" G "<0>123.50" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" ".77" +"^\d*\.?((25)|(50)|(5)|(75)|(0)|(00))?$" "1.435" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>12345" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>932 68" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" G "<0>S-621 46" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "5367" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "425611" +"^(s-|S-){0,1}[0-9]{3}\s?[0-9]{2}$" "31 545" +"^\d{5}(-\d{4})?$" G "<0>48222" +"^\d{5}(-\d{4})?$" G "<0>48222-1746" +"^\d{5}(-\d{4})?$" "4632" +"^\d{5}(-\d{4})?$" "Blake" +"^\d{5}(-\d{4})?$" "37333-32" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>test.txt" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>test.jpg.txt" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' G "<0>a&b c.bmp" 
+'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' "CON" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' ".pdf" +'^(?!^(PRN|AUX|CLOCK\$|NUL|CON|COM\d|LPT\d|\..*)(\..+)?$)[^\x00-\x1f\\?*:\";|/]+$' "test:2.pdf" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>1'235.140" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>1'222'333.120" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>456" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "1234.500" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "78'45.123" +"^(\d{1,3}'(\d{3}')*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "123,0012" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T2p 3c7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T3P3c7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" G "<0>T2P 3C7" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "123456" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "3C7T2P" +"^[a-zA-Z][0-9][a-zA-Z]\s?[0-9][a-zA-Z][0-9]$" "11T21RWW" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$1.50" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$49" +"^\$[0-9]+(\.[0-9][0-9])?$" G "<0>$0.50" +"^\$[0-9]+(\.[0-9][0-9])?$" "1.5" +"^\$[0-9]+(\.[0-9][0-9])?$" "$1.333" +"^\$[0-9]+(\.[0-9][0-9])?$" "this $5.12 fails" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G "<0>217.6.9.89" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G "<0>0.0.0.0" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" G "<0>255.255.255.255" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "256.0.0.0" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "0978.3.3.3" +"\b((25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\.){3}(25[0-5]|2[0-4]\d|[01]\d\d|\d?\d)\b" "65.4t.54.3" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>http://www.aspemporium.com" 
+"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>mailto:dominionx@hotmail.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" G "<0>ftp://ftp.test.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "www.aspemporium.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "dominionx@hotmail.com" +"((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)" "bloggs" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(12) 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(01512) 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" G "<0>(0xx12) 1234 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "12 123 1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123/1234" +"\(([0-9]{2}|0{1}((x|[0-9]){2}[0-9]{2}))\)\s*[0-9]{3,4}[- ]*[0-9]{4}" "(012) 123 12345" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob.smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" G "<0>bob_smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "-smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" ".smith@foo.com" +"^\w+[\w-\.]*\@\w+((-\w+)|(\w*))\.[a-z]{2,3}$" "smith@foo_com" +"^(?=.*\d).{4,8}$" G "<0>1234" +"^(?=.*\d).{4,8}$" G "<0>asdf1234" +"^(?=.*\d).{4,8}$" G "<0>asp123" +"^(?=.*\d).{4,8}$" "asdf" +"^(?=.*\d).{4,8}$" "asdf12345" +"^(?=.*\d).{4,8}$" "password" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>user name" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>user#name" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" G "<0>....." 
+"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "User_Name1" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "username@foo.com" +"[^A-Za-z0-9_@\.]|@{2,}|\.{5,}" "user.name@mail.foo.com" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" G "<0>12,654" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" G "<0>1,987" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" "128,2" +"^100$|^[0-9]{1,2}$|^[0-9]{1,2}\,[0-9]{1,3}$" "12," +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" G "<0>https://www.restrictd.com/~myhome/" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "http://www.krumedia.com." +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "(http://www.krumedia.com)" +"^(http|https|ftp)\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(:[a-zA-Z0-9]*)?/?([a-zA-Z0-9\-\._\?\,\'/\\\+\&%\$#\=~])*[^\.\,\)\(\s]$" "http://www.krumedia.com," +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>2&651.50" +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" G "<0>987.895" +"(\d{1,3},(\d{3},)*\d{3}(\.\d{1,3})?|\d{1,3}(\.\d{3})?)$" "25$%787*" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$1,456,983.00" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$1,700.07" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" G "<0>$68,944.23" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$20,86.93" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$1098.84" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9]?)?$" "$150." 
+"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$28,009,987.88" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$23,099.05" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" G "<0>$.88" +"\$[0-9]?[0-9]?[0-9]?((\,[0-9][0-9][0-9])*)?(\.[0-9][0-9])?$" "$234,5.99" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>29/02/2004 20:15:27" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>29/2/04 8:9:5" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" G "<0>31/3/2004 9:20:17" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "29/02/2003 20:15:15" +"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "2/29/04 20:15:15" 
+"^((((31\/(0?[13578]|1[02]))|((29|30)\/(0?[1,3-9]|1[0-2])))\/(1[6-9]|[2-9]\d)?\d{2})|(29\/0?2\/(((1[6-9]|[2-9]\d)?(0[48]|[2468][048]|[13579][26])|((16|[2468][048]|[3579][26])00))))|(0?[1-9]|1\d|2[0-8])\/((0?[1-9])|(1[0-2]))\/((1[6-9]|[2-9]\d)?\d{2})) (20|21|22|23|[0-1]?\d):[0-5]?\d:[0-5]?\d$" "31/3/4 9:20:17" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>something@someserver.com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>firstname.lastname@mailserver.domain.com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" G "<0>username-something@some-server.nl" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "username@someserver.domain.c" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "somename@server.domain-com" +"^([a-zA-Z0-9_\-\.]+)@([a-zA-Z0-9_\-\.]+)\.([a-zA-Z]{2,5})$" "someone@something.se_eo" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8 am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" G "<0>8:00 am" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8 a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)|(^([0-9]|[1][0-9]|[2][0-3])(\s{0,1})(AM|PM|am|pm|aM|Am|pM|Pm{2,2})$)" "8:00 a" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>55(21)123-4567" 
+"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>(11)1234-5678" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" G "<0>55(71)4562-2234" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "3434-3432" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "4(23)232-3232" +"^([0-9]{2})?(\([0-9]{2})\)([0-9]{3}|[0-9]{4})-[0-9]{4}$" "55(2)232-232" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>1:01 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>23:52:01" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" G "<0>03.24.36 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "19:31 AM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "9:9 PM" +"^((([0]?[1-9]|1[0-2])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)[0-5][0-9]((:|\.)[0-5][0-9])?))$" "25:60:61" +"^\d{0,2}(\.\d{1,2})?$" G "<0>99.99" +"^\d{0,2}(\.\d{1,2})?$" G "<0>99" +"^\d{0,2}(\.\d{1,2})?$" G "<0>.99" +"^\d{0,2}(\.\d{1,2})?$" "999.999" +"^\d{0,2}(\.\d{1,2})?$" "999" +"^\d{0,2}(\.\d{1,2})?$" ".999" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" G "<0>1agdA*$#" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "wyrn%@*&$# f" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "mbndkfh782" +"^(?=.*\d)(?=.*[a-z])(?=.*[A-Z])(?!.*\s).{4,8}$" "BNfhjdhfjd&*)%#$)" 
+"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>freshmeat.net" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>123.com" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" G "<0>TempLate-toolkKt.orG" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "-dog.com" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "?boy.net" +"^([a-zA-Z0-9][-a-zA-Z0-9]*[a-zA-Z0-9]\.)+([a-zA-Z0-9]{3,5})$" "this.domain" +"^[^']*$" G "<0>asljas" +"^[^']*$" G "<0>%/&89uhuhadjkh" +"^[^']*$" G '<0>"hi there!"' +"^[^']*$" "'hi there!'" +"^[^']*$" "It's 9 o'clock" +"^[^']*$" "'''''" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>((24,((1,2,3),(3,4,5))))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>((1,((2,3,4),(4,5,6),(96,34,26))),(12,((1,3,4),(4,5,6),(7,8,9))))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" G "<0>()" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "(24,((1,2,3),(3,4,5)))" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "( )" +"(^\(\)$|^\(((\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\),)*(\([0-9]+,(\((\([0-9]+,[0-9]+,[0-9]+\),)*(\([0-9]+,[0-9]+,[0-9]+\)){1}\))+\)){1}\)))$" "((23,(12,3,4),(4,5,6)))" +"^[a-zA-Z0-9\s .\-_']+$" G "<0>dony d'gsa" +"^[a-zA-Z0-9\s .\-_']+$" "^[a-zA-Z0-9\s.\-_']+$" 
+"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>example@example.com" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>foo@bar.info" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" G "<0>blah@127.0.0.1" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "broken@@example.com" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "foo@bar.infp" +"^[_a-zA-Z0-9-]+(\.[_a-zA-Z0-9-]+)*@[a-zA-Z0-9-]+(\.[a-zA-Z0-9-]+)*\.(([0-9]{1,3})|([a-zA-Z]{2,3})|(aero|coop|info|museum|name))$" "blah@.nospam.biz" +"^\d{5}(-\d{3})?$" G "<0>13165-000" +"^\d{5}(-\d{3})?$" G "<0>38175-000" +"^\d{5}(-\d{3})?$" G "<0>81470-276" +"^\d{5}(-\d{3})?$" "13165-00" +"^\d{5}(-\d{3})?$" "38175-abc" +"^\d{5}(-\d{3})?$" "81470-2763" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$0.84" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$123458" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" G "<0>$1,234,567.89" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$12,3456.01" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "12345" +"^\$(\d{1,3}(\,\d{3})*|(\d+))(\.\d{2})?$" "$1.234" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>C:\\temp\\this allows spaces\\web.config" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" G "<0>\\\\Andromeda\\share\\file name.123" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "tz:\temp\ fi*le?na:m.doc" +"([A-Z]:\\[^/:\*\?<>\|]+\.\w{2,6})|(\\{2}[^/:\*\?<>\|]+\.\w{2,6})" "\\Andromeda\share\filename.a" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>10:35" 
+"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>9:20" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" G "<0>23" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "24:00" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "20 PM" +"(^([0-9]|[0-1][0-9]|[2][0-3]):([0-5][0-9])$)|(^([0-9]|[1][0-9]|[2][0-3])$)" "20:15 PM" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>$3,023,123.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>9,876,453" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" G "<0>123456.78" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "4,33,234.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "$1.234" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(\.[0-9][0-9])?$" "abc" +"^\$?\d+(\.(\d{2}))?$" G "<0>$2.43" +"^\$?\d+(\.(\d{2}))?$" G "<0>2.02" +"^\$?\d+(\.(\d{2}))?$" G "<0>$2112" +"^\$?\d+(\.(\d{2}))?$" "2.1" +"^\$?\d+(\.(\d{2}))?$" "$.14" +"^\$?\d+(\.(\d{2}))?$" "$2,222.12" +/("[^"]*")|('[^\r]*)(\r\n)?/ G '<0>"my string"' +/("[^"]*")|('[^\r]*)(\r\n)?/ G '<0>"a string with \u0027 in it"' +/("[^"]*")|('[^\r]*)(\r\n)?/ G "<0>' comment" +/("[^"]*")|('[^\r]*)(\r\n)?/ /asd "/ +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" G "<0>BFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" G "<0>BFDB4d31-3e35-4dab-afca-5e6e5c8f61ea" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "qqqBFDB4D31-3E35-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E-4DAB-AFCA-5E6E5C8F61EA" +"^[A-Za-z0-9]{8}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{4}-[A-Za-z0-9]{12}$" "BFDB4D31-3E35-4DAB-AF" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>12.345-678" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G 
"<0>23.345-123" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" G "<0>99.999" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "41222-222" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "3.444-233" +"^\d{2}(\x2e)(\d{3})(-\d{3})?$" "43.324444" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>12.345-678" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>23.345-123" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" G "<0>99.999" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "41222-222" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "3.444-233" +"^\d{2}(\u002e)(\d{3})(-\d{3})?$" "43.324444" +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>c:\folder\sub folder\file.txt" # TODO: debug +#"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" G "<0>\\network\folder\file.txt" # TODO: debug +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "C:\file.xls" +"^(([a-zA-Z]:)|(\\{2}\w+)\$?)(\\(\w[\w ]*))+\.(txt|TXT)$" "folder.txt" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>my.domain.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>regexlib.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>big-reg.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" ".mydomain.com" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "regexlib.comm" +"^[a-zA-Z0-9]+([a-zA-Z0-9\-\.]+)?\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "-bigreg.com" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" G "<0>0001-12-31" +"^\d{4}[\-\/\s ]?((((0[13578])|(1[02]))[\-\/\s ]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s ]?(([0-2][0-9])|(30)))|(02[\-\/\s ]?[0-2][0-9]))$" G "<0>9999 09 30" 
+"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" G "<0>2002/03/03" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "0001\\02\\30" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "9999.15.01" +"^\d{4}[\-\/\s]?((((0[13578])|(1[02]))[\-\/\s]?(([0-2][0-9])|(3[01])))|(((0[469])|(11))[\-\/\s]?(([0-2][0-9])|(30)))|(02[\-\/\s]?[0-2][0-9]))$" "2002/3/3" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://psychopop.org" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://www.edsroom.com/newUser.asp" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" G "<0>http://unpleasant.jarrin.net/markov/inde" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "ftp://psychopop.org" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "http://www.edsroom/" +"^http\://[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(/\S*)?$" "http://un/pleasant.jarrin.net/markov/index.asp" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0>1145" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0>933" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" G "<0> 801" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "0000" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "1330" +"^( [1-9]|[1-9]|0[1-9]|10|11|12)[0-5]\d$" "8:30" +"^\d{1,2}\/\d{2,4}$" G "<0>9/02" +"^\d{1,2}\/\d{2,4}$" G "<0>09/2002" +"^\d{1,2}\/\d{2,4}$" G "<0>09/02" +"^\d{1,2}\/\d{2,4}$" "Fall 2002" +"^\d{1,2}\/\d{2,4}$" "Sept 2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>01/01/2001" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>02/30/2001" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" G "<0>12/31/2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/1/02" 
+"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/1/2002" +"^(|(0[1-9])|(1[0-2]))\/((0[1-9])|(1\d)|(2\d)|(3[0-1]))\/((\d{4}))$" "1/25/2002" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>15615552323" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>1-561-555-1212" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" G "<0>5613333" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "1-555-5555" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "15553333" +"^(1?(-?\d{3})-?)?(\d{3})(-?\d{4})$" "0-561-555-1212" +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>' +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' G '<0>" # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' '' # TODO: \w in pattern +'<[^>]*name[\s]*=[\s]*"?[^\w_]*"?[^>]*>' "The dirty brown fox stank like" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00 AM" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>12:00 PM" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" G "<0>1:00am" +"^(1|01|2|02|3|03|4|04|5|05|6|06|7|07|8|08|9|09|10|11|12{1,2}):(([0-5]{1}[0-9]{1}\s{0,1})([AM|PM|am|pm]{2,2}))\W{0}$" "24:00" +"^\d*$" G "<0>123" +"^\d*$" G "<0>000" +"^\d*$" G "<0>43" +"^\d*$" "asbc" +"^\d*$" "-34" +"^\d*$" "3.1415" +"^[-+]?\d*$" G "<0>123" +"^[-+]?\d*$" G "<0>-123" +"^[-+]?\d*$" G "<0>+123" +"^[-+]?\d*$" "abc" +"^[-+]?\d*$" "3.14159" +"^[-+]?\d*$" "-3.14159" +"^\d*\.?\d*$" G "<0>123" +"^\d*\.?\d*$" G "<0>3.14159" +"^\d*\.?\d*$" G "<0>.234" +"^\d*\.?\d*$" "abc" +"^\d*\.?\d*$" "-3.14159" +"^\d*\.?\d*$" "3.4.2" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>44240" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>44240-5555" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" G "<0>T2P 3C7" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "44240ddd" 
+"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "t44240-55" +"^((\d{5}-\d{4})|(\d{5})|([A-Z]\d[A-Z]\s\d[A-Z]\d))$" "t2p3c7" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-8970 x12" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" G "<0>(910)456-8970 1211" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 156-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 056-7890" +"^[\\(]{0,1}([0-9]){3}[\\)]{0,1}[ ]?([^0-1]){1}([0-9]){2}[ ]?[-]?[ ]?([0-9]){4}[ ]*((x){0,1}([0-9]){1,5}){0,1}$" "(910) 556-7890 x" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>31.01.2002" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>29.2.2004" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" G "<0>09.02.2005" 
+"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "31.11.2002" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "29.2.2002" +"^((0?[1-9]|[12][1-9]|3[01])\.(0?[13578]|1[02])\.20[0-9]{2}|(0?[1-9]|[12][1-9]|30)\.(0?[13456789]|1[012])\.20[0-9]{2}|(0?[1-9]|1[1-9]|2[0-8])\.(0?[123456789]|1[012])\.20[0-9]{2}|(0?[1-9]|[12][1-9])\.(0?[123456789]|1[012])\.20(00|04|08|12|16|20|24|28|32|36|40|44|48|52|56|60|64|68|72|76|80|84|88|92|96))$" "33.06.2000" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>12/31/2003" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>01/01/1900" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" G "<0>11/31/2002" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "1/1/2002" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "01/01/02" +"^(0[1-9]|1[0-2])\/((0[1-9]|2\d)|3[0-1])\/(19\d\d|200[0-3])$" "01/01/2004" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2003" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2002 3:33 pm" 
+"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" G "<0>3/3/2003 3:33:33 am" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "13/1/2002" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "3/3/2002 3:33" +"^((((([13578])|(1[0-2]))[\-\/\s]?(([1-9])|([1-2][0-9])|(3[01])))|((([469])|(11))[\-\/\s]?(([1-9])|([1-2][0-9])|(30)))|(2[\-\/\s]?(([1-9])|([1-2][0-9]))))[\-\/\s]?\d{4})(\s((([1-9])|(1[02]))\:([0-5][0-9])((\s)|(\:([0-5][0-9])\s))([AM|PM|am|pm]{2,2})))?$" "31/3/2002" +"([a-zA-Z]:(\\w+)*\\[a-zA-Z0_9]+)?.xls" G "<0>E:\DyAGT\SD01A_specV2.xls" +"([a-zA-Z]:(\\w+)*\\[a-zA-Z0_9]+)?.xls" "E:\DyAGT\SD01A_specV2.txt" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>02/29/2084" 
+"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>01/31/2000" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" G "<0>11/30/2000" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "02/29/2083" +"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "11/31/2000" 
+"(((0[13578]|10|12)([-./])(0[1-9]|[12][0-9]|3[01])([-./])(\d{4}))|((0[469]|11)([-./])([0][1-9]|[12][0-9]|30)([-./])(\d{4}))|((2)([-./])(0[1-9]|1[0-9]|2[0-8])([-./])(\d{4}))|((2)(\.|-|\/)(29)([-./])([02468][048]00))|((2)([-./])(29)([-./])([13579][26]00))|((2)([-./])(29)([-./])([0-9][0-9][0][48]))|((2)([-./])(29)([-./])([0-9][0-9][2468][048]))|((2)([-./])(29)([-./])([0-9][0-9][13579][26])))" "01/32/2000" +"^[a-zA-Z0-9\s .\-]+$" G "<0>2222 Mock St." # TODO: \s in patterns not implemented +"^[a-zA-Z0-9\s .\-]+$" G "<0>1 A St." +"^[a-zA-Z0-9\s .\-]+$" G "<0>555-1212" +"^[a-zA-Z0-9\s.\-]+$" "[A Street]" +"^[a-zA-Z0-9\s.\-]+$" "(3 A St.)" +"^[a-zA-Z0-9\s.\-]+$" "{34 C Ave.}" +"^[a-zA-Z0-9\s.\-]+$" "Last.*?(\d+.?\d*)" +"^[a-zA-Z0-9\s .\-]+$" G " Last1-(123)-123-1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" G "<0>123 123 1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" G "<0>1-800-ALPHNUM" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "1.123.123.1234" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "(123)-1234-123" +"^([0-9]( |-)?)?(\(?[0-9]{3}\)?|[0-9]{3})( |-)?([0-9]{3}( |-)?[0-9]{4}|[a-zA-Z0-9]{7})$" "123-1234" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>02:04" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>16:56" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" G "<0>23:59" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "02:00 PM" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "PM2:00" +"^([0-1][0-9]|[2][0-3]):([0-5][0-9])$" "24:00" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>01/01/1990" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>12/12/9999" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" G "<0>3/28/2001" 
+"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "3-8-01" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "13/32/1001" +"^[0,1]?\d{1}\/(([0-2]?\d{1})|([3][0,1]{1}))\/(([1]{1}[9]{1}[9]{1}\d{1})|([2-9]{1}\d{3}))$" "03/32/1989" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>1.2123644567" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>0-234.567/8912" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" G "<0>1-(212)-123 4567" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "0-212364345" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "1212-364,4321" +"((\(\d{3}\)?)|(\d{3}))([\s \-./]?)(\d{3})([\s \-./]?)(\d{4})" "0212\345/6789" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000 000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000-000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" G "<0>000000000000000000" +"^([0-9]{6}[\s \-]{1}[0-9]{12}|[0-9]{18})$" "000000_000000000000" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>01/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>1/1/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" G "<0>01/1/01" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "13/01/2001" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "1/2/100" +"^(([1-9])|(0[1-9])|(1[0-2]))\/((0[1-9])|([1-31]))\/((\d{2})|(\d{4}))$" "09/32/2001" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>$3,023,123.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>9,876,453" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" G "<0>123456.78" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" "4,33,234.34" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" 
"$1.234" +"^\$?([0-9]{1,3},([0-9]{3},)*[0-9]{3}|[0-9]+)(.[0-9][0-9])?$" "abc" +"^\d{5}$|^\d{5}-\d{4}$" G "<0>55555-5555" +"^\d{5}$|^\d{5}-\d{4}$" G "<0>34564-3342" +"^\d{5}$|^\d{5}-\d{4}$" G "<0>90210" +"^\d{5}$|^\d{5}-\d{4}$" "434454444" +"^\d{5}$|^\d{5}-\d{4}$" "645-32-2345" +"^\d{5}$|^\d{5}-\d{4}$" "abc" +"^\d{3}-\d{2}-\d{4}$" G "<0>333-22-4444" +"^\d{3}-\d{2}-\d{4}$" G "<0>123-45-6789" +"^\d{3}-\d{2}-\d{4}$" "123456789" +"^\d{3}-\d{2}-\d{4}$" "SSN" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>800-555-5555" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>333-444-5555" +"^[2-9]\d{2}-\d{3}-\d{4}$" G "<0>212-666-1234" +"^[2-9]\d{2}-\d{3}-\d{4}$" "000-000-0000" +"^[2-9]\d{2}-\d{3}-\d{4}$" "123-456-7890" +"^[2-9]\d{2}-\d{3}-\d{4}$" "2126661234" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>44240" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>44240-5555" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" G "<0>G3H 6A3" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "Ohio" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "abc" +"^\d{5}-\d{4}|\d{5}|[A-Z]\d[A-Z] \d[A-Z]\d$" "g3h6a3" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054 WD" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054WD" +"[0-9]{4}\s*[a-zA-Z]{2}" G "<0>1054 wd" +"[0-9]{4}\s*[a-zA-Z]{2}" "10543" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>0732105432" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>1300333444" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" G "<0>131313" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" "32105432" +"(^1300\d{6}$)|(^1800|1900|1902\d{6}$)|(^0[2|3|7|8]{1}[0-9]{8}$)|(^13\d{4}$)|(^04\d{2,3}\d{6}$)" "13000456" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>http://207.68.172.254/home.ashx" 
+"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>ftp://ftp.netscape.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" G "<0>https://www.brinkster.com/login.asp" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "htp://mistake.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "http://www_address.com/" +"^((https?|ftp)\://((\[?(\d{1,3}\.){3}\d{1,3}\]?)|(([\-a-zA-Z0-9]+\.)+[a-zA-Z]{2,4}))(\:\d+)?(/[\-a-zA-Z0-9._?,'+\&%$#=~\\]+)*/?)$" "ftp://www.files.com/file with spaces.txt" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>2002-11-03" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>2007-17-08" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" G "<0>9999-99-99" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "2002/17/18" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "2002.18.45" +"([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})" "18.45.2002" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>$0,234.50" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>0234.5" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" G "<0>0,234." 
+"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" "$1,23,50" +"^\$?(\d{1,3}(\,\d{3})*|(\d+))(\.\d{0,2})?$" "$123.123" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12.345-678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12345-678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" G "<0>12345678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "12.345678" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "12345-1" +"(^\d{5}-\d{3}|^\d{2}.\d{3}-\d{3}|\d{8})" "123" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>x:\\test\\testing.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>x:\\test\\test#$ ing.html" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' G "<0>\\\\test\testing.html" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "x:\test\test/ing.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "x:\test\test*.htm" +'^([a-zA-Z]\:|\\)\\([^\\]+\\)*[^\/:*?"<>|]+\.htm(l)?$' "\\test?<.htm" +"^[1-9]{1}[0-9]{3}$" G "<0>1234" +"^[1-9]{1}[0-9]{3}$" "123" +"^[1-9]{1}[0-9]{3}$" "123A" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A-1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A 1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" G "<0>A1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" "AA-1234" +"^[A-Z]{1}( |-)?[1-9]{1}[0-9]{3}$" "A12345" +"^(F-)?[0-9]{5}$" G "<0>12345" +"^(F-)?[0-9]{5}$" G "<0>F-12345" +"^(F-)?[0-9]{5}$" "F12345" +"^(F-)?[0-9]{5}$" "F-123456" +"^(F-)?[0-9]{5}$" "123456" +"^(V-|I-)?[0-9]{4}$" G "<0>1234" +"^(V-|I-)?[0-9]{4}$" G "<0>V-1234" +"^(V-|I-)?[0-9]{4}$" "12345" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" G "<0>1234 AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" G "<0>1234AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" "123AB" +"^[1-9]{1}[0-9]{3} ?[A-Z]{2}$" "1234AAA" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>12345" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>10234" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" G "<0>01234" +"^([1-9]{2}|[0-9][1-9]|[1-9][0-9])[0-9]{3}$" "00123" +"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>John Doe Sr." 
+"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>100 Elm St., Suite 25" +"^(/w|/W|[^<>+?$%\{}\&])+$" G "<0>Valerie's Gift Shop" +"^(/w|/W|[^<>+?$%\{}\&])+$" "

    Hey

    " +/<[a-zA-Z][^>]*\son\w+=(\w+|'[^']*'|"[^"]*")[^>]*>/ G '<0>' +/<[a-zA-Z][^>]*\son\w+=(\w+|'[^']*'|"[^"]*")[^>]*>/ '' +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>1" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>12345.123" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" G "<0>0.5" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "0" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "0.0" +"(?!^0*$)(?!^0*\.0*$)^\d{1,5}(\.\d{1,3})?$" "123456.1234" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>whatever@somewhere.museum" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>foreignchars@myforeigncharsdomain.nu" +"^.+@[^\.].*\.[a-z]{2,}$" G "<0>me+mysomething@mydomain.com" +"^.+@[^\.].*\.[a-z]{2,}$" "a@b.c" +"^.+@[^\.].*\.[a-z]{2,}$" "me@.my.com" +"^.+@[^\.].*\.[a-z]{2,}$" "a@b.comFOREIGNCHAR" +"^(\d{5}-\d{4}|\d{5})$" G "<0>12345" +"^(\d{5}-\d{4}|\d{5})$" G "<0>12345-1234" +"^(\d{5}-\d{4}|\d{5})$" "12345-12345" +"^(\d{5}-\d{4}|\d{5})$" "123" +"^(\d{5}-\d{4}|\d{5})$" "12345-abcd" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>0.0.0.0" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>255.255.255.02" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" G "<0>192.168.0.136" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "256.1.3.4" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "023.44.33.22" +"^(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])\.(\d{1,2}|1\d\d|2[0-4]\d|25[0-5])$" "10.57.98.23." 
+"]*[^/])>" G '<0>' +"]*[^/])>" '' +"" G "<0>" +"" G "<0>" +"" "this is a comment" +"" G "<0>" +"" G "<0>" +"" "this is a comment" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G "<0>" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G '<0>' +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ G "<0>" +/<\u002f?(\w+)(\s+\w+=(\w+|"[^"]*"|'[^']*'))*>/ "No Tag Here ..." +"(\{\\f\d*)\\([^;]+;)" G "<0>{\\f0\\Some Font names here;" +"(\{\\f\d*)\\([^;]+;)" G "<0>{\\f1\\fswiss\\fcharset0\\fprq2{\\*\\panose 020b0604020202020204}Arial;" +"(\{\\f\d*)\\([^;]+;)" G "{\\f" +"(\{\\f\d*)\\([^;]+;)" "{f0fs20 some text}" +#"" G '<0>space' # TODO: Can't quote this pattern with the test syntax! +#"" "this is not a tag" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>12/30/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/12/1998 13:30" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" G "<0>01/28/2002 22:35:00" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "13/30/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "01/12/1998 24:30" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0-1]\d|[2][0-3])(\:[0-5]\d){1,2})?$" "01/28/2002 22:35:64" +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>BEGIN:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>TEL;WORK;VOICE:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" G "<0>TEL:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" "begin:" #named capture +#"((?(^[A-Z0-9-;=]*:))(?(.*)))" "TEL;PREF;" #named capture +'^]*)>(.*?(?=<\/a>))<\/a>$' G '<0>my external link' +'^]*)>(.*?(?=<\/a>))<\/a>$' G ']*)>(.*?(?=<\/a>))<\/a>$' 'my internal link' 
+"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002 08:00" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" G "<0>12/31/2002 08:00 AM" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" "12/31/02" +"^([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\/([2][01]|[1][6-9])\d{2}(\s([0]\d|[1][0-2])(\:[0-5]\d){1,2})*\s*([aApP][mM]{0,2})?$" "12/31/2002 14:00" +"
    (?:\s*([^<]+)
    \s*)+
    " G "<0>
    string1
    string2
    string3
    " +"
    (?:\s*([^<]+)
    \s*)+
    " ".." +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>1/2/03" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>2/30/1999" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" G "<0>03/04/19" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" "3/4/2020" +"^((0?[13578]|10|12)(-|\/)((0[0-9])|([12])([0-9]?)|(3[01]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1}))|(0?[2469]|11)(-|\/)((0[0-9])|([12])([0-9]?)|(3[0]?))(-|\/)((19)([2-9])(\d{1})|(20)([01])(\d{1})|([8901])(\d{1})))$" "3/4/1919" +']*))*|/?>' G '<0>' +']*))*|/?>' G "<0>" +']*))*|/?>' G "<0>
    " +']*))*|/?>' "this is a test..." +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0>12:00am" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0>1:00 PM" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" G "<0> 12:59 pm" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "0:00" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "0:01 am" +"^ *(1[0-2]|[1-9]):[0-5][0-9] *(a|p|A|P)(m|M) *$" "13:00 pm" +"\({1}[0-9]{3}\){1}\-{1}[0-9]{3}\-{1}[0-9]{4}" G "<0>(111)-111-1111" +"\({1}[0-9]{3}\){1}\-{1}[0-9]{3}\-{1}[0-9]{4}" "11111111111" +"[^abc]" G "<0>def" +"[^abc]" "abc" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>01/01/2002 04:42" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>5-12-02 04:42 AM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" G "<0>01.01/02 04-42aM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-1999 4:50PM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-2002 15:10PM" +"^(([0]?[1-9]|[1][0-2])[\/|\-|\.]([0-2]\d|[3][0-1]|[1-9])[\/|\-|\.]([2][0])?\d{2}\s+((([0][0-9]|[1][0-2]|[0-9])[\:|\-|\.]([0-5]\d)\s*([aApP][mM])?)|(([0-1][0-9]|[2][0-3]|[0-9])[\:|\-|\.]([0-5]\d))))$" "01-12-002 8:20PM" 
+"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>11-02-02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>1-25-2002" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" G "<0>01/25/2002" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "13-02-02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "11.02.02" +"^([1][12]|[0]?[1-9])[\/-]([3][01]|[12]\d|[0]?[1-9])[\/-](\d{4}|\d{2})$" "11/32/2002" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>09:30:00" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>17:45:20" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" G "<0>23:59:59" +"(([0-1][0-9])|([2][0-3])):([0-5][0-9]):([0-5][0-9])" "24:00:00" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>29/02/2000" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>31/01/2000" 
+"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" G "<0>30-01-2000" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "29/02/2002" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "32/01/2002" +"(((0[1-9]|[12][0-9]|3[01])([-./])(0[13578]|10|12)([-./])(\d{4}))|(([0][1-9]|[12][0-9]|30)([-./])(0[469]|11)([-./])(\d{4}))|((0[1-9]|1[0-9]|2[0-8])([-./])(02)([-./])(\d{4}))|((29)(\.|-|\/)(02)([-./])([02468][048]00))|((29)([-./])(02)([-./])([13579][26]00))|((29)([-./])(02)([-./])([0-9][0-9][0][48]))|((29)([-./])(02)([-./])([0-9][0-9][2468][048]))|((29)([-./])(02)([-./])([0-9][0-9][13579][26])))" "10/2/2002" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>01 46 70 89 12" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>01-46-70-89-12" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" G "<0>0146708912" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "01-46708912" 
+"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "01 46708912" +"^0[1-6]{1}(([0-9]{2}){4})|((\s[0-9]{2}){4})|((-[0-9]{2}){4})$" "+33235256677" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>good.gif" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>go d.GIf" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" G "<0>goo_d.jPg" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "junk" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "bad.bad.gif" +"^[0-9A-Za-z_ ]+(.[jJ][pP][gG]|.[gG][iI][fF])$" "slash\gif." +"<[^>\s]*\bauthor\b[^>]*>" G '<0>' +"<[^>\s]*\bauthor\b[^>]*>" G "<0>" +# "<[^>\s]*\bauthor\b[^>]*>" G '<0>' #Debug should work +"<[^> ]*\bauthor\b[^>]*>" G "<0>" +"<[^> ]*\bauthor\b[^>]*>" G '<0>' +"<[^>\s]*\bauthor\b[^>]*>" "" +"<[^>\s]*\bauthor\b[^>]*>" "" +"<[^>\s]*\bauthor\b[^>]*>" "author" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>04/2/29" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>2002-4-30" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" G "<0>02.10.31" 
+"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "2003/2/29" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "02.4.31" +"^(?:(?:(?:(?:1[6-9]|[2-9]\d)?(?:0[48]|[2468][048]|[13579][26])|(?:(?:16|[2468][048]|[3579][26])00)))(\/|-|\.)(?:0?2\1(?:29))$)|(?:(?:1[6-9]|[2-9]\d)?\d{2})(\/|-|\.)(?:(?:(?:0?[13578]|1[02])\2(?:31))|(?:(?:0?[1,3-9]|1[0-2])\2(29|30))|(?:(?:0?[1-9])|(?:1[0-2]))\2(?:0?[1-9]|1\d|2[0-8]))$" "00/00/00" +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>5\u0027-3/16"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>1\u0027-2"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' G '<0>5/16"' +'(\d*)\u0027*-*(\d*)/*(\d*)"' '1 3/16' +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>1" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>23" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" G "<0>50" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "0" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "111" +"^[1-9]{1}$|^[1-4]{1}[0-9]{1}$|^50$" "xyz" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>Jon Doe" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>J\u00f8rn" +"^([ \u00c0-\u01ffa-zA-Z'])+$" G "<0>Mc'Neelan" +"^([ \u00c0-\u01ffa-zA-Z'])+$" "Henry); hacking attempt" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>1:00 PM" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>6:45 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" G "<0>17:30" 
+"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "4:32 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "5:30:00 am" +"^((([0]?[1-9]|1[0-2])(:|\.)(00|15|30|45)?( )?(AM|am|aM|Am|PM|pm|pM|Pm))|(([0]?[0-9]|1[0-9]|2[0-3])(:|\.)(00|15|30|45)?))$" "17:01" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>0.050" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>5.0000" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" G "<0>5000" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" "0.0" +"(^\d*\.?\d*[1-9]+\d*$)|(^[1-9]+\d*\.\d*$)" ".0" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" G "<0>Sacramento" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><2>San Francisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "<0><3>San Luis Obispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanFrancisco" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "SanLuisObispo" +"^([A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^([A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,}\040[A-Z]{1}[a-z]{1,})$|^$" "San francisco" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" +"^\{?[a-fA-F\d]{8}-([a-fA-F\d]{4}-){3}[a-fA-F\d]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" 
+"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>{e02ff0e4-00ad-090A-c030-0d00a0008ba0}" +"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" G "<0>e02ff0e4-00ad-090A-c030-0d00a0008ba0" +"^\{?[a-fA-F0-9]{8}-([a-fA-F0-9]{4}-){3}[a-fA-F0-9]{12}\}?$" "0xe02ff0e400ad090Ac0300d00a0008ba0" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>@12X*567" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>1#Zv96g@*Yfasd4" +"^([a-zA-Z0-9@*#]{8,15})$" G "<0>#67jhgt@erd" +"^([a-zA-Z0-9@*#]{8,15})$" "$12X*567" +"^([a-zA-Z0-9@*#]{8,15})$" "1#Zv_96" +"^([a-zA-Z0-9@*#]{8,15})$" "+678jhgt@erd" +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' G '<0>href="produktsida.asp?kategori2=218"' +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' G '<0>href="NuclearTesting.htm"' +'(("|\u0027)[a-z0-9\/\.\?\=\&]*(\.htm|\.asp|\.php|\.jsp)[a-z0-9\/\.\?\=\&]*("|\u0027))|(href=*?[a-z0-9\/\.\?\=\&"\u0027]*)' 'U Suck' +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>05-01-2002" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>29-02-2004" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" G "<0>31-12-2002" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "1-1-02" +"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "29-02-2002" 
+"^(((((0[1-9])|(1\d)|(2[0-8]))-((0[1-9])|(1[0-2])))|((31-((0[13578])|(1[02])))|((29|30)-((0[1,3-9])|(1[0-2])))))-((20[0-9][0-9]))|(29-02-20(([02468][048])|([13579][26]))))$" "31-11-2002" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456.123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456,123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" G "<0>123456" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "123a.123" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "123a,123" +"^\d*[0-9](|.\d*[0-9]|,\d*[0-9])?$" "a" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>AC" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>RJ" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" G "<0>SP" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "XX" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "AB" +"^(ac|AC|al|AL|am|AM|ap|AP|ba|BA|ce|CE|df|DF|es|ES|go|GO|ma|MA|mg|MG|ms|MS|mt|MT|pa|PA|pb|PB|pe|PE|pi|PI|pr|PR|rj|RJ|rn|RN|ro|RO|rr|RR|rs|RS|sc|SC|se|SE|sp|SP|to|TO)$" "HJ" +"^[+]?\d*$" G "<0>0123456789" +"^[+]?\d*$" G "<0>1234" +"^[+]?\d*$" G "<0>1" +"^[+]?\d*$" "1.0?&" +"^[+]?\d*$" "a1" +"^[+]?\d*$" "2a-" +#/<[aA][ ]{0,}([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,}>((<(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})>([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})|(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})){1,}/ G "<0>this text is italicized" #TODO: Need infinite loop breaking 
+#/<[aA][ ]{0,}([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,}>((<(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})>([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})|(([a-zA-Z0-9"'_,.:;!?@$\&()%=\u002f ]|[\-]|[ \f]){0,})){1,}/ "

    " #TODO: need infinite loop breaking. +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>0:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>23:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" G "<0>00:59" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "0:0" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "24:00" +"^([0-1]?[0-9]|[2][0-3]):([0-5][0-9])$" "00:60" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" G "<0>11/03" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" G "<0>01/04" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" "13/03" +"^((0[1-9])|(1[0-2]))\/(\d{2})$" "10/2003" +"]*>[\w|\t|\r|\W]*" G '<0>' +"]*>[\w|\t|\r|\W]*" "--" +"]*>[\w|\t|\r|\W]*" "A-Z][a-z]+" +#"]*>[\w|\t|\r|\W]*" G "<0>strFirstName" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>intAgeInYears" # Test Case damaged? +#"]*>[\w|\t|\r|\W]*" G "<0>Where the Wild Things Are" # Test Case damaged? +"]*>[\w|\t|\r|\W]*" "123" +"]*>[\w|\t|\r|\W]*" "abc" +"]*>[\w|\t|\r|\W]*" "this has no caps in it" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-0.050" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5.000" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" G "<0>-5" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" "0" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" "0.0" +"(^-\d*\.?\d*[1-9]+\d*$)|(^-[1-9]+\d*\.\d*$)" ".0" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>2002/02/03" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" G "<0>2002/02/03 12:12:18" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "2002/02/36" +"^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1]))$|^([2][0]\d{2}\/([0]\d|[1][0-2])\/([0-2]\d|[3][0-1])\s([0-1]\d|[2][0-3])\:[0-5]\d\:[0-5]\d)$" "02/03/2002" +"^(\d|,)*\.?\d*$" G "<0>1,000" +"^(\d|,)*\.?\d*$" G "<0>3,000.05" 
+"^(\d|,)*\.?\d*$" G "<0>5,000,000" +"^(\d|,)*\.?\d*$" "abc" +"^(\d|,)*\.?\d*$" "$100,000" +"^(\d|,)*\.?\d*$" "Forty" +"^\d$" G "<0>1" +"^\d$" G "<0>2" +"^\d$" G "<0>3" +"^\d$" "a" +"^\d$" "324" +"^\d$" "num" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" G "<0>1234567890" +"^[0-9]+$" "http://none" +"^[0-9]+$" "http://none" +"^[0-9]+$" "http://none" +"^.{4,8}$" G "<0>asdf" +"^.{4,8}$" G "<0>1234" +"^.{4,8}$" G "<0>asdf1234" +"^.{4,8}$" "asd" +"^.{4,8}$" "123" +"^.{4,8}$" "asdfe12345" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.com.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" G "<0>a@a.au" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "word@" +"^[\w\.=-]+@[\w\.-]+\.[\w]{2,3}$" "@word" +"^\d{5}-\d{4}$" G "<0>22222-3333" +"^\d{5}-\d{4}$" G "<0>34545-2367" +"^\d{5}-\d{4}$" G "<0>56334-2343" +"^\d{5}-\d{4}$" "123456789" +"^\d{5}-\d{4}$" "A3B 4C5" +"^\d{5}-\d{4}$" "55335" +"(a|b|c).(a.b)*.b+.c" G "<0>autbfc" +"(a|b|c).(a.b)*.b+.c" "attc" +'"((\\")|[^"(\\")])+"' G '<0>"test"' +'"((\\")|[^"(\\")])+"' G '<0>"escape\"quote"' +'"((\\")|[^"(\\")])+"' G '<0>"\\""' +'"((\\")|[^"(\\")])+"' "test" +'"((\\")|[^"(\\")])+"' '"test' +'"((\\")|[^"(\\")])+"' '""test\\"' +"((0[1-9])|(1[02]))/\d{2}" G "<0>01/00" +"((0[1-9])|(1[02]))/\d{2}" G "<0>12/99" +"((0[1-9])|(1[02]))/\d{2}" "13/00" +"((0[1-9])|(1[02]))/\d{2}" "12/AS" +"^[a-zA-Z]$" G "<0>a" +"^[a-zA-Z]$" G "<0>B" +"^[a-zA-Z]$" G "<0>c" +"^[a-zA-Z]$" "0" +"^[a-zA-Z]$" "&" +"^[a-zA-Z]$" "AbC" +"^[a-zA-Z]+$" G "<0>abc" +"^[a-zA-Z]+$" G "<0>ABC" +"^[a-zA-Z]+$" G "<0>aBcDeF" +"^[a-zA-Z]+$" "abc123" +"^[a-zA-Z]+$" "mr." +"^[a-zA-Z]+$" "a word" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>Smith, Ed" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>Ed Smith" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" G "<0>aBcDeFgH" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "a123" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "AB5" +"^\s*[a-zA-Z,\p{Zs}]+\s*$" "Mr. 
Ed" +"(\w+?@\w+?\u002E.+)" G "<0>bob@vsnl.com" +"(\w+?@\w+?\u002E.+)" "[AABB]" +"^\d+$" G "<0>123" +"^\d+$" G "<0>10" +"^\d+$" G "<0>54" +"^\d+$" "-54" +"^\d+$" "54.234" +"^\d+$" "abc" +"^(\+|-)?\d+$" G "<0>-34" +"^(\+|-)?\d+$" G "<0>34" +"^(\+|-)?\d+$" G "<0>+5" +"^(\+|-)?\d+$" "abc" +"^(\+|-)?\d+$" "3.1415" +"^(\+|-)?\d+$" "-5.3" +"foo" G "<0>foo" +"foo" "bar" +"^[1-5]$" G "<0>1" +"^[1-5]$" G "<0>3" +"^[1-5]$" G "<0>4" +"^[1-5]$" "6" +"^[1-5]$" "23" +"^[1-5]$" "a" +"^[12345]$" G "<0>1" +"^[12345]$" G "<0>2" +"^[12345]$" G "<0>4" +"^[12345]$" "6" +"^[12345]$" "-1" +"^[12345]$" "abc" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@aol.com" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@wrox.co.uk" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" G "<0>joe@domain.info" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "a@b" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "notanemail" +"^[\w-\.]+@([\w-]+\.)+[\w-]{2,4}$" "joe@@." +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>joe@aol.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>ssmith@aspalliance.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" G "<0>a@b.cc" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@123aspx.com" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@web.info" +"^\w+@[a-zA-Z_]+?\.[a-zA-Z]{2,3}$" "joe@company.co.uk" +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>joe@aol.com" +"[\w-]+@([\w-]+\.)+[\w-]+" G "<0>a@b.c" +"[\w-]+@([\w-]+\.)+[\w-]+" "asdf" +"[\w-]+@([\w-]+\.)+[\w-]+" "1234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234-1234-1234-1234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" G "<0>1234123412341234" +"\d{4}-?\d{4}-?\d{4}-?\d{4}" "1234123412345" +"^\d{5}$" G "<0>33333" +"^\d{5}$" G "<0>55555" +"^\d{5}$" G "<0>23445" +"^\d{5}$" "abcd" +"^\d{5}$" "1324" +"^\d{5}$" "as;lkjdf" +"(\w+)\s+\1" G "<0>hubba hubba" +"(\w+)\s+\1" G "<0>mandate dated" +"(\w+)\s+\1" G "<0>an annual" +"(\w+)\s+\1" "may day" +"(\w+)\s+\1" "gogo" +"(\w+)\s+\1" "1212" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>3SquareBand.com" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G 
"<0>asp.net" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" G "<0>army.mil" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "$SquareBand.com" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "asp/dot.net" +"^[a-zA-Z0-9\-\.]+\.(com|org|net|mil|edu|COM|ORG|NET|MIL|EDU)$" "army.military" + diff --git a/go/mysql/icuregex/testdata/regextst_extended.txt b/go/mysql/icuregex/testdata/regextst_extended.txt new file mode 100644 index 00000000000..841e5e46092 --- /dev/null +++ b/go/mysql/icuregex/testdata/regextst_extended.txt @@ -0,0 +1,126 @@ +# Copyright (C) 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html +# Copyright (c) 2001-2015 International Business Machines +# Corporation and others. All Rights Reserved. +# +# file: +# +# ICU regular expression test cases. +# +# format: one test case per line, +# = [# comment] +# = "" +# = "" +# the quotes on the pattern and match string can be " or ' or / +# = text, with the start and end of each +# capture group tagged with .... The overall match, +# if any, is group 0, as in <0>matched text +# A region can be specified with ... tags. +# Standard ICU unescape will be applied, allowing \u, \U, etc. to appear. +# +# = any combination of +# i case insensitive match +# x free spacing and comments +# s dot-matches-all mode +# m multi-line mode. +# ($ and ^ match at embedded new-lines) +# D Unix Lines mode (only recognize 0x0a as new-line) +# Q UREGEX_LITERAL flag. Entire pattern is literal string. +# v If icu configured without break iteration, this +# regex test pattern should not compile. +# e set the UREGEX_ERROR_ON_UNKNOWN_ESCAPES flag +# d dump the compiled pattern +# t trace operation of match engine. +# 2-9 a digit between 2 and 9, specifies the number of +# times to execute find(). The expected results are +# for the last find() in the sequence. +# G Only check match / no match. Do not check capture groups. 
+# E Pattern compilation error expected +# L Use LookingAt() rather than find() +# M Use matches() rather than find(). +# +# a Use non-Anchoring Bounds. +# b Use Transparent Bounds. +# The a and b options only make a difference if +# a region has been specified in the string. +# z|Z hitEnd was expected(z) or not expected (Z). +# With neither, hitEnd is not checked. +# y|Y Require End expected(y) or not expected (Y). +# +# White space must be present between the flags and the match string. +# + +"[:xdigit:]" " <0>4f" +"\P{XDIGIT}+" "4f<0> " + +"[:blank:]" "<0> 4f" +"\P{BLANK}+" "<0>4f " + +"[:print:]" "<0> 4f\x07" +"\P{PRINT}+" " 4f<0>\x07" + +"\p{Age=1.1}" "<0>4f🥱" +"\p{Age=11}" "4f🥱" +"\p{Age=12}" "4f<0>🥱" + +"\p{Name=LATIN SMALL LETTER B}" "Good<0>bye" + +"\p{Numeric_Value=3}" "Good<0>3ye" +"\p{Numeric_Value=14}" "Good<0>⑭ye" + +"\p{Script_Extensions=Greek}" "Good<0>βye" + +"\p{Bidi_Control}" "Good<0>\u200Eye" +"\p{Bidi_Class=LeftToRight}" "<0>Goodbye" +"\p{Bidi_Class=RightToLeft}" "Goodbye" +"\p{Bidi_Class=LeftToRight}" "؈" +"\p{Bidi_Paired_Bracket_Type=Open}" "Good<0>(ye" + +"\p{Soft_Dotted}" "Good<0>iye" + +"\p{Changes_When_Lowercased}" "<0>Goodbye" +"\p{Changes_When_Titlecased}" "<0>goodbye" +"\p{Changes_When_Uppercased}" "G<0>oodbye" +"\p{Changes_When_CaseMapped}" " <0>Goodbye3" +"\p{Cased}" " <0>Goodbye3" +"\p{CaseIgnorable}" "foo<0>.bar" + +"\p{Indic_Syllabic_Category=Avagraha}" "foo<0>\u09BDbar" +"\p{IndicPositionalCategory=Top_And_Left_And_Right}" "foo<0>\u0B4Cbar" +"\p{VerticalOrientation=U}" "foo<0>\uA015bar" + +"\p{Canonical_Combining_Class=Nukta}" "foo<0>\u093Cbar" +"\p{Lead_Canonical_Combining_Class=Above}" "foo<0>\u0300bar" +"\p{Trail_Canonical_Combining_Class=Above}" "foo<0>\u0300bar" + +"\p{Changes_When_Casefolded}" "<0>\uFB03Goodbye" +"\p{Changes_When_Casefolded}" 2 "\uFB03<0>Goodbye" + +"\p{NFC_Inert}" "foo<0>\uFB03bar" +"\p{NFKC_Inert}" "foo<0>\uFB03bar" +"\P{NFD_Inert}" "foo<0>Àbar" +"\P{NFKD_Inert}" "foo<0>Àbar" + +"\p{NFC_Quick_Check=No}" 
"foo<0>\u0340bar" +"\p{NFKC_Quick_Check=No}" "foo<0>\u0340bar" +"\p{NFD_Quick_Check=No}" "foo<0>\u00C0bar" +"\p{NFKD_Quick_Check=No}" "foo<0>\u00C0bar" + +"\p{Full_Composition_Exclusion}" "foo<0>\u0374bar" + +"\p{Numeric_Type=Decimal}" "foo<0>3bar" +"\p{Joining_Type=Dual_Joining}" "foo<0>\u0626bar" +"\p{Joining_Group=African_Feh}" "foo<0>\u08BBbar" +"\p{General_Category=Close_Punctuation}" "foo[bar" +"\p{General_Category=Close_Punctuation}" "foo<0>]]bar" +"\p{General_Category=Close_Punctuation}" 2 "foo]<0>]bar" + +"\p{Hangul_Syllable_Type=Not_Applicable}" "<0>f" +"\p{Hangul_Syllable_Type=Leading_Jamo}" "foo<0>\u1100bar" + +"\p{Regional_Indicator=Yes}" "foo<0>\U0001F1E6bar" + +# Currently unsupported property classes below. They require +# significant additional code to support. +"\p{Changes_When_NFKC_Casefolded}" E "foo<0>\uFB03bar" +"\p{Segment_Starter}" E "<0>\uFB03Goodbye" \ No newline at end of file diff --git a/go/mysql/sql_error.go b/go/mysql/sql_error.go index c400de4ef9a..ac988033e3d 100644 --- a/go/mysql/sql_error.go +++ b/go/mysql/sql_error.go @@ -218,6 +218,28 @@ var stateToMysqlCode = map[vterrors.State]mysqlCode{ vterrors.WrongArguments: {num: ERWrongArguments, state: SSUnknownSQLState}, vterrors.UnknownStmtHandler: {num: ERUnknownStmtHandler, state: SSUnknownSQLState}, vterrors.UnknownTimeZone: {num: ERUnknownTimeZone, state: SSUnknownSQLState}, + vterrors.RegexpStringNotTerminated: {num: ERRegexpStringNotTerminated, state: SSUnknownSQLState}, + vterrors.RegexpBufferOverflow: {num: ERRegexpBufferOverflow, state: SSUnknownSQLState}, + vterrors.RegexpIllegalArgument: {num: ERRegexpIllegalArgument, state: SSUnknownSQLState}, + vterrors.RegexpIndexOutOfBounds: {num: ERRegexpIndexOutOfBounds, state: SSUnknownSQLState}, + vterrors.RegexpInternal: {num: ERRegexpInternal, state: SSUnknownSQLState}, + vterrors.RegexpRuleSyntax: {num: ERRegexpRuleSyntax, state: SSUnknownSQLState}, + vterrors.RegexpBadEscapeSequence: {num: ERRegexpBadEscapeSequence, state: 
SSUnknownSQLState}, + vterrors.RegexpUnimplemented: {num: ERRegexpUnimplemented, state: SSUnknownSQLState}, + vterrors.RegexpMismatchParen: {num: ERRegexpMismatchParen, state: SSUnknownSQLState}, + vterrors.RegexpBadInterval: {num: ERRegexpBadInterval, state: SSUnknownSQLState}, + vterrors.RegexpMaxLtMin: {num: ERRRegexpMaxLtMin, state: SSUnknownSQLState}, + vterrors.RegexpInvalidBackRef: {num: ERRegexpInvalidBackRef, state: SSUnknownSQLState}, + vterrors.RegexpLookBehindLimit: {num: ERRegexpLookBehindLimit, state: SSUnknownSQLState}, + vterrors.RegexpMissingCloseBracket: {num: ERRegexpMissingCloseBracket, state: SSUnknownSQLState}, + vterrors.RegexpInvalidRange: {num: ERRegexpInvalidRange, state: SSUnknownSQLState}, + vterrors.RegexpStackOverflow: {num: ERRegexpStackOverflow, state: SSUnknownSQLState}, + vterrors.RegexpTimeOut: {num: ERRegexpTimeOut, state: SSUnknownSQLState}, + vterrors.RegexpPatternTooBig: {num: ERRegexpPatternTooBig, state: SSUnknownSQLState}, + vterrors.RegexpInvalidFlag: {num: ERRegexpInvalidFlag, state: SSUnknownSQLState}, + vterrors.RegexpInvalidCaptureGroup: {num: ERRegexpInvalidCaptureGroup, state: SSUnknownSQLState}, + vterrors.CharacterSetMismatch: {num: ERCharacterSetMismatch, state: SSUnknownSQLState}, + vterrors.WrongParametersToNativeFct: {num: ERWrongParametersToNativeFct, state: SSUnknownSQLState}, } func getStateToMySQLState(state vterrors.State) mysqlCode { diff --git a/go/vt/vterrors/state.go b/go/vt/vterrors/state.go index d7ed04e1c7b..609ab6fbd1b 100644 --- a/go/vt/vterrors/state.go +++ b/go/vt/vterrors/state.go @@ -88,6 +88,31 @@ const ( // unknown timezone UnknownTimeZone + // regexp errors + RegexpStringNotTerminated + RegexpBufferOverflow + RegexpIllegalArgument + RegexpIndexOutOfBounds + RegexpInternal + RegexpRuleSyntax + RegexpBadEscapeSequence + RegexpUnimplemented + RegexpMismatchParen + RegexpBadInterval + RegexpMaxLtMin + RegexpInvalidBackRef + RegexpLookBehindLimit + RegexpMissingCloseBracket + RegexpInvalidRange 
+ RegexpStackOverflow + RegexpTimeOut + RegexpPatternTooBig + RegexpInvalidCaptureGroup + RegexpInvalidFlag + + CharacterSetMismatch + WrongParametersToNativeFct + // No state should be added below NumOfStates NumOfStates ) diff --git a/go/vt/vtgate/evalengine/cached_size.go b/go/vt/vtgate/evalengine/cached_size.go index c249bf3e86c..ea525e46a25 100644 --- a/go/vt/vtgate/evalengine/cached_size.go +++ b/go/vt/vtgate/evalengine/cached_size.go @@ -1257,6 +1257,54 @@ func (cached *builtinRandomBytes) CachedSize(alloc bool) int64 { size += cached.CallExpr.CachedSize(false) return size } +func (cached *builtinRegexpInstr) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpLike) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpReplace) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} +func (cached *builtinRegexpSubstr) CachedSize(alloc bool) int64 { + if cached == nil { + return int64(0) + } + size := int64(0) + if alloc { + size += int64(48) + } + // field CallExpr vitess.io/vitess/go/vt/vtgate/evalengine.CallExpr + size += cached.CallExpr.CachedSize(false) + return size +} func (cached *builtinRepeat) CachedSize(alloc bool) int64 { if cached == nil { return int64(0) diff --git a/go/vt/vtgate/evalengine/compare.go b/go/vt/vtgate/evalengine/compare.go index f2262cf8730..deee5fdb520 100644 --- 
a/go/vt/vtgate/evalengine/compare.go +++ b/go/vt/vtgate/evalengine/compare.go @@ -137,7 +137,7 @@ func compareStrings(l, r eval) (int, error) { if err != nil { return 0, err } - collation := col.Get() + collation := col.Collation.Get() if collation == nil { panic("unknown collation after coercion") } diff --git a/go/vt/vtgate/evalengine/compiler_asm.go b/go/vt/vtgate/evalengine/compiler_asm.go index 870c32fd767..1267eaf1d1d 100644 --- a/go/vt/vtgate/evalengine/compiler_asm.go +++ b/go/vt/vtgate/evalengine/compiler_asm.go @@ -35,6 +35,8 @@ import ( "github.com/google/uuid" + "vitess.io/vitess/go/mysql/icuregex" + "vitess.io/vitess/go/hack" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/mysql/collations/charset" @@ -3942,10 +3944,6 @@ func (asm *assembler) Fn_YEARWEEK() { }, "FN YEARWEEK DATE(SP-1)") } -func intervalStackOffset(l, i int) int { - return l - i + 1 -} - func (asm *assembler) Interval_i(l int) { asm.adjustStack(-l) asm.emit(func(env *ExpressionEnv) int { @@ -4285,3 +4283,442 @@ func (asm *assembler) Fn_DATEADD_s(unit datetime.IntervalType, sub bool, col col }, "FN DATEADD TEMPORAL(SP-2), INTERVAL(SP-1)") } + +func (asm *assembler) Fn_REGEXP_LIKE(m *icuregex.Matcher, negate bool, c charset.Charset, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + m.Reset(charset.Expand(nil, input.bytes, c)) + + ok, err := m.Find() + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if negate { + ok = !ok + } + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalBool(ok) + env.vm.sp -= offset + return 1 + }, "FN REGEXP_LIKE VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_LIKE_slow(negate bool, c collations.Charset, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + var err error + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := 
env.vm.stack[env.vm.sp-offset].(*evalBytes) + + if offset > 1 { + fe := env.vm.stack[env.vm.sp-offset+1] + flags, err = regexpFlags(fe, flags, "regexp_like") + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(charset.Expand(nil, input.bytes, c)) + + ok, err := m.Find() + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if negate { + ok = !ok + } + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalBool(ok) + env.vm.sp -= offset + return 1 + }, "FN REGEXP_LIKE_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_INSTR(m *icuregex.Matcher, c charset.Charset, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + runes := charset.Expand(nil, input.bytes, c) + + if len(runes) == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + env.vm.sp -= offset + return 1 + } + + pos := int64(1) + if offset > 1 { + pos, env.vm.err = positionInstr(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), int64(len(runes))) + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + returnOpt := int64(0) + if offset > 3 { + returnOpt, env.vm.err = returnOption(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + if !found { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + } else if returnOpt == 0 { + env.vm.stack[env.vm.sp-offset-1] = 
env.vm.arena.newEvalInt64(int64(m.Start()) + pos) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.End()) + pos) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_INSTR VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_INSTR_slow(c collations.Charset, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + + if offset > 4 { + fe := env.vm.stack[env.vm.sp-offset+4] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + runes := charset.Expand(nil, input.bytes, c) + if len(runes) == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + env.vm.sp -= offset + return 1 + } + + pos := int64(1) + if offset > 1 { + pos, env.vm.err = positionInstr(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), int64(len(runes))) + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + returnOpt := int64(0) + if offset > 3 { + returnOpt, env.vm.err = returnOption(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), "regexp_instr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + m := icuregex.NewMatcher(p) + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + if !found { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(0) + } else if returnOpt == 0 { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.Start()) + pos) + } else { + 
env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalInt64(int64(m.End()) + pos) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_INSTR_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_SUBSTR(m *icuregex.Matcher, merged collations.TypedCollation, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + c := merged.Collation.Get().Charset() + runes := charset.Expand(nil, input.bytes, c) + + pos := int64(1) + if offset > 1 { + limit := int64(len(runes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), limit, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = nil + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + + if !found { + env.vm.stack[env.vm.sp-offset-1] = nil + } else { + out := runes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, c) + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalText(b, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_SUBSTR VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_SUBSTR_slow(merged collations.TypedCollation, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + c := merged.Collation.Get().Charset() + runes := charset.Expand(nil, input.bytes, c) + + pos := int64(1) + if offset > 1 { + limit := int64(len(runes)) + pos, env.vm.err = 
position(env.vm.stack[env.vm.sp-offset+1].(*evalInt64), limit, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = nil + env.vm.sp -= offset + return 1 + } + } + + occ := int64(1) + if offset > 2 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), occ) + } + + if offset > 3 { + fe := env.vm.stack[env.vm.sp-offset+3] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_substr") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(runes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, env.vm.err = m.Find() + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if !found { + break + } + } + + if !found { + env.vm.stack[env.vm.sp-offset-1] = nil + } else { + out := runes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, c) + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalText(b, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_SUBSTR_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_REPLACE(m *icuregex.Matcher, merged collations.TypedCollation, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + repl := env.vm.stack[env.vm.sp-offset+1].(*evalBytes) + + c := merged.Collation.Get().Charset() + inputRunes := charset.Expand(nil, input.bytes, c) + replRunes := charset.Expand(nil, repl.bytes, c) + + pos := int64(1) + if offset > 2 { + limit := int64(len(inputRunes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), limit, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = 
env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + env.vm.sp -= offset + return 1 + } + } + + occ := int64(0) + if offset > 3 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), occ) + } + + m.Reset(inputRunes[pos-1:]) + + b, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, merged.Collation.Get().Charset()) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if !replaced { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(b, sqltypes.Text, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_REPLACE VARCHAR(SP-2), VARCHAR(SP-1)") +} + +func (asm *assembler) Fn_REGEXP_REPLACE_slow(merged collations.TypedCollation, flags icuregex.RegexpFlag, offset int) { + asm.adjustStack(-offset) + asm.emit(func(env *ExpressionEnv) int { + input := env.vm.stack[env.vm.sp-offset-1].(*evalBytes) + pattern := env.vm.stack[env.vm.sp-offset].(*evalBytes) + repl := env.vm.stack[env.vm.sp-offset+1].(*evalBytes) + + c := merged.Collation.Get().Charset() + inputRunes := charset.Expand(nil, input.bytes, c) + replRunes := charset.Expand(nil, repl.bytes, c) + + pos := int64(1) + if offset > 2 { + limit := int64(len(inputRunes)) + pos, env.vm.err = position(env.vm.stack[env.vm.sp-offset+2].(*evalInt64), limit, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + } + if pos-1 == limit { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + env.vm.sp -= offset + return 1 + } + } + + occ := int64(0) + if offset > 3 { + occ = occurrence(env.vm.stack[env.vm.sp-offset+3].(*evalInt64), 0) + } + + if offset > 4 { + fe := env.vm.stack[env.vm.sp-offset+4] + flags, env.vm.err = regexpFlags(fe, flags, "regexp_replace") + if env.vm.err != nil { + env.vm.sp -= offset + return 1 + 
} + } + + p, err := compileRegex(pattern, c, flags) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + + m := icuregex.NewMatcher(p) + m.Reset(inputRunes[pos-1:]) + + b, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, merged.Collation.Get().Charset()) + if err != nil { + env.vm.err = err + env.vm.sp -= offset + return 1 + } + if !replaced { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(input.bytes, sqltypes.Text, resultCollation(merged)) + } else { + env.vm.stack[env.vm.sp-offset-1] = env.vm.arena.newEvalRaw(b, sqltypes.Text, resultCollation(merged)) + } + env.vm.sp -= offset + return 1 + }, "FN REGEXP_REPLACE_SLOW VARCHAR(SP-2), VARCHAR(SP-1)") +} diff --git a/go/vt/vtgate/evalengine/compiler_test.go b/go/vt/vtgate/evalengine/compiler_test.go index 92ef9d3d465..1b5ace371c9 100644 --- a/go/vt/vtgate/evalengine/compiler_test.go +++ b/go/vt/vtgate/evalengine/compiler_test.go @@ -444,6 +444,10 @@ func TestCompilerSingle(t *testing.T) { expression: `INTERVAL(0, 0, 0, -1, NULL, NULL, 1)`, result: `INT64(5)`, }, + { + expression: `REGEXP_REPLACE(1234, 12, 6, 1)`, + result: `TEXT("634")`, + }, } for _, tc := range testCases { diff --git a/go/vt/vtgate/evalengine/expr_collate.go b/go/vt/vtgate/evalengine/expr_collate.go index 16fe8351880..2ba2e3dba61 100644 --- a/go/vt/vtgate/evalengine/expr_collate.go +++ b/go/vt/vtgate/evalengine/expr_collate.go @@ -54,6 +54,12 @@ var collationUtf8mb3 = collations.TypedCollation{ Repertoire: collations.RepertoireUnicode, } +var collationRegexpFallback = collations.TypedCollation{ + Collation: collations.CollationLatin1Swedish, + Coercibility: collations.CoerceCoercible, + Repertoire: collations.RepertoireASCII, +} + type ( CollateExpr struct { UnaryExpr @@ -152,16 +158,16 @@ func mergeCollations(c1, c2 collations.TypedCollation, t1, t2 sqltypes.Type) (co }) } -func mergeAndCoerceCollations(left, right eval) (eval, eval, collations.ID, error) { +func mergeAndCoerceCollations(left, 
right eval) (eval, eval, collations.TypedCollation, error) { lt := left.SQLType() rt := right.SQLType() mc, coerceLeft, coerceRight, err := mergeCollations(evalCollation(left), evalCollation(right), lt, rt) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } if coerceLeft == nil && coerceRight == nil { - return left, right, mc.Collation, nil + return left, right, mc, nil } left1 := newEvalRaw(lt, left.(*evalBytes).bytes, mc) @@ -170,16 +176,16 @@ func mergeAndCoerceCollations(left, right eval) (eval, eval, collations.ID, erro if coerceLeft != nil { left1.bytes, err = coerceLeft(nil, left1.bytes) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } } if coerceRight != nil { right1.bytes, err = coerceRight(nil, right1.bytes) if err != nil { - return nil, nil, 0, err + return nil, nil, collations.TypedCollation{}, err } } - return left1, right1, mc.Collation, nil + return left1, right1, mc, nil } type collationAggregation struct { diff --git a/go/vt/vtgate/evalengine/expr_compare.go b/go/vt/vtgate/evalengine/expr_compare.go index cef7493e026..3aca0cc1151 100644 --- a/go/vt/vtgate/evalengine/expr_compare.go +++ b/go/vt/vtgate/evalengine/expr_compare.go @@ -558,7 +558,7 @@ func (l *LikeExpr) eval(env *ExpressionEnv) (eval, error) { return nil, err } - var col collations.ID + var col collations.TypedCollation left, right, col, err = mergeAndCoerceCollations(left, right) if err != nil { return nil, err @@ -567,11 +567,11 @@ func (l *LikeExpr) eval(env *ExpressionEnv) (eval, error) { var matched bool switch { case typeIsTextual(left.SQLType()) && typeIsTextual(right.SQLType()): - matched = l.matchWildcard(left.(*evalBytes).bytes, right.(*evalBytes).bytes, col) + matched = l.matchWildcard(left.(*evalBytes).bytes, right.(*evalBytes).bytes, col.Collation) case typeIsTextual(right.SQLType()): - matched = l.matchWildcard(left.ToRawBytes(), right.(*evalBytes).bytes, col) + matched = 
l.matchWildcard(left.ToRawBytes(), right.(*evalBytes).bytes, col.Collation) case typeIsTextual(left.SQLType()): - matched = l.matchWildcard(left.(*evalBytes).bytes, right.ToRawBytes(), col) + matched = l.matchWildcard(left.(*evalBytes).bytes, right.ToRawBytes(), col.Collation) default: matched = l.matchWildcard(left.ToRawBytes(), right.ToRawBytes(), collations.CollationBinaryID) } diff --git a/go/vt/vtgate/evalengine/fn_regexp.go b/go/vt/vtgate/evalengine/fn_regexp.go new file mode 100644 index 00000000000..5886a5c3765 --- /dev/null +++ b/go/vt/vtgate/evalengine/fn_regexp.go @@ -0,0 +1,1062 @@ +/* +Copyright 2023 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package evalengine + +import ( + "errors" + "strings" + + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/mysql/collations/charset" + "vitess.io/vitess/go/mysql/icuregex" + icuerrors "vitess.io/vitess/go/mysql/icuregex/errors" + "vitess.io/vitess/go/sqltypes" + querypb "vitess.io/vitess/go/vt/proto/query" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "vitess.io/vitess/go/vt/vterrors" +) + +func regexpFlags(m eval, flags icuregex.RegexpFlag, f string) (icuregex.RegexpFlag, error) { + switch m := m.(type) { + case *evalBytes: + for _, b := range m.bytes { + switch b { + case 'c': + flags &= ^icuregex.CaseInsensitive + case 'i': + flags |= icuregex.CaseInsensitive + case 'm': + flags |= icuregex.Multiline + case 'n': + flags |= icuregex.DotAll + case 'u': + flags |= icuregex.UnixLines + default: + return flags, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s.", f) + } + } + default: + return flags, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s.", f) + } + + return flags, nil +} + +func occurrence(e *evalInt64, min int64) int64 { + if e.i < min { + return min + } + return e.i +} + +func returnOption(val *evalInt64, f string) (int64, error) { + switch val.i { + case 0, 1: + // Valid return options. 
+ return val.i, nil + } + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongArguments, "Incorrect arguments to %s: return_option must be 1 or 0.", f) +} + +func positionInstr(val *evalInt64, limit int64) (int64, error) { + pos := val.i + if pos < 1 || pos > limit { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIndexOutOfBounds, "Index out of bounds in regular expression search.") + } + return pos, nil +} + +func position(val *evalInt64, limit int64, f string) (int64, error) { + pos := val.i + if pos < 1 { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.WrongParametersToNativeFct, "Incorrect parameters in the call to native function '%s'", f) + } + if pos-1 > limit { + return 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIndexOutOfBounds, "Index out of bounds in regular expression search.") + } + return pos, nil +} + +func evalRegexpCollation(input, pat eval, f string) (eval, eval, collations.TypedCollation, icuregex.RegexpFlag, error) { + var typedCol collations.TypedCollation + var err error + + if inputBytes, ok := input.(*evalBytes); ok { + if patBytes, ok := pat.(*evalBytes); ok { + inputCol := inputBytes.col.Collation + patCol := patBytes.col.Collation + if (inputCol == collations.CollationBinaryID && patCol != collations.CollationBinaryID) || + (inputCol != collations.CollationBinaryID && patCol == collations.CollationBinaryID) { + inputColName := inputCol.Get().Name() + patColName := patCol.Get().Name() + return nil, nil, typedCol, 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.CharacterSetMismatch, "Character set '%s' cannot be used in conjunction with '%s' in call to %s.", inputColName, patColName, f) + } + } + } + + input, pat, typedCol, err = mergeAndCoerceCollations(input, pat) + if err != nil { + return nil, nil, collations.TypedCollation{}, 0, err + } + + var flags icuregex.RegexpFlag + var collation = typedCol.Collation.Get() + if 
strings.Contains(collation.Name(), "_ci") { + flags |= icuregex.CaseInsensitive + } + + return input, pat, typedCol, flags, nil +} + +func compileRegexpCollation(input, pat ctype, f string) (collations.TypedCollation, icuregex.RegexpFlag, error) { + var merged collations.TypedCollation + var err error + + if input.isTextual() && pat.isTextual() { + inputCol := input.Col.Collation + patCol := pat.Col.Collation + if (inputCol == collations.CollationBinaryID && patCol != collations.CollationBinaryID) || + (inputCol != collations.CollationBinaryID && patCol == collations.CollationBinaryID) { + inputColName := inputCol.Get().Name() + patColName := patCol.Get().Name() + return input.Col, 0, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.CharacterSetMismatch, "Character set '%s' cannot be used in conjunction with '%s' in call to %s.", inputColName, patColName, f) + } + } + + if input.Col.Collation != pat.Col.Collation { + merged, _, _, err = mergeCollations(input.Col, pat.Col, input.Type, pat.Type) + } else { + merged = input.Col + } + if err != nil { + return input.Col, 0, err + } + + var flags icuregex.RegexpFlag + var collation = merged.Collation.Get() + if strings.Contains(collation.Name(), "_ci") { + flags |= icuregex.CaseInsensitive + } + return merged, flags, nil +} + +func compileRegex(pat eval, c collations.Charset, flags icuregex.RegexpFlag) (*icuregex.Pattern, error) { + patRunes := charset.Expand(nil, pat.ToRawBytes(), c) + + if len(patRunes) == 0 { + return nil, vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIllegalArgument, "Illegal argument to a regular expression.") + } + + regexp, err := icuregex.Compile(patRunes, flags) + if err == nil { + return regexp, nil + } + + var compileErr *icuregex.CompileError + if errors.Is(err, icuerrors.ErrUnsupported) { + err = vterrors.NewErrorf(vtrpcpb.Code_UNIMPLEMENTED, vterrors.RegexpUnimplemented, "%s", err.Error()) + } else if errors.Is(err, icuerrors.ErrIllegalArgument) { + err = 
vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpIllegalArgument, "%s", err.Error()) + } else if errors.As(err, &compileErr) { + switch compileErr.Code { + case icuregex.InternalError: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInternal, "%s", compileErr.Error()) + case icuregex.RuleSyntax: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpRuleSyntax, "%s", compileErr.Error()) + case icuregex.BadEscapeSequence: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpBadEscapeSequence, "%s", compileErr.Error()) + case icuregex.PropertySyntax: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpRuleSyntax, "%s", compileErr.Error()) + case icuregex.Unimplemented: + err = vterrors.NewErrorf(vtrpcpb.Code_UNIMPLEMENTED, vterrors.RegexpUnimplemented, "%s", compileErr.Error()) + case icuregex.MismatchedParen: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMismatchParen, "%s", compileErr.Error()) + case icuregex.BadInterval: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpBadInterval, "%s", compileErr.Error()) + case icuregex.MaxLtMin: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMaxLtMin, "%s", compileErr.Error()) + case icuregex.InvalidBackRef: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidBackRef, "%s", compileErr.Error()) + case icuregex.InvalidFlag: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidFlag, "%s", compileErr.Error()) + case icuregex.LookBehindLimit: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpLookBehindLimit, "%s", compileErr.Error()) + case icuregex.MissingCloseBracket: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpMissingCloseBracket, "%s", compileErr.Error()) + case icuregex.InvalidRange: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidRange, "%s", compileErr.Error()) + case icuregex.PatternTooBig: + err 
= vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpPatternTooBig, "%s", compileErr.Error()) + case icuregex.InvalidCaptureGroupName: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInvalidCaptureGroup, "%s", compileErr.Error()) + default: + err = vterrors.NewErrorf(vtrpcpb.Code_INVALID_ARGUMENT, vterrors.RegexpInternal, "%s", compileErr.Error()) + } + } + + return nil, err +} + +func compileConstantRegex(c *compiler, args TupleExpr, pat, mt int, cs collations.TypedCollation, flags icuregex.RegexpFlag, f string) (*icuregex.Pattern, error) { + pattern := args[pat] + if !pattern.constant() { + return nil, c.unsupported(pattern) + } + var err error + staticEnv := EmptyExpressionEnv() + pattern, err = simplifyExpr(staticEnv, pattern) + if err != nil { + return nil, err + } + + if len(args) > mt { + fl := args[mt] + if !fl.constant() { + return nil, c.unsupported(fl) + } + fl, err = simplifyExpr(staticEnv, fl) + if err != nil { + return nil, err + } + flags, err = regexpFlags(fl.(*Literal).inner, flags, f) + if err != nil { + return nil, err + } + } + + if pattern.(*Literal).inner == nil { + return nil, c.unsupported(pattern) + } + + innerPat, err := evalToVarchar(pattern.(*Literal).inner, cs.Collation, true) + if err != nil { + return nil, err + } + + return compileRegex(innerPat, cs.Collation.Get().Charset(), flags) +} + +// resultCollation returns the collation to use for the result of a regexp. +// This falls back to latin1_swedish if the input collation is binary. This +// seems to be a side effect of how MySQL also works. Probably due to how it +// is using ICU and converting there. 
+func resultCollation(in collations.TypedCollation) collations.TypedCollation { + if in.Collation == collationBinary.Collation { + return collationRegexpFallback + } + return in +} + +type builtinRegexpLike struct { + CallExpr + Negate bool +} + +func (r *builtinRegexpLike) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_like") + if err != nil { + return nil, err + } + collation := typedCol.Collation.Get() + + if len(r.Arguments) > 2 { + m, err := r.Arguments[2].eval(env) + if err != nil || m == nil { + return nil, err + } + flags, err = regexpFlags(m, flags, "regexp_like") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes) + + ok, err := m.Find() + if err != nil { + return nil, err + } + if r.Negate { + ok = !ok + } + return newEvalBool(ok), nil +} + +func (r *builtinRegexpLike) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + return sqltypes.Int64, f1 | f2 | f3 | flagIsBoolean +} + +func (r *builtinRegexpLike) compileSlow(c *compiler, input, pat, fl ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_LIKE_slow(r.Negate, 
merged.Collation.Get().Charset(), flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | fl.Flag | flagIsBoolean}, nil +} + +func (r *builtinRegexpLike) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var f ctype + + if len(r.Arguments) > 2 { + f, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(f, 2)) + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_like") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 2, merged, flags, "regexp_like") + if err != nil { + return r.compileSlow(c, input, pat, f, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_LIKE(icuregex.NewMatcher(p), r.Negate, merged.Collation.Get().Charset(), len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | f.Flag | flagIsBoolean}, nil +} + +var _ Expr = (*builtinRegexpLike)(nil) + +type builtinRegexpInstr struct { + CallExpr +} + +func (r *builtinRegexpInstr) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_instr") + if err != nil { + return nil, err + } + + var posExpr eval + if len(r.Arguments) > 2 { + posExpr, err = r.Arguments[2].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 3 { + occExpr, err = r.Arguments[3].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var retExpr eval + if len(r.Arguments) > 4 { + retExpr, err = r.Arguments[4].eval(env) + if err != nil || retExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 5 { + mtExpr, err = r.Arguments[5].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + + pos := int64(1) + occ := int64(1) + returnOpt := int64(0) + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_instr") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + if len(inputRunes) == 0 { + return newEvalInt64(0), nil + } + + if posExpr != nil { + pos, err = positionInstr(evalToInt64(posExpr), int64(len(inputRunes))) + if err != nil { + return nil, err + } + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if retExpr != nil { + returnOpt, err = returnOption(evalToInt64(retExpr), 
"regexp_instr") + if err != nil { + return nil, err + } + } + + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, err = m.Find() + if err != nil { + return nil, err + } + if !found { + break + } + } + if !found { + return newEvalInt64(0), nil + } + if returnOpt == 0 { + return newEvalInt64(int64(m.Start()) + pos), nil + } + return newEvalInt64(int64(m.End()) + pos), nil +} + +func (r *builtinRegexpInstr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3, f4, f5, f6 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + if len(r.Arguments) > 5 { + _, f6 = r.Arguments[5].typeof(env, fields) + } + return sqltypes.Int64, f1 | f2 | f3 | f4 | f5 | f6 +} + +func (r *builtinRegexpInstr) compileSlow(c *compiler, input, pat, pos, occ, returnOption, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_INSTR_slow(merged.Collation.Get().Charset(), flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | returnOption.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpInstr) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var pos ctype + if len(r.Arguments) > 2 { + pos, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 2)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 3 { + occ, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 3)) + _ = c.compileToInt64(occ, 1) + } + + var returnOpt ctype + if len(r.Arguments) > 4 { + returnOpt, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(returnOpt, 4)) + _ = c.compileToInt64(returnOpt, 1) + } + + var matchType ctype + if len(r.Arguments) > 5 { + matchType, err = r.Arguments[5].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 5)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_instr") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. 
+ p, err := compileConstantRegex(c, r.Arguments, 1, 5, merged, flags, "regexp_instr") + if err != nil { + return r.compileSlow(c, input, pat, pos, occ, returnOpt, matchType, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_INSTR(icuregex.NewMatcher(p), merged.Collation.Get().Charset(), len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | flagIsBoolean}, nil +} + +var _ Expr = (*builtinRegexpInstr)(nil) + +type builtinRegexpSubstr struct { + CallExpr +} + +func (r *builtinRegexpSubstr) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_substr") + if err != nil { + return nil, err + } + + var posExpr eval + // For some reason this gets checked before NULL checks of the other values + if len(r.Arguments) > 2 { + posExpr, err = r.Arguments[2].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 3 { + occExpr, err = r.Arguments[3].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 4 { + mtExpr, err = r.Arguments[4].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + pos := int64(1) + occ := int64(1) + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + + if posExpr != nil { + pos, err = position(evalToInt64(posExpr), int64(len(inputRunes)), "regexp_substr") + if err != nil { + return nil, err + } + + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_substr") + if err != nil { + return nil, err + } + } + + 
regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + m := icuregex.NewMatcher(regexp) + m.Reset(inputRunes[pos-1:]) + + found := false + for i := int64(0); i < occ; i++ { + found, err = m.Find() + if err != nil { + return nil, err + } + if !found { + break + } + } + if !found { + return nil, nil + } + out := inputRunes[int64(m.Start())+pos-1 : int64(m.End())+pos-1] + b := charset.Collapse(nil, out, collation.Charset()) + return newEvalText(b, resultCollation(typedCol)), nil +} + +func (r *builtinRegexpSubstr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + var f3, f4, f5 typeFlag + if len(r.Arguments) > 2 { + _, f3 = r.Arguments[2].typeof(env, fields) + } + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + return sqltypes.VarChar, f1 | f2 | f3 | f4 | f5 +} + +func (r *builtinRegexpSubstr) compileSlow(c *compiler, input, pat, pos, occ, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_SUBSTR_slow(merged, flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpSubstr) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + var pos ctype + if len(r.Arguments) > 2 { + pos, err = r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 2)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 3 { + occ, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 3)) + _ = c.compileToInt64(occ, 1) + } + + var matchType ctype + if len(r.Arguments) > 4 { + matchType, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 4)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_substr") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. + p, err := compileConstantRegex(c, r.Arguments, 1, 4, merged, flags, "regexp_substr") + if err != nil { + return r.compileSlow(c, input, pat, pos, occ, matchType, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_SUBSTR(icuregex.NewMatcher(p), merged, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +var _ Expr = (*builtinRegexpSubstr)(nil) + +type builtinRegexpReplace struct { + CallExpr +} + +func regexpReplace(m *icuregex.Matcher, inputRunes, replRunes []rune, pos, occ int64, c collations.Charset) ([]byte, bool, error) { + var err error + found := false + if occ > 0 { + for i := int64(0); i < occ; i++ { + found, err = m.Find() + if err != nil { + return nil, false, err + } + if !found { + break + } + } + if !found { + return nil, false, nil + } + + out := append(inputRunes[:int64(m.Start())+pos-1], replRunes...) + out = append(out, inputRunes[int64(m.End())+pos-1:]...) + return charset.Collapse(nil, out, c), true, nil + } + + found, err = m.Find() + if err != nil { + return nil, false, err + } + + if !found { + return nil, false, nil + } + + start := int64(m.Start()) + pos - 1 + out := append(inputRunes[:start], replRunes...) + end := int64(m.End()) + pos - 1 + for { + found, err = m.Find() + if err != nil { + return nil, false, err + } + if !found { + break + } + nextStart := int64(m.Start()) + pos - 1 + out = append(out, inputRunes[end:nextStart]...) + out = append(out, replRunes...) + end = int64(m.End()) + pos - 1 + } + + out = append(out, inputRunes[end:]...) 
+ return charset.Collapse(nil, out, c), true, nil +} + +func (r *builtinRegexpReplace) eval(env *ExpressionEnv) (eval, error) { + input, err := r.Arguments[0].eval(env) + if err != nil || input == nil { + return nil, err + } + + pat, err := r.Arguments[1].eval(env) + if err != nil || pat == nil { + return nil, err + } + + replArg, err := r.Arguments[2].eval(env) + if err != nil || replArg == nil { + return nil, err + } + + input, pat, typedCol, flags, err := evalRegexpCollation(input, pat, "regexp_replace") + if err != nil { + return nil, err + } + + var posExpr eval + // For some reason this gets checked before NULL checks of the other values + if len(r.Arguments) > 3 { + posExpr, err = r.Arguments[3].eval(env) + if err != nil || posExpr == nil { + return nil, err + } + } + + var occExpr eval + if len(r.Arguments) > 4 { + occExpr, err = r.Arguments[4].eval(env) + if err != nil || occExpr == nil { + return nil, err + } + } + + var mtExpr eval + if len(r.Arguments) > 5 { + mtExpr, err = r.Arguments[5].eval(env) + if err != nil || mtExpr == nil { + return nil, err + } + } + + collation := typedCol.Collation.Get() + + repl, ok := replArg.(*evalBytes) + if !ok { + repl, err = evalToVarchar(replArg, typedCol.Collation, true) + if err != nil { + return nil, err + } + } + pos := int64(1) + occ := int64(0) + inputRunes := charset.Expand(nil, input.ToRawBytes(), collation.Charset()) + replRunes := charset.Expand(nil, repl.ToRawBytes(), repl.col.Collation.Get().Charset()) + + if posExpr != nil { + pos, err = position(evalToInt64(posExpr), int64(len(inputRunes)), "regexp_replace") + if err != nil { + return nil, err + } + } + + if occExpr != nil { + occ = occurrence(evalToInt64(occExpr), occ) + } + + if mtExpr != nil { + flags, err = regexpFlags(mtExpr, flags, "regexp_replace") + if err != nil { + return nil, err + } + } + + regexp, err := compileRegex(pat, collation.Charset(), flags) + if err != nil { + return nil, err + } + + m := icuregex.NewMatcher(regexp) + 
m.Reset(inputRunes[pos-1:]) + + bytes, replaced, err := regexpReplace(m, inputRunes, replRunes, pos, occ, collation.Charset()) + if err != nil { + return nil, err + } + if !replaced { + return newEvalRaw(sqltypes.Text, input.ToRawBytes(), resultCollation(typedCol)), nil + } + return newEvalRaw(sqltypes.Text, bytes, resultCollation(typedCol)), nil +} + +func (r *builtinRegexpReplace) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) { + _, f1 := r.Arguments[0].typeof(env, fields) + _, f2 := r.Arguments[1].typeof(env, fields) + _, f3 := r.Arguments[2].typeof(env, fields) + var f4, f5, f6 typeFlag + if len(r.Arguments) > 3 { + _, f4 = r.Arguments[3].typeof(env, fields) + } + if len(r.Arguments) > 4 { + _, f5 = r.Arguments[4].typeof(env, fields) + } + if len(r.Arguments) > 5 { + _, f6 = r.Arguments[5].typeof(env, fields) + } + return sqltypes.Text, f1 | f2 | f3 | f4 | f5 | f6 +} + +func (r *builtinRegexpReplace) compileSlow(c *compiler, input, pat, repl, pos, occ, matchType ctype, merged collations.TypedCollation, flags icuregex.RegexpFlag, skips ...*jump) (ctype, error) { + if !pat.isTextual() || pat.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-1, sqltypes.VarChar, merged.Collation) + } + + c.asm.Fn_REGEXP_REPLACE_slow(merged, flags, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) 
+ return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | repl.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +func (r *builtinRegexpReplace) compile(c *compiler) (ctype, error) { + input, err := r.Arguments[0].compile(c) + if err != nil { + return ctype{}, err + } + var skips []*jump + skips = append(skips, c.compileNullCheckArg(input, 0)) + + pat, err := r.Arguments[1].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pat, 1)) + + repl, err := r.Arguments[2].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(repl, 2)) + + var pos ctype + if len(r.Arguments) > 3 { + pos, err = r.Arguments[3].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(pos, 3)) + _ = c.compileToInt64(pos, 1) + } + + var occ ctype + if len(r.Arguments) > 4 { + occ, err = r.Arguments[4].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(occ, 4)) + _ = c.compileToInt64(occ, 1) + } + + var matchType ctype + if len(r.Arguments) > 5 { + matchType, err = r.Arguments[5].compile(c) + if err != nil { + return ctype{}, err + } + skips = append(skips, c.compileNullCheckArg(matchType, 5)) + switch { + case matchType.isTextual(): + default: + c.asm.Convert_xb(1, sqltypes.VarBinary, 0, false) + } + } + + merged, flags, err := compileRegexpCollation(input, pat, "regexp_replace") + if err != nil { + return ctype{}, err + } + + if !input.isTextual() || input.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments), sqltypes.VarChar, merged.Collation) + } + + if !repl.isTextual() || repl.Col.Collation != merged.Collation { + c.asm.Convert_xce(len(r.Arguments)-2, sqltypes.VarChar, merged.Collation) + } + + // We optimize for the case where the pattern is a constant. If not, + // we fall back to the slow path. 
+ p, err := compileConstantRegex(c, r.Arguments, 1, 5, merged, flags, "regexp_replace") + if err != nil { + return r.compileSlow(c, input, pat, repl, pos, occ, matchType, merged, flags, skips...) + } + + c.asm.Fn_REGEXP_REPLACE(icuregex.NewMatcher(p), merged, len(r.Arguments)-1) + c.asm.jumpDestination(skips...) + + return ctype{Type: sqltypes.Int64, Col: collationNumeric, Flag: input.Flag | pat.Flag | repl.Flag | pos.Flag | occ.Flag | matchType.Flag}, nil +} + +var _ Expr = (*builtinRegexpReplace)(nil) diff --git a/go/vt/vtgate/evalengine/integration/fuzz_test.go b/go/vt/vtgate/evalengine/integration/fuzz_test.go index 24cd2733fd4..563bb323244 100644 --- a/go/vt/vtgate/evalengine/integration/fuzz_test.go +++ b/go/vt/vtgate/evalengine/integration/fuzz_test.go @@ -98,6 +98,11 @@ var ( regexp.MustCompile(`Invalid JSON text in argument (\d+) to function (\w+): (.*?)`), regexp.MustCompile(`Illegal mix of collations`), regexp.MustCompile(`Incorrect (DATE|DATETIME) value`), + regexp.MustCompile(`Syntax error in regular expression`), + regexp.MustCompile(`The regular expression contains an unclosed bracket expression`), + regexp.MustCompile(`Illegal argument to a regular expression`), + regexp.MustCompile(`Incorrect arguments to regexp_substr`), + regexp.MustCompile(`Incorrect arguments to regexp_replace`), } ) diff --git a/go/vt/vtgate/evalengine/mysql_test.go b/go/vt/vtgate/evalengine/mysql_test.go index 18802cfb8dc..987ad906b88 100644 --- a/go/vt/vtgate/evalengine/mysql_test.go +++ b/go/vt/vtgate/evalengine/mysql_test.go @@ -147,6 +147,6 @@ func TestMySQLGolden(t *testing.T) { func TestDebug1(t *testing.T) { // Debug - eval, err := testSingle(t, `SELECT DATE_SUB(TIMESTAMP'2025-01-01 00:00:00', INTERVAL '1.999999' year_month)`) + eval, err := testSingle(t, `SELECT _latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin`) t.Logf("eval=%s err=%v coll=%s", eval.String(), err, eval.Collation().Get().Name()) } diff --git a/go/vt/vtgate/evalengine/testcases/cases.go 
b/go/vt/vtgate/evalengine/testcases/cases.go index b72c5dae816..d6e692b1a99 100644 --- a/go/vt/vtgate/evalengine/testcases/cases.go +++ b/go/vt/vtgate/evalengine/testcases/cases.go @@ -151,6 +151,10 @@ var Cases = []TestCase{ {Run: FnUUID}, {Run: FnUUIDToBin}, {Run: DateMath}, + {Run: RegexpLike}, + {Run: RegexpInstr}, + {Run: RegexpSubstr}, + {Run: RegexpReplace}, } func JSONPathOperations(yield Query) { @@ -1898,3 +1902,287 @@ func DateMath(yield Query) { } } } + +func RegexpLike(yield Query) { + mysqlDocSamples := []string{ + `'Michael!' REGEXP '.*'`, + `'Michael!' RLIKE '.*'`, + `'Michael!' NOT REGEXP '.*'`, + `'Michael!' NOT RLIKE '.*'`, + `'new*\n*line' REGEXP 'new\\*.\\*line'`, + `'a' REGEXP '^[a-d]'`, + `REGEXP_LIKE('CamelCase', 'CAMELCASE')`, + `REGEXP_LIKE('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_LIKE('abc', 'ABC'`, + `REGEXP_LIKE('abc', 'ABC', 'c')`, + `REGEXP_LIKE(1234, 12)`, + `REGEXP_LIKE(1234, 12, 'c')`, + `' ' REGEXP '[[:blank:]]'`, + `'\t' REGEXP '[[:blank:]]'`, + `' ' REGEXP '[[:space:]]'`, + `'\t' REGEXP '[[:space:]]'`, + `_latin1 0xFF regexp _latin1 '[[:lower:]]' COLLATE latin1_bin`, + `_koi8r 0xFF regexp _koi8r '[[:lower:]]' COLLATE koi8r_bin`, + `_latin1 0xFF regexp _latin1 '[[:upper:]]' COLLATE latin1_bin`, + `_koi8r 0xFF regexp _koi8r '[[:upper:]]' COLLATE koi8r_bin`, + `_latin1 0xF7 regexp _latin1 '[[:alpha:]]'`, + `_koi8r 0xF7 regexp _koi8r '[[:alpha:]]'`, + `_latin1'a' regexp _latin1'A' collate latin1_general_ci`, + `_latin1'a' regexp _latin1'A' collate latin1_bin`, + + `_latin1 'ÿ' regexp _utf8mb4 'ÿ'`, + `_utf8mb4 'ÿ' regexp _latin1 'ÿ'`, + `convert('ÿ' as char character set latin1) regexp _utf8mb4 'ÿ'`, + `_utf8mb4 'ÿ' regexp convert('ÿ' as char character set latin1)`, + + `'a' regexp '\\p{alphabetic}'`, + `'a' regexp '\\P{alphabetic}'`, + `'👌🏾regexp '\\p{Emoji}\\p{Emoji_modifier}'`, + `'a' regexp '\\p{Lowercase_letter}'`, + `'a' regexp '\\p{Uppercase_letter}'`, + `'A' regexp '\\p{Lowercase_letter}'`, + `'A' 
regexp '\\p{Uppercase_letter}'`, + `'a' collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}'`, + `'A' collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}'`, + `'a' collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}'`, + `'A' collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}'`, + `0xff REGEXP 0xff`, + `0xff REGEXP 0xfe`, + `cast(time '12:34:58' as json) REGEXP 0xff`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } + + for _, i := range regexInputs { + for _, p := range regexInputs { + yield(fmt.Sprintf("%s REGEXP %s", i, p), nil) + yield(fmt.Sprintf("%s NOT REGEXP %s", i, p), nil) + for _, m := range regexMatchStrings { + yield(fmt.Sprintf("REGEXP_LIKE(%s, %s, %s)", i, p, m), nil) + } + } + } +} + +func RegexpInstr(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_INSTR('Michael!', '.*')`, + `REGEXP_INSTR('new*\n*line', 'new\\*.\\*line')`, + `REGEXP_INSTR('a', '^[a-d]')`, + `REGEXP_INSTR('CamelCase', 'CAMELCASE')`, + `REGEXP_INSTR('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_INSTR('abc', 'ABC'`, + `REGEXP_INSTR('abc', 'ABC', 'c')`, + `REGEXP_INSTR('0', '0', 1, 0)`, + `REGEXP_INSTR(' ', '[[:blank:]]')`, + `REGEXP_INSTR('\t', '[[:blank:]]')`, + `REGEXP_INSTR(' ', '[[:space:]]')`, + `REGEXP_INSTR('\t', '[[:space:]]')`, + `REGEXP_INSTR(_latin1 0xFF, _latin1 '[[:lower:]]' COLLATE latin1_bin)`, + `REGEXP_INSTR(_koi8r 0xFF, _koi8r '[[:lower:]]' COLLATE koi8r_bin)`, + `REGEXP_INSTR(_latin1 0xFF, _latin1 '[[:upper:]]' COLLATE latin1_bin)`, + `REGEXP_INSTR(_koi8r 0xFF, _koi8r '[[:upper:]]' COLLATE koi8r_bin)`, + `REGEXP_INSTR(_latin1 0xF7, _latin1 '[[:alpha:]]')`, + `REGEXP_INSTR(_koi8r 0xF7, _koi8r '[[:alpha:]]')`, + `REGEXP_INSTR(_latin1'a', _latin1'A' collate latin1_general_ci)`, + `REGEXP_INSTR(_latin1'a', _latin1'A' collate latin1_bin)`, + `REGEXP_INSTR('a', '\\p{alphabetic}')`, + `REGEXP_INSTR('a', '\\P{alphabetic}')`, + `REGEXP_INSTR('👌🏾, '\\p{Emoji}\\p{Emoji_modifier}')`, + `REGEXP_INSTR('a', 
'\\p{Lowercase_letter}')`, + `REGEXP_INSTR('a', '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('A', '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('A', '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('a', collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('A', collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}')`, + `REGEXP_INSTR('a', collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('A', collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}')`, + `REGEXP_INSTR('dog cat dog', 'dog')`, + `REGEXP_INSTR('dog cat dog', 'dog', 2)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1, 0)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 1, 1)`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 1, 1, 'i')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 1, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2, 0)`, + `REGEXP_INSTR('dog cat dog', 'dog', 1, 2, 1)`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, 'i')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, 'c')`, + `REGEXP_INSTR('aa aaa aaaa', 'a{2}')`, + `REGEXP_INSTR('aa aaa aaaa', 'a{4}')`, + `REGEXP_INSTR(1234, 12)`, + `REGEXP_INSTR(1234, 12, 1)`, + `REGEXP_INSTR(1234, 12, 100)`, + `REGEXP_INSTR(1234, 12, 1, 1)`, + `REGEXP_INSTR(1234, 12, 1, 1, 1)`, + `REGEXP_INSTR(1234, 12, 1, 1, 1, 'c')`, + `REGEXP_INSTR('', ' ', 1000)`, + `REGEXP_INSTR(' ', ' ', 1000)`, + `REGEXP_INSTR(NULL, 'DOG', 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', NULL, 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', NULL, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, NULL, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, NULL, 'c')`, + `REGEXP_INSTR('dog cat dog', 'DOG', 1, 2, 1, NULL)`, + + `REGEXP_INSTR('dog cat dog', NULL, 1, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', NULL, 2, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, NULL, 1, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, 
2, NULL, 'c')`, + `REGEXP_INSTR('dog cat dog', _latin1 'DOG', 1, 2, 1, NULL)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} + +func RegexpSubstr(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_SUBSTR('Michael!', '.*')`, + `REGEXP_SUBSTR('new*\n*line', 'new\\*.\\*line')`, + `REGEXP_SUBSTR('a', '^[a-d]')`, + `REGEXP_SUBSTR('CamelCase', 'CAMELCASE')`, + `REGEXP_SUBSTR('CamelCase', 'CAMELCASE' COLLATE utf8mb4_0900_as_cs)`, + `REGEXP_SUBSTR('abc', 'ABC'`, + `REGEXP_SUBSTR(' ', '[[:blank:]]')`, + `REGEXP_SUBSTR('\t', '[[:blank:]]')`, + `REGEXP_SUBSTR(' ', '[[:space:]]')`, + `REGEXP_SUBSTR('\t', '[[:space:]]')`, + `REGEXP_SUBSTR(_latin1'a', _latin1'A' collate latin1_general_ci)`, + `REGEXP_SUBSTR(_latin1'a', _latin1'A' collate latin1_bin)`, + `REGEXP_SUBSTR('a', '\\p{alphabetic}')`, + `REGEXP_SUBSTR('a', '\\P{alphabetic}')`, + `REGEXP_SUBSTR('👌🏾, '\\p{Emoji}\\p{Emoji_modifier}')`, + `REGEXP_SUBSTR('a', '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('a', '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('A', '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('A', '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('a', collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('A', collate utf8mb4_0900_as_cs regexp '\\p{Lowercase_letter}')`, + `REGEXP_SUBSTR('a', collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('A', collate utf8mb4_0900_as_cs regexp '\\p{Uppercase_letter}')`, + `REGEXP_SUBSTR('dog cat dog', 'dog')`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 2)`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 1, 1)`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 'c')`, + `REGEXP_SUBSTR('dog cat dog', 'dog', 1, 2)`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 2, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 2, 'c')`, + `REGEXP_SUBSTR('aa aaa aaaa', 'a{2}')`, + `REGEXP_SUBSTR('aa aaa aaaa', 'a{4}')`, + `REGEXP_SUBSTR(1234, 12)`, + `REGEXP_SUBSTR(1234, 12, 1)`, + `REGEXP_SUBSTR(1234, 
12, 100)`, + `REGEXP_SUBSTR(1234, 12, 1, 1)`, + `REGEXP_SUBSTR(1234, 12, 1, 1, 'c')`, + + `REGEXP_SUBSTR(NULL, 'DOG', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', NULL, 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', NULL, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, NULL, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, NULL)`, + + `REGEXP_SUBSTR(NULL, '[', 1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', NULL, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', 1, NULL, 'i')`, + `REGEXP_SUBSTR('dog cat dog', '[', 1, 1, NULL)`, + + `REGEXP_SUBSTR('dog cat dog', 'DOG', 0, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', -1, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 100, 1, 'i')`, + `REGEXP_SUBSTR('dog cat dog', 'DOG', 1, 1, 0)`, + + `REGEXP_SUBSTR(' ', ' ', 1)`, + `REGEXP_SUBSTR(' ', ' ', 2)`, + `REGEXP_SUBSTR(' ', ' ', 3)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} + +func RegexpReplace(yield Query) { + mysqlDocSamples := []string{ + `REGEXP_REPLACE('a b c', 'b', 'X')`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 1, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 2, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 3, 3)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 0)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 1)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 2)`, + `REGEXP_REPLACE('abc def ghi', '[a-z]+', 'X', 4, 3)`, + `REGEXP_REPLACE('a', '\\p{Lowercase_letter}', 'X')`, + 
`REGEXP_REPLACE('a', '\\p{Uppercase_letter}', 'X')`, + `REGEXP_REPLACE('A', '\\p{Lowercase_letter}', 'X')`, + `REGEXP_REPLACE('A', '\\p{Uppercase_letter}', 'X')`, + `REGEXP_REPLACE(1234, 12, 6)`, + `REGEXP_REPLACE(1234, 12, 6, 1)`, + `REGEXP_REPLACE(1234, 12, 6, 100)`, + `REGEXP_REPLACE(1234, 12, 6, 1, 1)`, + `REGEXP_REPLACE(1234, 12, 6, 1, 1, 'c')`, + + `REGEXP_REPLACE(NULL, 'DOG', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', NULL, 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, 1, NULL)`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', '1', '1', 0)`, + + `REGEXP_REPLACE(NULL, _latin1'DOG', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, 1, NULL)`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', '1', '1', 0)`, + + `REGEXP_REPLACE(NULL, '[', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', '[', 'bar', 1, 1, NULL)`, + + `REGEXP_REPLACE(NULL, _latin1'[', 'bar', 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', NULL, 1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', 'bar', 1, NULL, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'[', 'bar', 1, 1, NULL)`, + + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 0, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('', 'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('', 'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', 'DOG', 'bar', 1, 1, 0)`, + + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 0, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', -1, 
1, 'i')`, + `REGEXP_REPLACE('', _latin1'DOG', 'bar', -1, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('', _latin1'DOG', 'bar', 100, 1, 'i')`, + `REGEXP_REPLACE('dog cat dog', _latin1'DOG', 'bar', 1, 1, 0)`, + + `REGEXP_REPLACE(' ', ' ', 'x', 1)`, + `REGEXP_REPLACE(' ', ' ', 'x', 2)`, + `REGEXP_REPLACE(' ', ' ', 'x', 3)`, + + `REGEXP_REPLACE(' ', _latin1' ', 'x', 1)`, + `REGEXP_REPLACE(' ', _latin1' ', 'x', 2)`, + `REGEXP_REPLACE(' ', _latin1' ', 'x', 3)`, + } + + for _, q := range mysqlDocSamples { + yield(q, nil) + } +} diff --git a/go/vt/vtgate/evalengine/testcases/inputs.go b/go/vt/vtgate/evalengine/testcases/inputs.go index 47f50b677c5..5785375955f 100644 --- a/go/vt/vtgate/evalengine/testcases/inputs.go +++ b/go/vt/vtgate/evalengine/testcases/inputs.go @@ -133,6 +133,41 @@ var inputConversions = []string{ "cast(time '12:34:56' as json)", "cast(time '12:34:58' as json)", "cast(time '5 12:34:58' as json)", } +var regexInputs = []string{ + "0", "1", "' 0 '", `'\t1foo\t'`, + `'foobar'`, `_utf8 'foobar'`, `''`, `_binary 'foobar'`, + `0x0`, `0x1`, `0xff`, + "NULL", "true", "false", + "0xFF666F6F626172FF", + "time '10:04:58'", "date '2000-01-01'", + "timestamp '2000-01-01 10:34:58'", + "cast(0 as json)", "cast(1 as json)", + "cast(true as json)", "cast(false as json)", + // JSON numbers + "cast(2 as json)", "cast(1.1 as json)", "cast(-1.1 as json)", + // JSON strings + "cast('\"foo\"' as json)", + // JSON binary values + "cast(_binary' \"foo\"' as json)", + "cast(0xFF666F6F626172FF as json)", + "cast(0b01 as json)", + // JSON arrays + "cast('[\"a\"]' as json)", + // JSON objects + "cast('{\"a\": 1, \"b\": 2}' as json)", +} + +var regexMatchStrings = []string{ + "NULL", + "'c'", "'i'", "'m'", "'n'", "'u'", "'cimnu'", "'cimnuunmic'", +} + +var regexCounters = []string{ + "NULL", + "0", "1", "5", "100000", + "'2'", "0.4", "0.5", "0x1", +} + const inputPi = 
"314159265358979323846264338327950288419716939937510582097494459" var inputStrings = []string{ diff --git a/go/vt/vtgate/evalengine/translate.go b/go/vt/vtgate/evalengine/translate.go index 7690201f2a3..8cc6df7bd02 100644 --- a/go/vt/vtgate/evalengine/translate.go +++ b/go/vt/vtgate/evalengine/translate.go @@ -75,6 +75,14 @@ func (ast *astCompiler) translateComparisonExpr2(op sqlparser.ComparisonExprOper return &LikeExpr{BinaryExpr: binaryExpr}, nil case sqlparser.NotLikeOp: return &LikeExpr{BinaryExpr: binaryExpr, Negate: true}, nil + case sqlparser.RegexpOp, sqlparser.NotRegexpOp: + return &builtinRegexpLike{ + CallExpr: CallExpr{ + Arguments: []Expr{left, right}, + Method: "REGEXP_LIKE", + }, + Negate: op == sqlparser.NotRegexpOp, + }, nil default: return nil, vterrors.Errorf(vtrpcpb.Code_UNIMPLEMENTED, op.ToString()) } diff --git a/go/vt/vtgate/evalengine/translate_builtin.go b/go/vt/vtgate/evalengine/translate_builtin.go index fb6f988af7d..49784973180 100644 --- a/go/vt/vtgate/evalengine/translate_builtin.go +++ b/go/vt/vtgate/evalengine/translate_builtin.go @@ -765,6 +765,167 @@ func (ast *astCompiler) translateCallable(call sqlparser.Callable) (Expr, error) collate: ast.cfg.Collation, }, nil + case *sqlparser.RegexpLikeExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpLike{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_LIKE"}, + Negate: false, + }, nil + + case *sqlparser.RegexpInstrExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + 
if call.Position != nil { + position, err := ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.ReturnOption != nil { + returnOption, err := ast.translateExpr(call.ReturnOption) + if err != nil { + return nil, err + } + args = append(args, returnOption) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpInstr{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_INSTR"}, + }, nil + + case *sqlparser.RegexpSubstrExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern} + + if call.Position != nil { + position, err := ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpSubstr{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_SUBSTR"}, + }, nil + + case *sqlparser.RegexpReplaceExpr: + input, err := ast.translateExpr(call.Expr) + if err != nil { + return nil, err + } + + pattern, err := ast.translateExpr(call.Pattern) + if err != nil { + return nil, err + } + + repl, err := ast.translateExpr(call.Repl) + if err != nil { + return nil, err + } + + args := []Expr{input, pattern, repl} + + if call.Position != nil { + position, err := 
ast.translateExpr(call.Position) + if err != nil { + return nil, err + } + args = append(args, position) + } + + if call.Occurrence != nil { + occurrence, err := ast.translateExpr(call.Occurrence) + if err != nil { + return nil, err + } + args = append(args, occurrence) + } + + if call.MatchType != nil { + matchType, err := ast.translateExpr(call.MatchType) + if err != nil { + return nil, err + } + args = append(args, matchType) + } + + return &builtinRegexpReplace{ + CallExpr: CallExpr{Arguments: args, Method: "REGEXP_REPLACE"}, + }, nil default: return nil, translateExprNotSupported(call) } diff --git a/go/vt/vttablet/tabletmanager/vreplication/utils.go b/go/vt/vttablet/tabletmanager/vreplication/utils.go index 02bcbb235be..1e26687e147 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/utils.go +++ b/go/vt/vttablet/tabletmanager/vreplication/utils.go @@ -155,6 +155,26 @@ func isUnrecoverableError(err error) bool { mysql.ERInvalidJSONTextInParams, mysql.ERJSONDocumentTooDeep, mysql.ERJSONValueTooBig, + mysql.ERRegexpError, + mysql.ERRegexpStringNotTerminated, + mysql.ERRegexpIllegalArgument, + mysql.ERRegexpIndexOutOfBounds, + mysql.ERRegexpInternal, + mysql.ERRegexpRuleSyntax, + mysql.ERRegexpBadEscapeSequence, + mysql.ERRegexpUnimplemented, + mysql.ERRegexpMismatchParen, + mysql.ERRegexpBadInterval, + mysql.ERRRegexpMaxLtMin, + mysql.ERRegexpInvalidBackRef, + mysql.ERRegexpLookBehindLimit, + mysql.ERRegexpMissingCloseBracket, + mysql.ERRegexpInvalidRange, + mysql.ERRegexpStackOverflow, + mysql.ERRegexpTimeOut, + mysql.ERRegexpPatternTooBig, + mysql.ERRegexpInvalidCaptureGroup, + mysql.ERRegexpInvalidFlag, mysql.ERNoDefault, mysql.ERNoDefaultForField, mysql.ERNonUniq,