You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
pub(crate)fntest(this:&JsValue,args:&[JsValue],context:&mutContext,) -> JsResult<JsValue>{// 1. Let R be the this value.// 2. If Type(R) is not Object, throw a TypeError exception.let this = this.as_object().ok_or_else(|| {
context
.construct_type_error("RegExp.prototype.test method called on incompatible value")})?;// 3. Let string be ? ToString(S).let arg_str = args
.get(0).cloned().unwrap_or_default().to_string(context)?;// 4. Let match be ? RegExpExec(R, string).let m = Self::abstract_exec(this, arg_str, context)?;// 5. If match is not null, return true; else return false.if m.is_some(){Ok(JsValue::new(true))}else{Ok(JsValue::new(false))}}
在test()方法中找到了Self::abstract_exec()方法
pub(crate)fnabstract_exec(this:&JsObject,input:JsString,context:&mutContext,) -> JsResult<Option<JsObject>>{// 1. Assert: Type(R) is Object.// 2. Assert: Type(S) is String.// 3. Let exec be ? Get(R, "exec").let exec = this.get("exec", context)?;// 4. If IsCallable(exec) is true, thenifletSome(exec) = exec.as_callable(){// a. Let result be ? Call(exec, R, « S »).let result = exec.call(&this.clone().into(),&[input.into()], context)?;// b. If Type(result) is neither Object nor Null, throw a TypeError exception.if !result.is_object() && !result.is_null(){return context.throw_type_error("regexp exec returned neither object nor null");}// c. Return result.returnOk(result.as_object().cloned());}// 5. Perform ? RequireInternalSlot(R, [[RegExpMatcher]]).if !this.is_regexp(){return context.throw_type_error("RegExpExec called with invalid value");}// 6. Return ? RegExpBuiltinExec(R, S).Self::abstract_builtin_exec(this,&input, context)}
pub(crate)fnabstract_builtin_exec(this:&JsObject,input:&JsString,context:&mutContext,) -> JsResult<Option<JsObject>>{// 1. Assert: R is an initialized RegExp instance.let rx = {let obj = this.borrow();ifletSome(rx) = obj.as_regexp(){
rx.clone()}else{return context.throw_type_error("RegExpBuiltinExec called with invalid value");}};// 2. Assert: Type(S) is String.// 3. Let length be the number of code units in S.let length = input.encode_utf16().count();// 4. Let lastIndex be ℝ(? ToLength(? Get(R, "lastIndex"))).letmut last_index = this.get("lastIndex", context)?.to_length(context)?;// 5. Let flags be R.[[OriginalFlags]].let flags = &rx.original_flags;// 6. If flags contains "g", let global be true; else let global be false.let global = flags.contains('g');// 7. If flags contains "y", let sticky be true; else let sticky be false.let sticky = flags.contains('y');// 8. If global is false and sticky is false, set lastIndex to 0.if !global && !sticky {
last_index = 0;}// 9. Let matcher be R.[[RegExpMatcher]].let matcher = &rx.matcher;// 10. If flags contains "u", let fullUnicode be true; else let fullUnicode be false.let unicode = flags.contains('u');// 11. Let matchSucceeded be false.// 12. Repeat, while matchSucceeded is false,let match_value = loop{// a. If lastIndex > length, thenif last_index > length {// i. If global is true or sticky is true, thenif global || sticky {// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set("lastIndex",0,true, context)?;}// ii. Return null.returnOk(None);}// b. Let r be matcher(S, lastIndex).// Check if last_index is a valid utf8 index into input.let last_byte_index = matchString::from_utf16(&input.encode_utf16().take(last_index).collect::<Vec<u16>>(),){Ok(s) => s.len(),Err(_) => {return context
.throw_type_error("Failed to get byte index from utf16 encoded string")}};let r = matcher.find_from(input, last_byte_index).next();match r {// c. If r is failure, thenNone => {// i. If sticky is true, thenif sticky {// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set("lastIndex",0,true, context)?;// 2. Return null.returnOk(None);}// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, unicode);}Some(m) => {// c. If r is failure, then#[allow(clippy::if_not_else)]if m.start() != last_index {// i. If sticky is true, thenif sticky {// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
this.set("lastIndex",0,true, context)?;// 2. Return null.returnOk(None);}// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(input, last_index, unicode);// d. Else,}else{//i. Assert: r is a State.//ii. Set matchSucceeded to true.break m;}}}};// 13. Let e be r's endIndex value.letmut e = match_value.end();// 14. If fullUnicode is true, thenif unicode {// e is an index into the Input character list, derived from S, matched by matcher.// Let eUTF be the smallest index into S that corresponds to the character at element e of Input.// If e is greater than or equal to the number of elements in Input, then eUTF is the number of code units in S.// b. Set e to eUTF.
e = input.split_at(e).0.encode_utf16().count();}// 15. If global is true or sticky is true, thenif global || sticky {// a. Perform ? Set(R, "lastIndex", 𝔽(e), true).
this.set("lastIndex", e,true, context)?;}// 16. Let n be the number of elements in r's captures List. (This is the same value as 22.2.2.1's NcapturingParens.)let n = match_value.captures.len();// 17. Assert: n < 23^2 - 1.debug_assert!(n < 23usize.pow(2) - 1);// 18. Let A be ! ArrayCreate(n + 1).// 19. Assert: The mathematical value of A's "length" property is n + 1.let a = Array::array_create(n + 1,None, context)?;// 20. Perform ! CreateDataPropertyOrThrow(A, "index", 𝔽(lastIndex)).
a.create_data_property_or_throw("index", match_value.start(), context).expect("this CreateDataPropertyOrThrow call must not fail");// 21. Perform ! CreateDataPropertyOrThrow(A, "input", S).
a.create_data_property_or_throw("input", input.clone(), context).expect("this CreateDataPropertyOrThrow call must not fail");// 22. Let matchedSubstr be the substring of S from lastIndex to e.let matched_substr = ifletSome(s) = input.get(match_value.range()){
s
}else{""};// 23. Perform ! CreateDataPropertyOrThrow(A, "0", matchedSubstr).
a.create_data_property_or_throw(0, matched_substr, context).expect("this CreateDataPropertyOrThrow call must not fail");// 24. If R contains any GroupName, then// 25. Else,let named_groups = match_value.named_groups();let groups = if named_groups.clone().count() > 0{// a. Let groups be ! OrdinaryObjectCreate(null).let groups = JsValue::from(JsObject::empty());// Perform 27.f here// f. If the ith capture of R was defined with a GroupName, then// i. Let s be the CapturingGroupName of the corresponding RegExpIdentifierName.// ii. Perform ! CreateDataPropertyOrThrow(groups, s, capturedValue).for(name, range)in named_groups {ifletSome(range) = range {let value = ifletSome(s) = input.get(range.clone()){
s
}else{""};
groups
.to_object(context)?
.create_data_property_or_throw(name, value, context).expect("this CreateDataPropertyOrThrow call must not fail");}}
groups
}else{// a. Let groups be undefined.JsValue::undefined()};// 26. Perform ! CreateDataPropertyOrThrow(A, "groups", groups).
a.create_data_property_or_throw("groups", groups, context).expect("this CreateDataPropertyOrThrow call must not fail");// 27. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, dofor i in1..=n {// a. Let captureI be ith element of r's captures List.let capture = match_value.group(i);let captured_value = match capture {// b. If captureI is undefined, let capturedValue be undefined.None => JsValue::undefined(),// c. Else if fullUnicode is true, then// d. Else,Some(range) => {ifletSome(s) = input.get(range){
s.into()}else{"".into()}}};// e. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
a.create_data_property_or_throw(i, captured_value, context).expect("this CreateDataPropertyOrThrow call must not fail");}// 28. Return A.Ok(Some(a))}
JavaScript正则中g标志
缘起
有一天在思否社区看到有个问题,大致描述如下
对问题而言,遍历中的
i
就是一个字符,不需要用到g
。但是就我对正则的理解(过于浅薄)感觉上有没有
g
(只是全局搜索,不会匹配到就停下来)应该不影响,激发了我的好奇心。上面题的建议写法如下
解密过程
首先可以确定的表现一定是
g
导致的搜索引擎
打开 MDN 仔细查看
g
标志的作用,得到结论和我的理解无二。我猜想应该就是
g
可能启用了某种缓存,又因为reg
相对过滤器是全局变量,我将代码改为:将正则声明到每一次遍历,得到结论就是正确的,验证了我的猜想。也得到了,缓存就是正则中的某个地方
下面我找到对应的源码来查看问题的原因
源码层面
由于最近在看 Rust,所以使用 Rust 编写的源码查看 https://github/boa-dev/boa
打开项目后,点击
.
进入 vscode 模式,command+p 搜索 regexp 关键词进入
test.rs
文件,command+f 搜索/g
可以找到在 90 行有个last_index()
的测试看到了有
lastIndex
关键字,这里再已经大致猜到问题的原因了,g 标志存在匹配后的最后一个下标,导致出现问题。我们将视线移入到
mod.rs
文件中,搜索test
在 631 行看到了
fn test()
方法在
test()
方法中找到了Self::abstract_exec()
方法又在
Self::abstract_exec()
方法中找到了Self::abstract_builtin_exec()
方法Self::abstract_builtin_exec()
方法中存在global
以及last_index
这样看来最终执行的方法就是在这里了,仔细查看该方法中的代码(代码写的很详细而且每一步都有注释)在第 12 步中:
match_value
)advance_string_index()
方法的返回值advance_string_index()
不在当前问题的考虑范围 https://tc39.es/ecma262/#sec-advancestringindex第 13 步获取匹配到的值的 endIndex
第 15 步将 lastIndex 置为 endIndex
至此也就整明白了
g
标志的含义,在正则的原型链中存在一个lastIndex
,如果匹配为真时lastIndex
不会重置为 0 ,下一次开始时继承了上次位置,结论
在问题代码中分析
The text was updated successfully, but these errors were encountered: