diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 23ee4b5a36..628bcb0d3c 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -14,6 +14,7 @@ use PHPStan\Type\Constant\ConstantStringType; use PHPStan\Type\IntegerRangeType; use PHPStan\Type\IntegerType; +use PHPStan\Type\NullType; use PHPStan\Type\Regex\RegexAlternation; use PHPStan\Type\Regex\RegexCapturingGroup; use PHPStan\Type\Regex\RegexExpressionHelper; @@ -140,7 +141,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched && $onlyOptionalTopLevelGroup !== null ) { // if only one top level capturing optional group exists - // we build a more precise constant union of a empty-match and a match with the group + // we build a more precise tagged union of a empty-match and a match with the group $onlyOptionalTopLevelGroup->forceNonOptional(); @@ -154,18 +155,24 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ); if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) { + // positive match has a subject but not any capturing group $combiType = TypeCombinator::union( new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true), $combiType, ); } + $onlyOptionalTopLevelGroup->clearOverrides(); + return $combiType; } elseif ( !$matchesAll - && $wasMatched->yes() + && $onlyOptionalTopLevelGroup === null && $onlyTopLevelAlternation !== null + && !$wasMatched->no() ) { + // if only a single top level alternation exist built a more precise tagged union + $combiTypes = []; $isOptionalAlternation = false; foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) { @@ -179,6 +186,9 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $beforeCurrentCombo = false; } elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) { $group->forceNonOptional(); + $group->forceType( + $this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''), + ); } elseif ( $group->getAlternationId() === $onlyTopLevelAlternation->getId() && !$this->containsUnmatchedAsNull($flags, $matchesAll) @@ -200,17 +210,26 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched foreach ($groupCombo as $groupId) { $group = $comboList[$groupId]; - $group->restoreNonOptional(); + $group->clearOverrides(); } } - if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags, $matchesAll)) { + if ( + !$this->containsUnmatchedAsNull($flags, $matchesAll) + && ( + $onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations()) + || $isOptionalAlternation + ) + ) { + // positive match has a subject but not any capturing group $combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true); } return TypeCombinator::union(...$combiTypes); } + // the general case, which should work in all cases but does not yield the most + // precise result possible in some cases return $this->buildArrayType( $groupList, $wasMatched, diff --git a/src/Type/Regex/RegexAlternation.php b/src/Type/Regex/RegexAlternation.php index 70ba5a41ea..9a00411c10 100644 --- a/src/Type/Regex/RegexAlternation.php +++ b/src/Type/Regex/RegexAlternation.php @@ -10,7 +10,10 @@ final class RegexAlternation /** @var array> */ private array $groupCombinations = []; - public function __construct(private readonly int $alternationId) + public function __construct( + private readonly int $alternationId, + private readonly int $alternationsCount, + ) { } @@ -28,6 +31,11 @@ public function pushGroup(int $combinationIndex, RegexCapturingGroup $group): vo $this->groupCombinations[$combinationIndex][] = $group->getId(); } + public function getAlternationsCount(): int + { + return $this->alternationsCount; + } + /** * @return array> */ diff --git a/src/Type/Regex/RegexCapturingGroup.php b/src/Type/Regex/RegexCapturingGroup.php index 83fafbdd41..62708a2de3 100644 --- a/src/Type/Regex/RegexCapturingGroup.php +++ b/src/Type/Regex/RegexCapturingGroup.php @@ -9,6 +9,8 @@ final class RegexCapturingGroup private bool $forceNonOptional = false; + private ?Type $forceType = null; + public function __construct( private readonly int $id, private readonly ?string $name, @@ -30,9 +32,15 @@ public function forceNonOptional(): void $this->forceNonOptional = true; } - public function restoreNonOptional(): void + public function forceType(Type $type): void + { + $this->forceType = $type; + } + + public function clearOverrides(): void { $this->forceNonOptional = false; + $this->forceType = null; } public function resetsGroupCounter(): bool @@ -109,6 +117,9 @@ public function getName(): ?string public function getType(): Type { + if ($this->forceType !== null) { + return $this->forceType; + } return $this->type; } diff --git a/src/Type/Regex/RegexGroupParser.php b/src/Type/Regex/RegexGroupParser.php index f1d9331e2a..48a6b9e517 100644 --- a/src/Type/Regex/RegexGroupParser.php +++ b/src/Type/Regex/RegexGroupParser.php @@ -177,7 +177,7 @@ private function walkRegexAst( if ($ast->getId() === '#alternation') { $alternationId++; - $alternation = new RegexAlternation($alternationId); + $alternation = new RegexAlternation($alternationId, count($ast->getChildren())); } if ($ast->getId() === '#mark') { diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311.php b/tests/PHPStan/Analyser/nsrt/bug-11311.php index 43bc3d6247..3a01594ded 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311.php @@ -198,3 +198,29 @@ function (string $s): void { preg_match('/%a(\d*)?/', $s, $matches, PREG_UNMATCHED_AS_NULL); assertType("array{0?: string, 1?: ''|numeric-string|null}", $matches); // could be array{0?: string, 1?: ''|numeric-string} }; + +function (string $s): void { + if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL)) { + assertType("array{string, numeric-string|null, non-empty-string|null}", $matches); + } else { + assertType("array{}", $matches); + } + assertType("array{}|array{string, numeric-string|null, non-empty-string|null}", $matches); +}; + +function (string $s): void { + if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL|PREG_OFFSET_CAPTURE) === 1) { + assertType("array{array{string|null, int<-1, max>}, array{numeric-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}", $matches); + } +}; + +function (string $s): void { + if (preg_match('~a|((u)x)|((v)y)~', $s, $matches, PREG_UNMATCHED_AS_NULL) === 1) { + assertType("array{string, 'ux'|null, 'u'|null, 'vy'|null, 'v'|null}", $matches); + } +}; + +function (string $s): void { + preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL); + assertType("array{0?: string, 1?: numeric-string|null, 2?: non-empty-string|null}", $matches); +}; diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 3e633455ab..d441a79fcd 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -129,16 +129,16 @@ function doUnknownFlags(string $s, int $flags): void { function doMultipleAlternativeCaptureGroupsWithSameNameWithModifier(string $s): void { if (preg_match('/(?J)(?[a-z]+)|(?[0-9]+)/', $s, $matches)) { - assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches); + assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches); } - assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches); + assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches); } function doMultipleConsecutiveCaptureGroupsWithSameNameWithModifier(string $s): void { if (preg_match('/(?J)(?[a-z]+)|(?[0-9]+)/', $s, $matches)) { - assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches); + assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches); } - assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches); + assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches); } // https://github.com/hoaproject/Regex/issues/31 @@ -307,21 +307,21 @@ function (string $size): void { if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))$~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}', $matches); + assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}", $matches); }; function (string $size): void { if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))?$~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}|array{string}', $matches); + assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}|array{string}", $matches); }; function (string $size): void { if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?}, 1?: array{non-empty-string, int<-1, max>}, 2?: array{non-empty-string, int<-1, max>}}", $matches); + assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches); } }; @@ -683,3 +676,26 @@ static public function sayHello(string $source): void assertType("array{0?: string, dateFrom?: ''|numeric-string, 1?: ''|numeric-string, dateTo?: numeric-string, 2?: numeric-string}", $matches); } } + +function (string $s): void { + if (preg_match('~a|(\d)|(\s)~', $s, $matches) === 1) { + assertType("array{0: string, 1?: numeric-string}|array{string, '', non-empty-string}", $matches); + } +}; + +function (string $s): void { + if (preg_match('~a|((u)x)|((v)y)~', $s, $matches) === 1) { + assertType("array{string, '', '', 'vy', 'v'}|array{string, 'ux', 'u'}|array{string}", $matches); + } +}; + +function (string $s): void { + if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_OFFSET_CAPTURE) === 1) { + assertType("array{0: array{string, int<-1, max>}, 1?: array{numeric-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches); + } +}; + +function (string $s): void { + preg_match('~a|(\d)|(\s)~', $s, $matches); + assertType("array{0?: string, 1?: '', 2?: non-empty-string}|array{0?: string, 1?: numeric-string}", $matches); +};