Skip to content

Commit

Permalink
RegexArrayShapeMatcher - Fix shape of single top level alternations
Browse files Browse the repository at this point in the history
  • Loading branch information
staabm authored Aug 8, 2024
1 parent 427a319 commit 0132833
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 24 deletions.
27 changes: 23 additions & 4 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
use PHPStan\Type\Constant\ConstantStringType;
use PHPStan\Type\IntegerRangeType;
use PHPStan\Type\IntegerType;
use PHPStan\Type\NullType;
use PHPStan\Type\Regex\RegexAlternation;
use PHPStan\Type\Regex\RegexCapturingGroup;
use PHPStan\Type\Regex\RegexExpressionHelper;
Expand Down Expand Up @@ -140,7 +141,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
&& $onlyOptionalTopLevelGroup !== null
) {
// if only one top level capturing optional group exists
// we build a more precise constant union of a empty-match and a match with the group
// we build a more precise tagged union of a empty-match and a match with the group

$onlyOptionalTopLevelGroup->forceNonOptional();

Expand All @@ -154,18 +155,24 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
);

if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
// positive match has a subject but not any capturing group
$combiType = TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true),
$combiType,
);
}

$onlyOptionalTopLevelGroup->clearOverrides();

return $combiType;
} elseif (
!$matchesAll
&& $wasMatched->yes()
&& $onlyOptionalTopLevelGroup === null
&& $onlyTopLevelAlternation !== null
&& !$wasMatched->no()
) {
// if only a single top level alternation exist built a more precise tagged union

$combiTypes = [];
$isOptionalAlternation = false;
foreach ($onlyTopLevelAlternation->getGroupCombinations() as $groupCombo) {
Expand All @@ -179,6 +186,9 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$beforeCurrentCombo = false;
} elseif ($beforeCurrentCombo && !$group->resetsGroupCounter()) {
$group->forceNonOptional();
$group->forceType(
$this->containsUnmatchedAsNull($flags, $matchesAll) ? new NullType() : new ConstantStringType(''),
);
} elseif (
$group->getAlternationId() === $onlyTopLevelAlternation->getId()
&& !$this->containsUnmatchedAsNull($flags, $matchesAll)
Expand All @@ -200,17 +210,26 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched

foreach ($groupCombo as $groupId) {
$group = $comboList[$groupId];
$group->restoreNonOptional();
$group->clearOverrides();
}
}

if ($isOptionalAlternation && !$this->containsUnmatchedAsNull($flags, $matchesAll)) {
if (
!$this->containsUnmatchedAsNull($flags, $matchesAll)
&& (
$onlyTopLevelAlternation->getAlternationsCount() !== count($onlyTopLevelAlternation->getGroupCombinations())
|| $isOptionalAlternation
)
) {
// positive match has a subject but not any capturing group
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [0], [], true);
}

return TypeCombinator::union(...$combiTypes);
}

// the general case, which should work in all cases but does not yield the most
// precise result possible in some cases
return $this->buildArrayType(
$groupList,
$wasMatched,
Expand Down
10 changes: 9 additions & 1 deletion src/Type/Regex/RegexAlternation.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ final class RegexAlternation
/** @var array<int, list<int>> */
private array $groupCombinations = [];

public function __construct(private readonly int $alternationId)
public function __construct(
private readonly int $alternationId,
private readonly int $alternationsCount,
)
{
}

Expand All @@ -28,6 +31,11 @@ public function pushGroup(int $combinationIndex, RegexCapturingGroup $group): vo
$this->groupCombinations[$combinationIndex][] = $group->getId();
}

public function getAlternationsCount(): int
{
return $this->alternationsCount;
}

/**
* @return array<int, list<int>>
*/
Expand Down
13 changes: 12 additions & 1 deletion src/Type/Regex/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ final class RegexCapturingGroup

private bool $forceNonOptional = false;

private ?Type $forceType = null;

public function __construct(
private readonly int $id,
private readonly ?string $name,
Expand All @@ -30,9 +32,15 @@ public function forceNonOptional(): void
$this->forceNonOptional = true;
}

public function restoreNonOptional(): void
public function forceType(Type $type): void
{
$this->forceType = $type;
}

public function clearOverrides(): void
{
$this->forceNonOptional = false;
$this->forceType = null;
}

public function resetsGroupCounter(): bool
Expand Down Expand Up @@ -109,6 +117,9 @@ public function getName(): ?string

public function getType(): Type
{
if ($this->forceType !== null) {
return $this->forceType;
}
return $this->type;
}

Expand Down
2 changes: 1 addition & 1 deletion src/Type/Regex/RegexGroupParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ private function walkRegexAst(

if ($ast->getId() === '#alternation') {
$alternationId++;
$alternation = new RegexAlternation($alternationId);
$alternation = new RegexAlternation($alternationId, count($ast->getChildren()));
}

if ($ast->getId() === '#mark') {
Expand Down
26 changes: 26 additions & 0 deletions tests/PHPStan/Analyser/nsrt/bug-11311.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,29 @@ function (string $s): void {
preg_match('/%a(\d*)?/', $s, $matches, PREG_UNMATCHED_AS_NULL);
assertType("array{0?: string, 1?: ''|numeric-string|null}", $matches); // could be array{0?: string, 1?: ''|numeric-string}
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL)) {
assertType("array{string, numeric-string|null, non-empty-string|null}", $matches);
} else {
assertType("array{}", $matches);
}
assertType("array{}|array{string, numeric-string|null, non-empty-string|null}", $matches);
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL|PREG_OFFSET_CAPTURE) === 1) {
assertType("array{array{string|null, int<-1, max>}, array{numeric-string|null, int<-1, max>}, array{non-empty-string|null, int<-1, max>}}", $matches);
}
};

function (string $s): void {
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches, PREG_UNMATCHED_AS_NULL) === 1) {
assertType("array{string, 'ux'|null, 'u'|null, 'vy'|null, 'v'|null}", $matches);
}
};

function (string $s): void {
preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_UNMATCHED_AS_NULL);
assertType("array{0?: string, 1?: numeric-string|null, 2?: non-empty-string|null}", $matches);
};
50 changes: 33 additions & 17 deletions tests/PHPStan/Analyser/nsrt/preg_match_shapes.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,16 +129,16 @@ function doUnknownFlags(string $s, int $flags): void {

function doMultipleAlternativeCaptureGroupsWithSameNameWithModifier(string $s): void {
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}

function doMultipleConsecutiveCaptureGroupsWithSameNameWithModifier(string $s): void {
if (preg_match('/(?J)(?<Foo>[a-z]+)|(?<Foo>[0-9]+)/', $s, $matches)) {
assertType('array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}
assertType('array{}|array{0: string, Foo: numeric-string|non-empty-string, 1: non-empty-string, 2?: numeric-string}', $matches);
assertType("array{}|array{0: string, Foo: non-empty-string, 1: non-empty-string}|array{0: string, Foo: numeric-string, 1: '', 2: numeric-string}", $matches);
}

// https://github.com/hoaproject/Regex/issues/31
Expand Down Expand Up @@ -307,21 +307,21 @@ function (string $size): void {
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))$~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}', $matches);
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}", $matches);
};

function (string $size): void {
if (preg_match('~^(?:(\\d+)x(\\d+)|(\\d+)|x(\\d+))?$~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: numeric-string, 2: numeric-string, 3?: numeric-string, 4?: numeric-string}|array{string}', $matches);
assertType("array{string, '', '', '', numeric-string}|array{string, '', '', numeric-string}|array{string, numeric-string, numeric-string}|array{string}", $matches);
};

function (string $size): void {
if (preg_match('~\{(?:(include)\\s+(?:[$]?\\w+(?<!file))\\s)|(?:(include\\s+file)\\s+(?:[$]?\\w+)\\s)|(?:(include(?:Template|(?:\\s+file)))\\s+(?:\'?.*?\.latte\'?)\\s)~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType("array{0: string, 1: 'include', 2?: non-falsy-string, 3?: non-falsy-string}", $matches);
assertType("array{string, '', '', non-falsy-string}|array{string, '', non-falsy-string}|array{string, 'include'}", $matches);
};


Expand All @@ -338,13 +338,7 @@ function bug11277a(string $value): void
function bug11277b(string $value): void
{
if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) {
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches);
if (count($matches) === 2) {
assertType('array{string, string}', $matches); // could be array{string, non-empty-string}
}
if (count($matches) === 3) {
assertType('array{string, string, string}', $matches); // could be array{string, non-empty-string, non-empty-string}
}
assertType("array{0: string, 1?: non-empty-string}|array{string, '', non-empty-string}", $matches);
}
}

Expand Down Expand Up @@ -656,10 +650,9 @@ function (string $value): void
}
};

function (string $value): void
{
function (string $value): void {
if (preg_match('/^(?:(x)|(y))*$/', $value, $matches, PREG_OFFSET_CAPTURE)) {
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}, 2?: array{non-empty-string, int<-1, max>}}", $matches);
assertType("array{0: array{string, int<-1, max>}, 1?: array{non-empty-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
}
};

Expand All @@ -683,3 +676,26 @@ static public function sayHello(string $source): void
assertType("array{0?: string, dateFrom?: ''|numeric-string, 1?: ''|numeric-string, dateTo?: numeric-string, 2?: numeric-string}", $matches);
}
}

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches) === 1) {
assertType("array{0: string, 1?: numeric-string}|array{string, '', non-empty-string}", $matches);
}
};

function (string $s): void {
if (preg_match('~a|((u)x)|((v)y)~', $s, $matches) === 1) {
assertType("array{string, '', '', 'vy', 'v'}|array{string, 'ux', 'u'}|array{string}", $matches);
}
};

function (string $s): void {
if (preg_match('~a|(\d)|(\s)~', $s, $matches, PREG_OFFSET_CAPTURE) === 1) {
assertType("array{0: array{string, int<-1, max>}, 1?: array{numeric-string, int<-1, max>}}|array{array{string, int<-1, max>}, array{'', int<-1, max>}, array{non-empty-string, int<-1, max>}}", $matches);
}
};

function (string $s): void {
preg_match('~a|(\d)|(\s)~', $s, $matches);
assertType("array{0?: string, 1?: '', 2?: non-empty-string}|array{0?: string, 1?: numeric-string}", $matches);
};

0 comments on commit 0132833

Please sign in to comment.