Skip to content

Commit

Permalink
WIP recursive queries
Browse files Browse the repository at this point in the history
  • Loading branch information
GuySartorelli committed Sep 18, 2023
1 parent b8665a7 commit 491d5b2
Show file tree
Hide file tree
Showing 15 changed files with 1,068 additions and 28 deletions.
91 changes: 89 additions & 2 deletions src/ORM/Connect/DBQueryBuilder.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,24 @@ public function buildSQL(SQLExpression $query, &$parameters)
*/
protected function buildSelectQuery(SQLSelect $query, array &$parameters)
{
$sql = $this->buildSelectFragment($query, $parameters);
$needsParenthisis = count($query->getUnions()) > 0;
$nl = $this->getSeparator();
$sql = '';
if ($needsParenthisis) {
$sql .= "({$nl}";
}
$sql .= $this->buildWithFragment($query, $parameters);
$sql .= $this->buildSelectFragment($query, $parameters);
$sql .= $this->buildFromFragment($query, $parameters);
$sql .= $this->buildWhereFragment($query, $parameters);
$sql .= $this->buildGroupByFragment($query, $parameters);
$sql .= $this->buildHavingFragment($query, $parameters);
$sql .= $this->buildOrderByFragment($query, $parameters);
$sql .= $this->buildLimitFragment($query, $parameters);
if ($needsParenthisis) {
$sql .= "{$nl})";
}
$sql .= $this->buildUnionFragment($query, $parameters);
return $sql;
}

Expand Down Expand Up @@ -155,6 +166,66 @@ protected function buildUpdateQuery(SQLUpdate $query, array &$parameters)
return $sql;
}

protected function buildUnionFragment(SQLSelect $query, array &$parameters): string
{
$unions = $query->getUnions();
if (empty($unions)) {
return '';
}

$nl = $this->getSeparator();
$clauses = [];

foreach ($unions as $union) {
$unionQuery = $union['query'];
$unionType = $union['type'];

$clause = "{$nl}UNION";

if ($unionType) {
$clause .= " $unionType";
}

$clause .= "$nl($nl" . $this->buildSelectQuery($unionQuery, $parameters) . "$nl)";

$clauses[] = $clause;
}

return implode('', $clauses);
}

/**
* Returns the WITH clauses ready for inserting into a query.
*/
protected function buildWithFragment(SQLSelect $query, array &$parameters): string
{
$with = $query->getWith();
if (empty($with)) {
return '';
}

$nl = $this->getSeparator();
$clauses = [];

foreach ($with as $name => $bits) {
$clause = $bits['recursive'] ? 'RECURSIVE ' : '';
$clause .= $name;

if (!empty($bits['cte_fields'])) {
$clause .= ' (' . implode(', ', $bits['cte_fields']) . ')';
}

$clause .= " AS ({$nl}";

$clause .= $this->buildSelectQuery($bits['query'], $parameters);

$clause .= "{$nl})";
$clauses[] = $clause;
}

return 'WITH ' . implode(",{$nl}", $clauses) . $nl;
}

/**
* Returns the SELECT clauses ready for inserting into a query.
*
Expand Down Expand Up @@ -242,9 +313,25 @@ public function buildUpdateFragment(SQLUpdate $query, array &$parameters)
public function buildFromFragment(SQLConditionalExpression $query, array &$parameters)
{
$from = $query->getJoins($joinParameters);
$tables = [];
$joins = [];

// E.g. a naive "Select 1" statemnt is valid SQL
if (empty($from)) {
return '';
}

foreach ($from as $joinOrTable) {
if (preg_match(SQLConditionalExpression::JOIN_REGEX, $joinOrTable)) {
$joins[] = $joinOrTable;
} else {
$tables[] = $joinOrTable;
}
}

$parameters = array_merge($parameters, $joinParameters);
$nl = $this->getSeparator();
return "{$nl}FROM " . implode(' ', $from);
return "{$nl}FROM " . implode(', ', $tables) . ' ' . implode(' ', $joins);
}

/**
Expand Down
19 changes: 18 additions & 1 deletion src/ORM/Connect/Database.php
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,24 @@ abstract public function searchEngine(
$invertedMatch = false
);

/**
* Determines if this database support WITH statements.
* By default it is assumed that they don't unless they are explicitly enabled.
*/
public function supportsCteQueries(): bool
{
return false;
}

/**
* Determines if this database support recursive WITH statements.
* By default it is assumed that they don't unless they are explicitly enabled.
*/
public function supportsRecursiveCteQueries(): bool
{
return false;
}

/**
* Determines if this database supports transactions
*
Expand All @@ -654,7 +672,6 @@ public function supportsSavepoints()
return false;
}


/**
* Determines if the used database supports given transactionMode as an argument to startTransaction()
* If transactions are completely unsupported, returns false.
Expand Down
48 changes: 48 additions & 0 deletions src/ORM/Connect/MySQLDatabase.php
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,54 @@ public function searchEngine(
return $list;
}

public function supportsCteQueries(): bool
{
$version = $this->getVersion();
$mariaDBVersion = $this->getMariaDBVersion($version);
if ($mariaDBVersion) {
// MariaDB has supported CTEs since 10.2.1
// see https://mariadb.com/kb/en/mariadb-1021-release-notes/
return $this->compareVersion($mariaDBVersion, '10.2.1') >= 0;
}
// MySQL has supported CTEs since 8.0.1
// see https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-1.html
return $this->compareVersion($version, '8.0.1') >= 0;
}

public function supportsRecursiveCteQueries(): bool
{
$version = $this->getVersion();
$mariaDBVersion = $this->getMariaDBVersion($version);
if ($mariaDBVersion) {
// MariaDB has supported Recursive CTEs since 10.2.2
// see https://mariadb.com/kb/en/mariadb-1022-release-notes/
return $this->compareVersion($mariaDBVersion, '10.2.2') >= 0;
}
// MySQL has supported Recursive CTEs since 8.0.1
// see https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-1.html
return $this->compareVersion($version, '8.0.1') >= 0;
}

private function getMariaDBVersion(string $version): ?string
{
// MariaDB versions look like "5.5.5-10.6.8-mariadb-1:10.6.8+maria~focal"
// or "10.8.3-MariaDB-1:10.8.3+maria~jammy"
// The relevant part is the x.y.z-mariadb portion.
if (!preg_match('/((\d+\.){2}\d+)-mariadb/i', $version, $matches)) {
return null;
}
return $matches[1];
}

private function compareVersion(string $actualVersion, string $atLeastVersion): int
{
// Assume it's lower if it's not a proper version number
if (!preg_match('/^(\d+\.){2}\d+$/', $actualVersion)) {
return -1;
}
return version_compare($actualVersion, $atLeastVersion);
}

/**
* Returns the TransactionManager to handle transactions for this database.
*
Expand Down
47 changes: 47 additions & 0 deletions src/ORM/DataList.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use SilverStripe\ORM\DataQuery;
use SilverStripe\ORM\ArrayList;
use SilverStripe\ORM\Filters\SearchFilterable;
use SilverStripe\ORM\Queries\SQLSelect;

/**
* Implements a "lazy loading" DataObjectSet.
Expand Down Expand Up @@ -313,6 +314,52 @@ public function canFilterBy($fieldName)
return false;
}

// @TODO is there a case where we want recursiveFilterAny? Or recursiveExclude/Any? Or recursiveWhere? Or some combination of those?
// @TODO is there a case where we want to sort the recursive or base portion, and grab the first of many possible matches?
// In the case of recursiveWhere that's probably getting into "just use alterDataQuery" territory
// .... Are we getting to the point where we'd be better off passing in lists or callables, to allow the full power of the DataList API?
// @TODO is there a case where we want to sort the resultset by some field from the WITH clause?
public function recursiveFilter(array $baseFilters, array $recursiveFilters)
{
$schema = DataObject::getSchema();
$baseFields = [];
foreach ($baseFilters as $filter => $value) {
$fieldArgs = explode(':', $filter);
$baseFields[] = $schema->sqlColumnForField($this->dataClass(), array_shift($fieldArgs));
}
$recursiveFields = [];
foreach ($recursiveFilters as $filter => $value) {
$fieldArgs = explode(':', $filter);
$recursiveFields[] = $schema->sqlColumnForField($this->dataClass(), array_shift($fieldArgs));
}

$queryFields = array_intersect($baseFields, $recursiveFields);


$cteName = 'CTE_Ancestor'; // @TODO dynamically generate a name, and make sure it can handle multiple recursive filters per list
// @TODO we need a better way to generate these - or just pass them in
// @TODO is there a case where we'd want multiple args for the recursive query? Or something other than ID?
// For example, https://learnsql.com/blog/get-to-know-the-power-of-sql-recursive-queries/ has an example where three args are used,
// one of which is "is_visited" which is only used in the recursive portion, and is used to avoid double-checking items.
// One way to handle this would be to have a third set of filters, which is only for additional filters that are _only_ required for the special thingy.
// We could then derive the args from that - but in that case we'd need some way to tell which ones need to be part of the join....
$cteIdField = 'cte_id';
$yy = "\"$cteName\".\"$cteIdField\"";
// @TODO This WHERE is obviously not ideal.
// @TODO is there a case where we want something other than ID (see args above)?
// We may need a third argument to hold this part - but we need to make sure to abstract it in an appropriate way.
$idField = $schema->sqlColumnForField($this->dataClass(), 'ID');
$recursiveQuery = self::create($this->dataClass())->filter($recursiveFilters)->where("$idField = $yy")->sort(null)->dataQuery();
$recursiveQuery->innerJoin($cteName, $idField . ' = ' . $yy)->distinct(false);
$recursiveQuery = $recursiveQuery->query()->setSelect($queryFields);
$baseQuery = self::create($this->dataClass())->filter($baseFilters)->sort(null)->dataQuery();
$baseQuery->union($recursiveQuery);
$baseQuery = $baseQuery->query()->setSelect($queryFields);
return $this->alterDataQuery(function (DataQuery $query) use ($baseQuery, $cteName, $cteIdField) {
$query->with($cteName, $baseQuery, ["\"$cteIdField\""], ['ID' => $cteIdField], true);
});
}

/**
* Return a new DataList instance with the records returned in this query
* restricted by a limit clause.
Expand Down
61 changes: 61 additions & 0 deletions src/ORM/DataQuery.php
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,15 @@ public function having($having)
return $this;
}

public function union(self|SQLSelect $query, ?string $type = null): static
{
if ($query instanceof self) {
$query = $query->query();
}
$this->query->addUnion($query, $type);
return $this;
}

/**
* Create a disjunctive subgroup.
*
Expand Down Expand Up @@ -697,6 +706,58 @@ public function conjunctiveGroup()
return new DataQuery_SubGroup($this, 'AND', $clause);
}

/**
* @TODO Add PHPDoc - including union is required for recursive queries, and can't be distinct and the term "CTE"
*
* @param string|array $onClause The "ON" clause (escaped SQL statement) for joining the query.
* It can either be a full clause (like you would pass to {@link leftJoin()} or {@link innerJoin()}),
* or it can be an array mapping of the field(s) on the dataclass table that map with the field(s) on the CTE table
* e.g. ['ID' => 'cte_id']
*/
public function with(string $name, self|SQLSelect $query, array $cteFields = [], string|array $onClause = '', bool $recursive = false): static
{
// If the query is a DataQuery, make sure all manipulators, joins, etc are applied
// @TODO craft a unit test that protects against this being swapped to $query = $query->query;
// or validate that the existing tests already do that
if ($query instanceof self) {
$query = $query->query();
}

$quotedName = '"' . $name . '"';

// Craft the "ON" clause for the join if we need to
if (is_array($onClause)) {
$schema = DataObject::getSchema();
$onClauses = [];
foreach ($onClause as $myField => $cteField) {
$onClauses[] = $schema->sqlColumnForField($this->dataClass(), $myField) . " = $quotedName.\"$cteField\"";
}
$onClause = implode(' AND ', $onClauses);
}

// // Recursive portion can't be distinct, and must include the with cte_name as FROM like it's a table
// $recursiveQuery->addFrom($quotedName)->setDistinct(false);
// // @TODO ew. How do I pass the field(s) in?
// // Needed for the DataList abstraction, since leaving these out results in
// // "Unknown column 'Ancestor.pid' in 'where clause'"... not sure why this fixes it though?
// $baseQuery->setSelect('"ParentID"');
// $recursiveQuery->setSelect('"ParentID"');
// @TODO do I still need those? If not, do those requirements need to at least be documented somewhere?
// Add the WITH
// @TODO should I be automagically quoting the field names here?
// @TODO Or should they be automagically quoted in SQLSelect?
$this->query->addWith($quotedName, $query, $cteFields, $recursive);

// Only add a join if we have an on clause
// Sometimes people will want to use a CTE as a subquery which could be referenced in a WHERE or similar.
if ($onClause) {
$this->query->addInnerJoin($name, $onClause);
}

return $this;
}

/**
* Adds a WHERE clause.
*
Expand Down
Loading

0 comments on commit 491d5b2

Please sign in to comment.