Skip to content

Commit

Permalink
Add CountQuery class
Browse files Browse the repository at this point in the history
  • Loading branch information
BrandonXLF committed Nov 25, 2023
1 parent da8aa21 commit c6c0bc4
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 99 deletions.
148 changes: 148 additions & 0 deletions includes/CountQuery.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
<?php

/**
 * Builds and runs the SQL that counts how many pages link to, transclude, or redirect to a title.
 *
 * The same query skeleton is reused for several link tables; the table-specific differences
 * (column naming, presence of a `*_from_namespace` column, use of the `linktarget` table) are
 * described by the bit flags below, which callers combine with `|`.
 */
class CountQuery {
    /** Count rows of the given table that point at the title; runQuery() returns a single int. */
    public const MODE_REDIRECT = 'redirect';
    /** Count direct links and links made through a redirect as two separate sets. */
    public const MODE_LINK = 'link';
    /** Count direct rows, treating those that also appear via a redirect as indirect. */
    public const MODE_TRANSCLUSION = 'transclusion';

    /** The target column is `{prefix}_to` and there is no target namespace column. Ignored unless NO_LINK_TARGET is also set. */
    public const SINGLE_NS = 1;
    /** The table has no `{prefix}_from_namespace` column; the source namespace filter joins `page` instead. */
    public const NO_FROM_NS = 2;
    /** The table stores the target title itself instead of referencing `linktarget` through `{prefix}_target_id`. */
    public const NO_LINK_TARGET = 4;

    /** Comma-separated list of numeric namespace IDs used to filter source pages, or '' for no filter. */
    private $fromNamespaces;
    /** Database handle; this class calls quote() and query() on it. */
    private $db;
    /** Title being counted; this class calls getDBKey() and getNamespaceId() on it. */
    private $title;

    /**
     * @param string $fromNamespaces Comma-separated numeric namespace IDs, or '' to count links from every namespace.
     * @param object $db Database handle providing quote() and query().
     * @param object $title Title providing getDBKey() and getNamespaceId().
     * @throws InvalidArgumentException If $fromNamespaces contains anything other than numeric items.
     */
    public function __construct($fromNamespaces, $db, $title) {
        self::assertNumericList($fromNamespaces);

        $this->fromNamespaces = $fromNamespaces;
        $this->db = $db;
        $this->title = $title;
    }

    /**
     * Rejects namespace lists that are not safe to place inside an SQL `IN (...)` clause.
     *
     * The list is interpolated into the query text verbatim, so every comma-separated item
     * must be numeric. The value itself is not rewritten.
     *
     * @param mixed $fromNamespaces Value passed to the constructor.
     * @throws InvalidArgumentException If any item is not numeric.
     */
    private static function assertNumericList($fromNamespaces): void {
        if ($fromNamespaces === '') {
            return;
        }

        foreach (explode(',', (string) $fromNamespaces) as $namespaceId) {
            if (!is_numeric($namespaceId)) {
                throw new InvalidArgumentException('Namespace list must contain only numeric IDs.');
            }
        }
    }

    /**
     * Assembles the `JOIN ... WHERE ...` tail shared by every query.
     *
     * @param string $prefix Column prefix of the link table (for example `tl`).
     * @param string $titleSQL SQL expression the target title column must equal.
     * @param string|int $namespaceSQL SQL expression the target namespace column must equal.
     * @param int $flags Bitmask of SINGLE_NS, NO_FROM_NS and NO_LINK_TARGET.
     * @param string[] $joins JOIN clauses that must come before the generated ones.
     * @param string[] $wheres Conditions that must come before the generated ones.
     * @return string The JOIN clauses separated by spaces, then ` WHERE `, then the conditions joined with ` AND `.
     */
    private function createCond(
        $prefix,
        $titleSQL,
        $namespaceSQL,
        $flags,
        $joins = [],
        $wheres = []
    ): string {
        $hasFromNS = ($flags & self::NO_FROM_NS) === 0;
        $usesLinkTarget = ($flags & self::NO_LINK_TARGET) === 0;
        // linktarget rows always carry a namespace, so SINGLE_NS only matters without it.
        $hasNS = $usesLinkTarget || ($flags & self::SINGLE_NS) === 0;

        if ($this->fromNamespaces !== '') {
            if ($hasFromNS) {
                $wheres[] = "{$prefix}_from_namespace IN ({$this->fromNamespaces})";
            } else {
                // No from-namespace column on the table: look the source page up instead.
                $joins[] = <<<SQL
                    JOIN page AS source ON
                        source.page_id = {$prefix}_from
                        AND source.page_namespace IN ({$this->fromNamespaces})
                    SQL;
            }
        }

        $linkInfoPrefix = $usesLinkTarget ? 'lt' : $prefix;
        $titleColumn = $linkInfoPrefix . '_' . ($hasNS ? 'title' : 'to');

        $wheres[] = "$titleColumn = $titleSQL";

        if ($hasNS) {
            $wheres[] = "{$linkInfoPrefix}_namespace = $namespaceSQL";
        }

        if ($usesLinkTarget) {
            $joins[] = "JOIN linktarget ON {$prefix}_target_id = lt_id";
        }

        return implode(' ', $joins) . ' WHERE ' . implode(' AND ', $wheres);
    }

    /**
     * Condition matching rows that point straight at the title.
     *
     * @param string $prefix Column prefix of the link table.
     * @param int $flags Bitmask of the flag constants.
     * @return string JOIN/WHERE tail produced by createCond().
     */
    private function createDirectCond($prefix, $flags): string {
        return $this->createCond(
            $prefix,
            $this->db->quote($this->title->getDBKey()),
            $this->title->getNamespaceId(),
            $flags
        );
    }

    /**
     * Condition matching rows that point at a page which redirects to the title.
     *
     * Meant to follow `FROM redirect`: it joins the redirecting page as `target`, joins the
     * link table, and requires the link to point at `target` rather than at the title itself.
     *
     * @param string $table Link table name.
     * @param string $prefix Column prefix of the link table.
     * @param int $flags Bitmask of the flag constants.
     * @return string JOIN/WHERE tail produced by createCond().
     */
    private function createIndirectCond($table, $prefix, $flags): string {
        $quotedTitle = $this->db->quote($this->title->getDBKey());
        $quotedBlank = $this->db->quote('');

        $joins = [
            'JOIN page AS target ON target.page_id = rd_from',
            // Joined without ON; the matching conditions are part of the WHERE clause.
            "JOIN $table",
        ];

        $wheres = [
            "rd_title = $quotedTitle",
            "rd_namespace = {$this->title->getNamespaceId()}",
            "(rd_interwiki IS NULL OR rd_interwiki = $quotedBlank)",
        ];

        return $this->createCond(
            $prefix,
            'target.page_title',
            'target.page_namespace',
            $flags,
            $joins,
            $wheres
        );
    }

    /**
     * Builds the complete SELECT statement for a mode.
     *
     * @param string $table Link table name.
     * @param string $prefix Column prefix of the link table.
     * @param string $mode One of the MODE_* constants.
     * @param int $flags Bitmask of the flag constants.
     * @return string SQL selecting one column for MODE_REDIRECT, or three (all, direct, indirect) otherwise.
     * @throws UnhandledMatchError If $mode is not one of the MODE_* constants.
     */
    private function createQuery($table, $prefix, $mode, $flags): string {
        return match ($mode) {
            self::MODE_REDIRECT => <<<SQL
                SELECT COUNT({$prefix}_from) FROM $table
                {$this->createDirectCond($prefix, $flags)}
                AND ({$prefix}_interwiki is NULL or {$prefix}_interwiki = {$this->db->quote('')})
                SQL,
            // Transclusions of a redirect that follow the redirect are also added as a transclusion of the
            // redirect target. There is no way to tell a page with only an indirect link apart from a page
            // with both an indirect and a direct link; in this case only the indirect link is recorded.
            // Pages can also transclude a redirect page without following the redirect, so a valid indirect
            // link must have an associated direct link.
            self::MODE_TRANSCLUSION => <<<SQL
                SELECT
                COUNT({$prefix}_from),
                COUNT({$prefix}_from) - COUNT(indirect_link),
                COUNT(indirect_link)
                FROM $table
                LEFT JOIN (
                SELECT DISTINCT {$prefix}_from AS indirect_link
                FROM redirect
                {$this->createIndirectCond($table, $prefix, $flags)}
                ) AS temp ON {$prefix}_from = indirect_link
                {$this->createDirectCond($prefix, $flags)}
                SQL,
            self::MODE_LINK => <<<SQL
                SELECT
                COUNT(DISTINCT COALESCE(direct_link, indirect_link)),
                COUNT(direct_link),
                COUNT(indirect_link)
                FROM (
                SELECT {$prefix}_from AS direct_link, NULL AS indirect_link
                FROM $table
                {$this->createDirectCond($prefix, $flags)}
                UNION ALL
                SELECT DISTINCT NULL AS direct_link, {$prefix}_from AS indirect_link
                FROM redirect
                {$this->createIndirectCond($table, $prefix, $flags)}
                ) AS temp
                SQL,
        };
    }

    /**
     * Runs the count query and returns the result as integers.
     *
     * @param string $table Link table name.
     * @param string $prefix Column prefix of the link table.
     * @param string $mode One of the MODE_* constants.
     * @param int $flags Bitmask of SINGLE_NS, NO_FROM_NS and NO_LINK_TARGET.
     * @return int|array The count for MODE_REDIRECT; otherwise an array with the keys
     *  'all', 'direct' and 'indirect'.
     * @throws UnhandledMatchError If $mode is not one of the MODE_* constants.
     */
    public function runQuery($table, $prefix, $mode, $flags = 0): int|array {
        $query = $this->createQuery($table, $prefix, $mode, $flags);
        $res = $this->db->query($query)->fetch();

        if ($mode === self::MODE_REDIRECT) {
            return (int) $res[0];
        }

        return [
            'all' => (int) $res[0],
            'direct' => (int) $res[1],
            'indirect' => (int) $res[2],
        ];
    }
}
134 changes: 35 additions & 99 deletions includes/LinkCount.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ class LinkCount implements HtmlProducer, JsonProducer {
public $counts;
public $error;

private $fromNamespaces;
private $projectURL;
private $db;
private $title;
private $countQuery;

private $typeInfo = [
'filelinks' => [
Expand Down Expand Up @@ -57,8 +56,6 @@ public function __construct($page, $project, $namespaces = '') {
}
}

$this->fromNamespaces = $namespaces;

$maybeProjectURL = 'https://' . preg_replace('/^https:\/\//', '', $project);
$metaDB = DatabaseFactory::create();

Expand All @@ -71,111 +68,50 @@ public function __construct($page, $project, $namespaces = '') {
}

list($dbName, $this->projectURL) = $stmt->fetch();
$this->db = DatabaseFactory::create($dbName);
$db = DatabaseFactory::create($dbName);
$this->title = new Title($page, $dbName, $this->projectURL);
$this->countQuery = new CountQuery($namespaces, $db, $this->title);

$this->counts = [
'filelinks' => $this->title->getNamespaceId() === 6 ? $this->counts('imagelinks', 'il', self::COUNT_MODE_TRANSCLUSION, true, true, false) : null,
'categorylinks' => $this->title->getNamespaceId() === 14 ? $this->counts('categorylinks', 'cl', self::COUNT_MODE_LINK, true, false, false) : null,
'wikilinks' => $this->counts('pagelinks', 'pl', self::COUNT_MODE_LINK, false, true, false),
'redirects' => $this->counts('redirect', 'rd', self::COUNT_MODE_REDIRECT, false, false, false),
'transclusions' => $this->counts('templatelinks', 'tl', self::COUNT_MODE_TRANSCLUSION, false, true, true)
'filelinks' => $this->title->getNamespaceId() === 6
? $this->countQuery->runQuery(
'imagelinks',
'il',
CountQuery::MODE_TRANSCLUSION,
CountQuery::SINGLE_NS | CountQuery::NO_LINK_TARGET
)
: null,
'categorylinks' => $this->title->getNamespaceId() === 14
? $this->countQuery->runQuery(
'categorylinks',
'cl',
CountQuery::MODE_LINK,
CountQuery::SINGLE_NS | CountQuery::NO_FROM_NS | CountQuery::NO_LINK_TARGET
)
: null,
'wikilinks' => $this->countQuery->runQuery(
'pagelinks',
'pl',
CountQuery::MODE_LINK,
CountQuery::NO_LINK_TARGET
),
'redirects' => $this->countQuery->runQuery(
'redirect',
'rd',
CountQuery::MODE_REDIRECT,
CountQuery::NO_FROM_NS | CountQuery::NO_LINK_TARGET),
'transclusions' => $this->countQuery->runQuery(
'templatelinks',
'tl',
CountQuery::MODE_TRANSCLUSION
)
];

// Redirects are included in the wikilinks table
$this->counts['wikilinks']['all'] -= $this->counts['redirects'];
$this->counts['wikilinks']['direct'] -= $this->counts['redirects'];
}

/**
 * Counts the rows of a link table that point at this object's title, directly or through a redirect.
 *
 * @param string $table Link table to query.
 * @param string $prefix Column prefix used by that table.
 * @param string $mode One of the self::COUNT_MODE_* constants; selects which query is built.
 * @param bool $singleNS When true the target column is `{prefix}_to` and no target namespace condition
 *  is added (only consulted when $usesLinkTarget is false).
 * @param bool $hasFromNamespace When true the source namespace filter uses `{prefix}_from_namespace`;
 *  otherwise the `page` table is joined as `source`.
 * @param bool $usesLinkTarget When true the target is matched through the `linktarget` table.
 * @return int|array The count as an int in redirect mode, otherwise an array with the keys
 *  'all', 'direct' and 'indirect'.
 */
private function counts($table, $prefix, $mode = self::COUNT_MODE_LINK, $singleNS = false, $hasFromNamespace = true, $usesLinkTarget = true) {
$escapedTitle = $this->db->quote($this->title->getDBKey());
$escapedBlank = $this->db->quote('');
$titleColumn = $prefix . '_' . ($singleNS ? 'to' : 'title');

// Source namespace filter: at most one of these two fragments is non-empty.
$fromNamespaceWhere = '';
$fromNamespaceJoin = '';

if ($this->fromNamespaces !== '') {
$fromNamespaceWhere = $hasFromNamespace ? " AND {$prefix}_from_namespace IN ({$this->fromNamespaces})" : '';
$fromNamespaceJoin = !$hasFromNamespace ? " JOIN page AS source ON source.page_id = {$prefix}_from AND source.page_namespace IN ({$this->fromNamespaces})" : '';
}

// TODO: Remove once all tables are switched to linktarget
// $directCond is the JOIN/WHERE tail for rows pointing straight at the title;
// $indirectQuery selects rows pointing at a page that redirects to the title.
$directCond = '';
$indirectQuery = '';

if (!$usesLinkTarget) {
$namespaceComponent = $singleNS ? '' : " AND {$prefix}_namespace = {$this->title->getNamespaceId()}";

$directCond = <<<SQL
$fromNamespaceJoin
WHERE $titleColumn = $escapedTitle $namespaceComponent $fromNamespaceWhere
SQL;

// Same component again, this time compared against the redirecting page instead of the title.
$namespaceComponent = $singleNS ? '' : " AND {$prefix}_namespace = target.page_namespace";

$indirectQuery = <<<SQL
SELECT DISTINCT NULL AS direct_link, {$prefix}_from AS indirect_link FROM redirect
JOIN page AS target ON target.page_id = rd_from
JOIN $table ON $titleColumn = target.page_title $namespaceComponent $fromNamespaceWhere$fromNamespaceJoin
WHERE rd_title = $escapedTitle AND rd_namespace = {$this->title->getNamespaceId()} AND (rd_interwiki IS NULL OR rd_interwiki = $escapedBlank)
SQL;
} else {
$directCond = <<<SQL
JOIN linktarget on {$prefix}_target_id = lt_id $fromNamespaceJoin
WHERE lt_title = $escapedTitle AND lt_namespace = {$this->title->getNamespaceId()} $fromNamespaceWhere
SQL;

$indirectQuery = <<<SQL
SELECT DISTINCT NULL AS direct_link, {$prefix}_from AS indirect_link FROM redirect
JOIN page AS target ON target.page_id = rd_from
JOIN linktarget ON lt_title = target.page_title AND lt_namespace = target.page_namespace
JOIN $table ON {$prefix}_target_id = lt_id $fromNamespaceWhere$fromNamespaceJoin
WHERE rd_title = $escapedTitle AND rd_namespace = {$this->title->getNamespaceId()} AND (rd_interwiki IS NULL OR rd_interwiki = $escapedBlank)
SQL;
}

// NOTE(review): $query is only assigned for the three modes below; any other $mode leaves it undefined.
if ($mode == self::COUNT_MODE_REDIRECT) {
$query = <<<SQL
SELECT COUNT(rd_from) FROM $table
$directCond AND ({$prefix}_interwiki is NULL or {$prefix}_interwiki = $escapedBlank)
SQL;
} elseif ($mode == self::COUNT_MODE_TRANSCLUSION) {
// Transclusions of a redirect that follow the redirect are also added as a transclusion of the redirect target.
// There is no way to tell a page with only an indirect link apart from a page with both an indirect and a direct link; in this case only the indirect link is recorded.
// Pages can also transclude a redirect page without following the redirect, so a valid indirect link must have an associated direct link.
$query = <<<SQL
SELECT
COUNT({$prefix}_from),
COUNT({$prefix}_from) - COUNT(indirect_link),
COUNT(indirect_link)
FROM $table
LEFT JOIN ($indirectQuery) AS temp ON {$prefix}_from = indirect_link $directCond
SQL;
} elseif ($mode == self::COUNT_MODE_LINK) {
$query = <<<SQL
SELECT
COUNT(DISTINCT COALESCE(direct_link, indirect_link)),
COUNT(direct_link),
COUNT(indirect_link)
FROM (
SELECT {$prefix}_from AS direct_link, NULL AS indirect_link FROM $table
$directCond
UNION ALL
$indirectQuery
) AS temp
SQL;
}

// The result row is read by numeric index: one column in redirect mode, three otherwise.
$res = $this->db->query($query)->fetch();

return $mode == self::COUNT_MODE_REDIRECT ? (int) $res[0] : [
'all' => (int) $res[0],
'direct' => (int) $res[1],
'indirect' => (int) $res[2]
];
}

public function getTitle() {
$parts = [];

Expand Down

0 comments on commit c6c0bc4

Please sign in to comment.