-
Notifications
You must be signed in to change notification settings - Fork 4
/
40-PicoSearch.php
181 lines (153 loc) · 6.16 KB
/
40-PicoSearch.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
<?php
/**
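* PicoSearch - a Pico plugin that filters pages by search terms taken from the request URL and ranks them by relevance.
*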
* @author Pontus Horn
* @link https://pontushorn.me
* @repository https://github.com/PontusHorn/Pico-Search
* @license http://opensource.org/licenses/MIT
*/
class PicoSearch extends AbstractPicoPlugin
{
    /**
     * Folder prefix (e.g. "blog/") captured from the request URL that the search
     * is scoped to, or null when no folder was given.
     *
     * @var string|null
     */
    private $search_area;

    /**
     * URL-decoded search terms captured from the request URL, or null when the
     * current request is not a search request.
     *
     * @var string|null
     */
    private $search_terms;

    /**
     * Parses the requested URL to determine if a search has been requested. The search may be
     * scoped to a folder. An example URL: yourdomain.com/blog/search/foobar/page/2,
     * which searches the /blog folder for "foobar" and shows the second page of results using
     * e.g. https://github.com/rewdy/Pico-Pagination.
     *
     * @see Pico::getBaseUrl()
     * @see Pico::getRequestUrl()
     * @param string &$url request URL
     * @return void
     */
    public function onRequestUrl(&$url)
    {
        // If form was submitted without being intercepted by JavaScript, redirect to the canonical search URL.
        // Only a string "q" can be URL-encoded; an array value (?q[]=foo) is ignored rather than
        // being handed to urlencode(), which would fail.
        if (preg_match('~^(.+/)?search$~', $url) && isset($_GET['q']) && is_string($_GET['q'])) {
            header('Location: ' . $this->getPico()->getBaseUrl() . $url . '/' . urlencode($_GET['q']));
            exit;
        }

        // <area/>search/<terms></anything>: group 1 is the optional folder, group 2 the terms.
        if (preg_match('~^(.+/)?search/([^/]+)(/.+)?$~', $url, $matches)) {
            $this->search_terms = urldecode($matches[2]);
            if (!empty($matches[1])) {
                $this->search_area = $matches[1];
            }
        }
    }

    /**
     * If accessing search results, {@link Pico::discoverRequestFile()} will have failed since
     * the search terms are included in the URL but do not map to a file. This method takes care
     * of finding the appropriate file.
     *
     * @see Pico::discoverRequestFile()
     * @param string &$file request file
     * @return void
     */
    public function onRequestFile(&$file)
    {
        // Compare against null instead of relying on truthiness: "0" is a valid search
        // term but is falsy in PHP, which would leave the search page unresolved.
        if ($this->search_terms === null) {
            return;
        }

        $pico = $this->getPico();
        $folder = '';

        // Aggressively strip out any ./ or ../ parts from the search area before using it
        // as the folder to look in. Should already be taken care of previously, but just
        // as a safeguard to make sure nothing slips through the cracks.
        if ($this->search_area !== null) {
            $folder = str_replace('\\', '/', $this->search_area);
            $folder = preg_replace('~\.+/~', '', $folder);
        }

        $temp_file = $pico->getConfig('content_dir') . $folder . 'search' . $pico->getConfig('content_ext');
        if (file_exists($temp_file)) {
            $file = $temp_file;
        }
    }

    /**
     * Filter the input array to pages matching the search terms
     * and sort them by relevance.
     *
     * Pages are first restricted to the search area (by page id prefix), then the page ids
     * listed in the "search_excludes" config are removed, and finally every remaining page
     * gets a "search_rank" entry; pages with a rank of 0 are dropped and the rest are
     * ordered by descending rank. Array keys are preserved.
     *
     * @param array $pages data of all known pages
     * @return array filtered and sorted pages
     */
    public function applySearch($pages)
    {
        if (!isset($this->search_area) && !isset($this->search_terms)) {
            return [];
        }

        if (isset($this->search_area)) {
            $searchArea = $this->search_area;
            $areaLength = strlen($searchArea);
            $pages = array_filter($pages, function ($page) use ($searchArea, $areaLength) {
                // Keep only pages whose id starts with the search area prefix.
                return strncmp($page['id'], $searchArea, $areaLength) === 0;
            });
        }

        $excludes = $this->getPico()->getConfig('search_excludes');
        if (!empty($excludes)) {
            // The cast tolerates a single path configured as a plain string.
            foreach ((array) $excludes as $exclude_path) {
                unset($pages[$exclude_path]);
            }
        }

        if (isset($this->search_terms)) {
            $pages = array_map(function ($page) {
                $page['search_rank'] = $this->getSearchRankForPage($page);
                return $page;
            }, $pages);

            $pages = array_filter($pages, function ($page) {
                return $page['search_rank'] > 0;
            });

            // Highest rank first.
            uasort($pages, function ($a, $b) {
                if ($a['search_rank'] == $b['search_rank']) {
                    return 0;
                }
                return $a['search_rank'] > $b['search_rank'] ? -1 : 1;
            });
        }

        return $pages;
    }

    /**
     * Calculates the relevance of a page for the current search terms.
     *
     * A whole-phrase, whole-word match in the title short-circuits to a rank of 5.
     * Otherwise the terms are split on whitespace and each term is scored against the
     * title (full weight) and the raw content (weight 0.2). Low-value words are ignored
     * unless the query consists of nothing else.
     *
     * @param array $page page data; the "title" and "raw_content" entries are read
     * @return int|float rank of the page, 0 when nothing matches or the terms are blank
     */
    public function getSearchRankForPage($page)
    {
        // Surrounding whitespace would otherwise yield empty terms, and an empty term
        // compiles to a pattern that matches at every word boundary of every page.
        $terms = trim((string) $this->search_terms);
        if ($terms === '') {
            return 0;
        }

        $title = $this->getPageText($page, 'title');
        $content = $this->getPageText($page, 'raw_content');

        // If there's an exact match in the title, skip a bunch of work and give it a very high score
        $escaped_search_terms = preg_quote($terms, '/');
        if (preg_match("/\b$escaped_search_terms\b/iu", $title) === 1) {
            return 5;
        }

        $searchTerms = preg_split('/\s+/', $terms, -1, PREG_SPLIT_NO_EMPTY);
        $keyTerms = array_filter($searchTerms, function ($searchTerm) {
            return !$this->isLowValueWord($searchTerm);
        });

        // Only search through key terms if any exist
        if (!empty($keyTerms)) {
            $searchTerms = $keyTerms;
        }

        return array_sum(
            array_map(
                function ($searchTerm) use ($title, $content) {
                    return $this->getSearchRankForString($searchTerm, $title) +
                        $this->getSearchRankForString($searchTerm, $content) * 0.2;
                },
                $searchTerms
            )
        );
    }

    /**
     * Scores how well a single search term matches a piece of text.
     *
     * Matching is case-insensitive and tried in three tiers; only the first tier with
     * a hit counts and at most three hits are credited:
     * whole word (x1), start of a word (x0.5), inside a word (x0.05).
     * The result is scaled by 0.2 when the term is a low-value word.
     *
     * @param string $searchTerm single search term
     * @param string $content text to search in
     * @return int|float score of the term for the given text
     */
    public function getSearchRankForString($searchTerm, $content)
    {
        $searchTerm = (string) $searchTerm;
        $content = (string) $content;

        // An empty term would match at every word boundary; it carries no information.
        if ($searchTerm === '') {
            return 0;
        }

        $searchTermValue = $this->isLowValueWord($searchTerm) ? 0.2 : 1;
        $escapedSearchTerm = preg_quote($searchTerm, '/');

        // preg_match_all() returns false on failure; the int cast turns that into "no matches".
        $fullWordMatches = (int) preg_match_all("/\b$escapedSearchTerm\b/iu", $content);
        if ($fullWordMatches > 0) {
            return min($fullWordMatches, 3) * $searchTermValue;
        }

        $startOfWordMatches = (int) preg_match_all("/\b$escapedSearchTerm\B/iu", $content);
        if ($startOfWordMatches > 0) {
            return min($startOfWordMatches, 3) * 0.5 * $searchTermValue;
        }

        $inWordMatches = (int) preg_match_all("/\B$escapedSearchTerm\B/iu", $content);
        return min($inWordMatches, 3) * 0.05 * $searchTermValue;
    }

    /**
     * Tells whether a word is listed in the plugin's "low_value_words" config.
     *
     * The word is lower-cased before the lookup; the configured list is used as is.
     *
     * @param string $word word to check
     * @return bool true if the word is configured as a low-value word
     */
    public function isLowValueWord($word)
    {
        return in_array(mb_strtolower($word), $this->getPluginConfig('low_value_words') ?: []);
    }

    /**
     * Exposes the current search terms to the template as the "search_terms" variable.
     *
     * NOTE(review): the value is the raw, user-supplied query; confirm that templates
     * escape it on output.
     *
     * @param mixed &$twig unused
     * @param array &$twigVariables template variables, extended with "search_terms"
     * @param mixed &$templateName unused
     * @return void
     */
    public function onPageRendering(&$twig, &$twigVariables, &$templateName)
    {
        $twigVariables['search_terms'] = $this->search_terms;
    }

    /**
     * Registers the "apply_search" Twig filter, which is backed by {@link applySearch()}.
     *
     * @return void
     */
    public function onTwigRegistration()
    {
        $this->getPico()->getTwig()->addFilter(new \Twig\TwigFilter('apply_search', [$this, 'applySearch']));
    }

    /**
     * Reads a page field as a string, treating missing or non-scalar values as empty.
     *
     * @param array $page page data
     * @param string $key name of the field to read
     * @return string field value, or an empty string
     */
    private function getPageText($page, $key)
    {
        return (isset($page[$key]) && is_scalar($page[$key])) ? (string) $page[$key] : '';
    }
}