Skip to content

Commit

Permalink
[4.0] Make the tuple length of com_finder configurable (#20384)
Browse files Browse the repository at this point in the history
* Make the tuple length in com_finder configurable

* Codestyle

* Comment fix

* Changing wording

* Changing wording for finder option

* Ordering the strings alphanumerically

* Updating comment

* Fixing search modifiers and renaming option

* Integrating proposals
  • Loading branch information
Hackwar authored and wilsonge committed Jul 18, 2018
1 parent 89d65ba commit 1c59a62
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 73 deletions.
11 changes: 11 additions & 0 deletions administrator/components/com_finder/config.xml
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,17 @@
description="COM_FINDER_FIELDSET_INDEX_OPTIONS_DESCRIPTION"
>

<field
name="tuplecount"
type="list"
label="COM_FINDER_CONFIG_TUPLECOUNT_LABEL"
default="1"
validate="options"
>
<option value="1">COM_FINDER_CONFIG_TUPLECOUNT_PHRASE_DISABLED</option>
<option value="3">COM_FINDER_CONFIG_TUPLECOUNT_PHRASE_ENABLED</option>
</field>

<field
name="batch_size"
type="list"
Expand Down
53 changes: 27 additions & 26 deletions administrator/components/com_finder/helpers/indexer/helper.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public static function parse($input, $format = 'html')
*/
public static function tokenize($input, $lang, $phrase = false)
{
static $cache;
static $cache, $tuplecount;
static $multilingual;
static $defaultLanguage;

Expand All @@ -68,6 +68,12 @@ public static function tokenize($input, $lang, $phrase = false)
return $cache[$store];
}

if (!$tuplecount)
{
$params = ComponentHelper::getParams('com_finder');
$tuplecount = $params->get('tuplecount', 1);
}

if (is_null($multilingual))
{
$multilingual = Multilanguage::isEnabled();
Expand Down Expand Up @@ -127,33 +133,28 @@ public static function tokenize($input, $lang, $phrase = false)
$tokens[] = new FinderIndexerToken($terms[$i], $language->language);
}

// Create two and three word phrase tokens from the individual words.
for ($i = 0, $n = count($tokens); $i < $n; $i++)
// Create multi-word phrase tokens from the individual words.
if ($tuplecount > 1)
{
// Setup the phrase positions.
$i2 = $i + 1;
$i3 = $i + 2;

// Create the two word phrase.
if ($i2 < $n && isset($tokens[$i2]))
for ($i = 0, $n = count($tokens); $i < $n; $i++)
{
// Tokenize the two word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term), $language->language, $language->spacer);
$token->derived = true;

// Add the token to the stack.
$tokens[] = $token;
}

// Create the three word phrase.
if ($i3 < $n && isset($tokens[$i3]))
{
// Tokenize the three word phrase.
$token = new FinderIndexerToken(array($tokens[$i]->term, $tokens[$i2]->term, $tokens[$i3]->term), $language->language, $language->spacer);
$token->derived = true;

// Add the token to the stack.
$tokens[] = $token;
$temp = array($tokens[$i]->term);

// Create tokens for 2 to $tuplecount length phrases
for ($j = 1; $j < $tuplecount; $j++)
{
if ($i + $j >= $n || !isset($tokens[$i + $j]))
{
break;
}

$temp[] = $tokens[$i + $j]->term;
$token = new FinderIndexerToken($temp, $language->language, $language->spacer);
$token->derived = true;

// Add the token to the stack.
$tokens[] = $token;
}
}
}
}
Expand Down
74 changes: 29 additions & 45 deletions administrator/components/com_finder/helpers/indexer/query.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

defined('_JEXEC') or die;

use Joomla\CMS\Component\ComponentHelper;
use Joomla\Registry\Registry;
use Joomla\String\StringHelper;
use Joomla\Utilities\ArrayHelper;
Expand Down Expand Up @@ -738,11 +739,12 @@ protected function processDates($date1, $date2, $when1, $when2)
protected function processString($input, $lang, $mode)
{
// Clean up the input string.
$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
$input = StringHelper::strtolower($input);
$input = preg_replace('#\s+#mi', ' ', $input);
$input = trim($input);
$debug = JFactory::getConfig()->get('debug_lang');
$input = html_entity_decode($input, ENT_QUOTES, 'UTF-8');
$input = StringHelper::strtolower($input);
$input = preg_replace('#\s+#mi', ' ', $input);
$input = trim($input);
$debug = JFactory::getConfig()->get('debug_lang');
$params = ComponentHelper::getParams('com_finder');

/*
* First, we need to handle string based modifiers. String based
Expand Down Expand Up @@ -900,51 +902,33 @@ protected function processString($input, $lang, $mode)

// Get the number of words in the phrase.
$parts = explode(' ', $match);
$tuplecount = $params->get('tuplecount', 1);

// Check if the phrase is longer than three words.
if (count($parts) > 3)
// Check if the phrase is longer than our $tuplecount.
if (count($parts) > $tuplecount && $tuplecount > 1)
{
$chunk = array_slice($parts, 0, $tuplecount);
$parts = array_slice($parts, $tuplecount);

// If the chunk is not empty, add it as a phrase.
if (count($chunk))
{
$phrases[] = implode(' ', $chunk);
$terms[] = implode(' ', $chunk);
}

/*
* If the phrase is longer than three words, we need to
* If the phrase is longer than $tuplecount words, we need to
* break it down into smaller chunks of phrases that
* are less than or equal to three words. We overlap
* are less than or equal to $tuplecount words. We overlap
* the chunks so that we can ensure that a match is
* found for the complete phrase and not just portions
* of it.
*/
for ($i = 0, $c = count($parts); $i < $c; $i += 2)
for ($i = 0, $c = count($parts); $i < $c; $i++)
{
// Set up the chunk.
$chunk = array();

// The chunk has to be assembled based on how many
// pieces are available to use.
switch ($c - $i)
{
/*
* If only one word is left, we can break from
* the switch and loop because the last word
* was already used at the end of the last
* chunk.
*/
case 1:
break 2;

// If two words are left, we use them both as
// the last chunk of the phrase and we're done.
case 2:
$chunk[] = $parts[$i];
$chunk[] = $parts[$i + 1];
break;

// If there are three or more words left, we
// build a three word chunk and continue on.
default:
$chunk[] = $parts[$i];
$chunk[] = $parts[$i + 1];
$chunk[] = $parts[$i + 2];
break;
}
array_shift($chunk);
$chunk[] = array_shift($parts);

// If the chunk is not empty, add it as a phrase.
if (count($chunk))
Expand All @@ -956,7 +940,7 @@ protected function processString($input, $lang, $mode)
}
else
{
// The phrase is <= 3 words so we can use it as is.
// The phrase is <= $tuplecount words so we can use it as is.
$phrases[] = $match;
$terms[] = $match;
}
Expand Down Expand Up @@ -1050,7 +1034,7 @@ protected function processString($input, $lang, $mode)
{
// Tokenize the current term.
$token = FinderIndexerHelper::tokenize($terms[$i], $lang, true);
$token = $this->getTokenData($token);
$token = $this->getTokenData(array_shift($token));

// Set the required flag.
$token->required = false;
Expand All @@ -1071,7 +1055,7 @@ protected function processString($input, $lang, $mode)

// Tokenize the term after the next term (current plus two).
$other = FinderIndexerHelper::tokenize($terms[$i + 2], $lang, true);
$other = $this->getTokenData($other);
$other = $this->getTokenData(array_shift($other));

// Set the required flag.
$other->required = false;
Expand Down Expand Up @@ -1116,7 +1100,7 @@ protected function processString($input, $lang, $mode)

// Tokenize the next term (current plus one).
$other = FinderIndexerHelper::tokenize($terms[$i + 1], $lang, true);
$other = $this->getTokenData($other);
$other = $this->getTokenData(array_shift($other));

// Set the required flag.
$other->required = false;
Expand Down
3 changes: 3 additions & 0 deletions administrator/language/en-GB/en-GB.com_finder.ini
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ COM_FINDER_CONFIG_TEXT_MULTIPLIER_DESCRIPTION="The multiplier is used to control
COM_FINDER_CONFIG_TEXT_MULTIPLIER_LABEL="Body Text Weight Multiplier"
COM_FINDER_CONFIG_TITLE_MULTIPLIER_DESCRIPTION="The multiplier is used to control how much influence matching text has on the overall relevance score of a search result. A multiplier is considered in relationship to the other multipliers. The title text comes from the title of the content."
COM_FINDER_CONFIG_TITLE_MULTIPLIER_LABEL="Title Text Weight Multiplier"
COM_FINDER_CONFIG_TUPLECOUNT_LABEL="Search for Phrases"
COM_FINDER_CONFIG_TUPLECOUNT_PHRASE_DISABLED="Disabled (Improved performance)"
COM_FINDER_CONFIG_TUPLECOUNT_PHRASE_ENABLED="Enabled (Improved search results)"
COM_FINDER_CONFIGURATION="Smart Search: Options"
COM_FINDER_CREATE_FILTER="Create a filter."
COM_FINDER_EDIT_FILTER="Edit Filter"
Expand Down
9 changes: 8 additions & 1 deletion components/com_finder/tmpl/search/default_form.php
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,14 @@
<?php if ($this->params->get('show_advanced_tips', 1)) : ?>
<div class="com-finder__tips card card-outline-secondary mb-3">
<div class="card-body">
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS'); ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_INTRO'); ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_AND'); ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_NOT'); ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_OR'); ?>
<?php if ($this->params->get('tuplecount', 1) > 1) : ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_PHRASE'); ?>
<?php endif; ?>
<?php echo JText::_('COM_FINDER_ADVANCED_TIPS_OUTRO'); ?>
</div>
</div>
<?php endif; ?>
Expand Down
7 changes: 6 additions & 1 deletion language/en-GB/en-GB.com_finder.ini
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@

COM_FINDER="Smart Search"
COM_FINDER_ADVANCED_SEARCH_TOGGLE="Advanced Search"
COM_FINDER_ADVANCED_TIPS="<p>Here are a few examples of how you can use the search feature:</p><p>Entering <strong>this and that</strong> into the search form will return results with both &quot;this&quot; and &quot;that&quot;.</p><p>Entering <strong>this not that</strong> into the search form will return results with &quot;this&quot; and not &quot;that&quot;.</p><p>Entering <strong>this or that</strong> into the search form will return results with either &quot;this&quot; or &quot;that&quot;.</p><p>Entering <strong>&quot;this and that&quot;</strong> (with quotes) into the search form will return results with the exact phrase &quot;this and that&quot;.</p><p>Search results can also be filtered using a variety of criteria. Select one or more filters below to get started.</p>"
COM_FINDER_ADVANCED_TIPS_INTRO="<p>Here are a few examples of how you can use the search feature:</p>"
COM_FINDER_ADVANCED_TIPS_AND="<p>Entering <strong>this and that</strong> into the search form will return results containing both &quot;this&quot; and &quot;that&quot;.</p>"
COM_FINDER_ADVANCED_TIPS_NOT="<p>Entering <strong>this not that</strong> into the search form will return results containing &quot;this&quot; and not &quot;that&quot;.</p>"
COM_FINDER_ADVANCED_TIPS_OR="<p>Entering <strong>this or that</strong> into the search form will return results containing either &quot;this&quot; or &quot;that&quot;.</p>"
COM_FINDER_ADVANCED_TIPS_PHRASE="<p>Entering <strong>&quot;this and that&quot;</strong> (with quotes) into the search form will return results containing the exact phrase &quot;this and that&quot;.</p>"
COM_FINDER_ADVANCED_TIPS_OUTRO="<p>Search results can also be filtered using a variety of criteria. Select one or more filters below to get started.</p>"
COM_FINDER_DEFAULT_PAGE_TITLE="Search Results"
COM_FINDER_FILTER_BRANCH_LABEL="Search by %s"
COM_FINDER_FILTER_DATE_BEFORE="Before"
Expand Down

0 comments on commit 1c59a62

Please sign in to comment.