Improve the search plugin and add UTF-8 support to the Fuzz algorithm
This commit is contained in:
parent
b510c59bab
commit
7c8bad72c5
|
@ -116,7 +116,7 @@ EOF;
|
||||||
global $url;
|
global $url;
|
||||||
|
|
||||||
// Change the whereAmI to avoid load pages in the rule 69.pages
|
// Change the whereAmI to avoid load pages in the rule 69.pages
|
||||||
// This is only for performance propose
|
// This is only for performance purpose
|
||||||
$url->setWhereAmI('search');
|
$url->setWhereAmI('search');
|
||||||
|
|
||||||
// Get the string to search from the URL
|
// Get the string to search from the URL
|
||||||
|
@ -125,7 +125,6 @@ EOF;
|
||||||
|
|
||||||
// Search the string in the cache and get all pages with matches
|
// Search the string in the cache and get all pages with matches
|
||||||
$list = $this->search($stringToSearch);
|
$list = $this->search($stringToSearch);
|
||||||
|
|
||||||
$this->numberOfItems = count($list);
|
$this->numberOfItems = count($list);
|
||||||
|
|
||||||
// Split the content in pages
|
// Split the content in pages
|
||||||
|
@ -220,7 +219,7 @@ EOF;
|
||||||
// Include Fuzz algorithm
|
// Include Fuzz algorithm
|
||||||
require_once($this->phpPath().'vendors/fuzz.php');
|
require_once($this->phpPath().'vendors/fuzz.php');
|
||||||
$fuzz = new Fuzz($cache, 10, 1, true);
|
$fuzz = new Fuzz($cache, 10, 1, true);
|
||||||
$results = $fuzz->search($text, 3);
|
$results = $fuzz->search($text, 5);
|
||||||
|
|
||||||
return(array_keys($results));
|
return(array_keys($results));
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,8 +125,8 @@ class Fuzz
|
||||||
{
|
{
|
||||||
$suffix = [];
|
$suffix = [];
|
||||||
$result = 0;
|
$result = 0;
|
||||||
$n = strlen($source);
|
$n = mb_strlen($source, CHARSET);
|
||||||
$m = strlen($target);
|
$m = mb_strlen($target, CHARSET);
|
||||||
|
|
||||||
for ($i = 0; $i <= $n; $i++) {
|
for ($i = 0; $i <= $n; $i++) {
|
||||||
for ($j = 0; $j <= $m; $j++) {
|
for ($j = 0; $j <= $m; $j++) {
|
||||||
|
@ -155,8 +155,8 @@ class Fuzz
|
||||||
public function getLevenshtein($source, $target)
|
public function getLevenshtein($source, $target)
|
||||||
{
|
{
|
||||||
$matrix = [];
|
$matrix = [];
|
||||||
$n = strlen($source);
|
$n = mb_strlen($source, CHARSET);
|
||||||
$m = strlen($target);
|
$m = mb_strlen($target, CHARSET);
|
||||||
|
|
||||||
if ($n === 0) {
|
if ($n === 0) {
|
||||||
return $m;
|
return $m;
|
||||||
|
@ -208,35 +208,35 @@ class Fuzz
|
||||||
$shorter;
|
$shorter;
|
||||||
$longer;
|
$longer;
|
||||||
|
|
||||||
if (strlen($first) > strlen($second)) {
|
if (mb_strlen($first, CHARSET) > mb_strlen($second, CHARSET)) {
|
||||||
$longer = strtolower($first);
|
$longer = mb_strtolower($first, CHARSET);
|
||||||
$shorter = strtolower($second);
|
$shorter = mb_strtolower($second, CHARSET);
|
||||||
} else {
|
} else {
|
||||||
$longer = strtolower($second);
|
$longer = mb_strtolower($second, CHARSET);
|
||||||
$shorter = strtolower($first);
|
$shorter = mb_strtolower($first, CHARSET);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get half the length distance of shorter string
|
// Get half the length distance of shorter string
|
||||||
$halfLen = intval((strlen($shorter) / 2) + 1);
|
$halfLen = intval((mb_strlen($shorter,CHARSET) / 2) + 1);
|
||||||
|
|
||||||
$match1 = $this->_getCharMatch($shorter, $longer, $halfLen);
|
$match1 = $this->_getCharMatch($shorter, $longer, $halfLen);
|
||||||
$match2 = $this->_getCharMatch($longer, $shorter, $halfLen);
|
$match2 = $this->_getCharMatch($longer, $shorter, $halfLen);
|
||||||
|
|
||||||
if ((strlen($match1) == 0 || strlen($match2) == 0)
|
if ((mb_strlen($match1, CHARSET) == 0 || mb_strlen($match2, CHARSET) == 0)
|
||||||
|| (strlen($match1) != strlen($match2))
|
|| (mb_strlen($match1, CHARSET) != mb_strlen($match2, CHARSET))
|
||||||
) {
|
) {
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
$trans = $this->_getTranspositions($match1, $match2);
|
$trans = $this->_getTranspositions($match1, $match2);
|
||||||
|
|
||||||
$distance = (strlen($match1) / strlen($shorter)
|
$distance = (mb_strlen($match1, CHARSET) / mb_strlen($shorter, CHARSET)
|
||||||
+ strlen($match2) / strlen($longer)
|
+ mb_strlen($match2, CHARSET) / mb_strlen($longer, CHARSET)
|
||||||
+ (strlen($match1) - $trans)
|
+ (mb_strlen($match1, CHARSET) - $trans)
|
||||||
/ strlen($match1)) / 3.0;
|
/ mb_strlen($match1, CHARSET)) / 3.0;
|
||||||
|
|
||||||
// Apply Winkler Adjustment
|
// Apply Winkler Adjustment
|
||||||
$prefixLen = min(strlen($this->_getPrefix($first, $second)), 4);
|
$prefixLen = min(mb_strlen($this->_getPrefix($first, $second),CHARSET), 4);
|
||||||
$jaroWinkler = round(($distance + (0.1 * $prefixLen * (1.0 - $distance))) * 100.0) / 100.0;
|
$jaroWinkler = round(($distance + (0.1 * $prefixLen * (1.0 - $distance))) * 100.0) / 100.0;
|
||||||
|
|
||||||
return $jaroWinkler;
|
return $jaroWinkler;
|
||||||
|
@ -255,8 +255,8 @@ class Fuzz
|
||||||
{
|
{
|
||||||
$common = '';
|
$common = '';
|
||||||
$copy = $second;
|
$copy = $second;
|
||||||
$firstLen = strlen($first);
|
$firstLen = mb_strlen($first, CHARSET);
|
||||||
$secondLen = strlen($second);
|
$secondLen = mb_strlen($second, CHARSET);
|
||||||
|
|
||||||
for ($i = 0; $i < $firstLen; $i++) {
|
for ($i = 0; $i < $firstLen; $i++) {
|
||||||
$char = $first[$i];
|
$char = $first[$i];
|
||||||
|
@ -285,7 +285,7 @@ class Fuzz
|
||||||
private function _getTranspositions($first, $second)
|
private function _getTranspositions($first, $second)
|
||||||
{
|
{
|
||||||
$trans = 0;
|
$trans = 0;
|
||||||
$firstLen = strlen($first);
|
$firstLen = mb_strlen($first, CHARSET);
|
||||||
|
|
||||||
for ($i = 0; $i < $firstLen; $i++) {
|
for ($i = 0; $i < $firstLen; $i++) {
|
||||||
if ($first[$i] != $second[$i]) {
|
if ($first[$i] != $second[$i]) {
|
||||||
|
@ -307,7 +307,7 @@ class Fuzz
|
||||||
*/
|
*/
|
||||||
private function _getPrefix($first, $second)
|
private function _getPrefix($first, $second)
|
||||||
{
|
{
|
||||||
if (strlen($first) == 0 || strlen($second) == 0) {
|
if (mb_strlen($first, CHARSET) == 0 || mb_strlen($second, CHARSET) == 0) {
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -317,7 +317,7 @@ class Fuzz
|
||||||
} elseif ($index == 0) {
|
} elseif ($index == 0) {
|
||||||
return '';
|
return '';
|
||||||
} else {
|
} else {
|
||||||
return substr($first, 0, $index);
|
return mb_substr($first, 0, $index, CHARSET);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -335,7 +335,7 @@ class Fuzz
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
$maxLen = min(strlen($first), strlen($second));
|
$maxLen = min(mb_strlen($first, CHARSET), mb_strlen($second, CHARSET));
|
||||||
for ($i = 0; $i < $maxLen; $i++) {
|
for ($i = 0; $i < $maxLen; $i++) {
|
||||||
if ($first[$i] != $second[$i]) {
|
if ($first[$i] != $second[$i]) {
|
||||||
return $i;
|
return $i;
|
||||||
|
|
Loading…
Reference in New Issue