Skip to content

Commit

Permalink
Merge pull request #193 from tpwd/feature/issue-19-word-delimiter
Browse files Browse the repository at this point in the history
[FEATURE] Additional word characters
  • Loading branch information
christianbltr authored Oct 20, 2023
2 parents 3fcfb41 + b4559c3 commit 5f9e6e1
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 10 deletions.
1 change: 1 addition & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
ChangeLog

Upcoming version
[FEATURE] Additional word characters: Allows searching for words containing e.g. dots or hyphens, https://github.com/tpwd/ke_search/issues/19
[TASK] Exclude all parameters (including filters) from cHash calculation, thanks to Uwe Hawkeye1909, https://github.com/tpwd/ke_search/issues/187
[BUGFIX] Allow to use 'loadFlexformsFromOtherCE' parameter for TypoScript-instantiated plugins, thanks to Garvin Hicking, https://github.com/tpwd/ke_search/pull/152
[BUGFIX] Avoid undefined array key "sortWithoutSearchword", "resultsPerPage" and "noResultsText", https://github.com/tpwd/ke_search/issues/151
Expand Down
13 changes: 13 additions & 0 deletions Classes/Indexer/IndexerRunner.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
use Tpwd\KeSearch\Event\ModifyFieldValuesBeforeStoringEvent;
use Tpwd\KeSearch\Lib\Db;
use Tpwd\KeSearch\Lib\SearchHelper;
use Tpwd\KeSearch\Utility\AdditionalWordCharactersUtility;
use TYPO3\CMS\Core\Log\Logger;
use TYPO3\CMS\Core\Log\LogManager;
use TYPO3\CMS\Core\Mail\MailMessage;
Expand Down Expand Up @@ -695,6 +696,18 @@ public function storeInIndex(
$tags = StringUtility::uniqueList($tags);
}

// Get additional content for additional word characters
$additionalContent = AdditionalWordCharactersUtility::getAdditionalContent($content);
if (!empty($additionalContent)) {
if (!isset($additionalFields['hidden_content'])) {
$additionalFields['hidden_content'] = '';
}
if (!empty($additionalFields['hidden_content'])) {
$additionalFields['hidden_content'] .= ' ';
}
$additionalFields['hidden_content'] .= $additionalContent;
}

$table = 'tx_kesearch_index';
$fieldValues = $this->createFieldValuesForIndexing(
$storagePid,
Expand Down
17 changes: 15 additions & 2 deletions Classes/Lib/Searchphrase.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/

use Tpwd\KeSearch\Utility\AdditionalWordCharactersUtility;
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
use TYPO3\CMS\Core\Utility\GeneralUtility;

Expand Down Expand Up @@ -67,6 +68,7 @@ public function buildSearchPhrase()
$cleanSearchStringParts[$key] = $part;
}
}
$searchStringParts = $this->explodeSearchPhrase($searchString, true);

$searchArray = [
'sword' => implode(' ', $cleanSearchStringParts), // f.e. hello karl-heinz +mueller
Expand Down Expand Up @@ -96,11 +98,14 @@ public function checkAgainstDefaultValue($searchString)
}

/**
* explode search string and remove too short words
* Explode search string and remove too short words. Additionally add modifiers for in-word search and optionally
* replace additional word characters.
*
* @param string $searchString
* @param bool $replaceAdditionalWordCharacters
* @return array
*/
public function explodeSearchPhrase($searchString)
public function explodeSearchPhrase(string $searchString, bool $replaceAdditionalWordCharacters = false)
{
preg_match_all('/([+\-~<>])?\".*?"|[^ ]+/', $searchString, $matches);
list($searchParts) = $matches;
Expand Down Expand Up @@ -129,6 +134,14 @@ public function explodeSearchPhrase($searchString)
unset($searchParts[$key]);
}
}

// Replace additional word characters
if ($replaceAdditionalWordCharacters) {
foreach ($searchParts as $key => $word) {
$searchParts[$key] = AdditionalWordCharactersUtility::replaceAdditionalWordCharacters($word);
}
}

foreach ($searchParts as $key => $word) {
if ($word != '|') {
// Enable partial word search (default: on) and in-word-search (Sphinx-based or native).
Expand Down
68 changes: 68 additions & 0 deletions Classes/Utility/AdditionalWordCharactersUtility.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

declare(strict_types=1);

namespace Tpwd\KeSearch\Utility;

use Tpwd\KeSearch\Lib\SearchHelper;

/**
 * Makes words containing configured "additional word characters" (e.g. "." or "-") searchable.
 *
 * Every configured character is substituted by a placeholder built from its byte value
 * (see getReplacementForAdditionalWordCharacter()). The same substitution is offered for
 * indexed content (getAdditionalContent()) and for search words
 * (replaceAdditionalWordCharacters()), so both sides end up with identical tokens.
 */
class AdditionalWordCharactersUtility
{
    /**
     * Returns the configured additional word characters, one array entry per byte.
     *
     * Reads the "additionalWordCharacters" key from SearchHelper::getExtConf(). The value is
     * split with str_split(), i.e. byte-wise, so multi-byte characters end up as several entries.
     *
     * @return string[] empty array if nothing is configured
     */
    public static function getAdditionalWordCharacters(): array
    {
        $extConf = SearchHelper::getExtConf();
        $configured = $extConf['additionalWordCharacters'] ?? '';
        if (empty($configured)) {
            return [];
        }
        // Explicit cast: under strict_types str_split() rejects non-string configuration values.
        return str_split((string)$configured);
    }

    /**
     * Collects all whitespace-delimited words of $content which contain at least one additional
     * word character and returns them, space-separated, with those characters replaced by their
     * placeholders.
     *
     * All matching words are returned (not only the first one), and all configured characters
     * inside a word are replaced at once, so the result equals what
     * replaceAdditionalWordCharacters() produces for the same word.
     *
     * @param string $content text to scan
     * @param string[]|null $additionalWordCharacters characters to use; null loads them via
     *                                                getAdditionalWordCharacters()
     * @return string space-separated list of converted words, '' if there is nothing to add
     */
    public static function getAdditionalContent(string $content, ?array $additionalWordCharacters = null): string
    {
        if ($additionalWordCharacters === null) {
            $additionalWordCharacters = self::getAdditionalWordCharacters();
        }

        // Build the body of a regex character class. preg_quote() keeps characters such as
        // "/", "]", "\", "^" or "-" from breaking or changing the pattern.
        $characterClass = '';
        foreach ($additionalWordCharacters as $additionalWordCharacter) {
            if ((string)$additionalWordCharacter !== '') {
                $characterClass .= preg_quote((string)$additionalWordCharacter, '/');
            }
        }
        if ($characterClass === '' || $content === '') {
            return '';
        }

        // A "word" is a run of non-whitespace characters containing one of the characters.
        $matches = [];
        $matchCount = preg_match_all('/\S*[' . $characterClass . ']\S*/', $content, $matches);
        if ($matchCount === false || $matchCount === 0) {
            return '';
        }

        $convertedWords = [];
        foreach ($matches[0] as $word) {
            $convertedWords[] = self::replaceAdditionalWordCharacters($word, $additionalWordCharacters);
        }
        return implode(' ', $convertedWords);
    }

    /**
     * Replaces every additional word character in $content by its placeholder.
     *
     * If no additional word characters are configured, $content is returned unchanged.
     *
     * @param string $content text (e.g. a single search word) to convert
     * @param string[]|null $additionalWordCharacters characters to use; null loads them via
     *                                                getAdditionalWordCharacters()
     * @return string converted text
     */
    public static function replaceAdditionalWordCharacters(
        string $content,
        ?array $additionalWordCharacters = null
    ): string {
        if ($additionalWordCharacters === null) {
            $additionalWordCharacters = self::getAdditionalWordCharacters();
        }

        $replacements = [];
        foreach ($additionalWordCharacters as $additionalWordCharacter) {
            $additionalWordCharacter = (string)$additionalWordCharacter;
            if ($additionalWordCharacter !== '') {
                $replacements[$additionalWordCharacter] =
                    self::getReplacementForAdditionalWordCharacter($additionalWordCharacter);
            }
        }
        if ($replacements === []) {
            return $content;
        }

        // strtr() replaces in a single pass and never rescans inserted placeholders, so a
        // configured "_" or digit cannot corrupt a placeholder that was inserted earlier.
        return strtr($content, $replacements);
    }

    /**
     * Builds the placeholder for a character: its byte value (ord()) wrapped in three
     * underscores on each side, e.g. "-" becomes "___45___".
     *
     * @param string $character the character to build the placeholder for (first byte is used)
     * @return string
     */
    public static function getReplacementForAdditionalWordCharacter(string $character): string
    {
        return '___' . ord($character) . '___';
    }
}
30 changes: 30 additions & 0 deletions Documentation/Configuration/AdditionalWordCharacters.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
.. include:: /Includes.rst.txt

.. _configuration-additional-word-characters:

==========================
Additional word characters
==========================

By default MySQL treats certain characters as word delimiters, e.g. dot (".")
and hyphen ("-"). That means words which contain one of these characters will be
treated as two words and it is not possible to search for such a word.

But in some cases it would be helpful to be able to search for such words, e.g.
serial numbers which contain a hyphen, e.g. "AB-123".

Since version 5.1.0 it is possible to make these words searchable. Go to the
extension settings and add the desired characters in "Additional word
characters". You can add multiple characters there.

.. figure:: /Images/Configuration/additional-word-characters-configuration.png
:alt: Configure additional word characters
:class: with-border

After that you will have to start the indexer.

Words containing the configured characters can then be searched.

.. figure:: /Images/Configuration/additional-word-characters-result.png
:alt: Search result with additional word characters
:class: with-border
17 changes: 9 additions & 8 deletions Documentation/Configuration/Index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ The extension can be configured via the plugin settings (FlexForm), TypoScript a
:titlesonly:
:glob:

SearchWordLength
Sorting
RoutingSpeakingUrls
HighlightedWord
SearchWordParameter
Avoid404Error
AdditionalWordCharacters
AllowOnlyKeSearchRecords
ReducingFiltersBackend
OverrideRecordStoragePage
Avoid404Error
HighlightedWord
Notes
OverrideRecordStoragePage
ReducingFiltersBackend
RoutingSpeakingUrls
SearchWordLength
SearchWordParameter
Sorting
StorageEngine
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 3 additions & 0 deletions ext_conf_template.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ enableExplicitAnd = 0
# cat=basic//50; type=boolean; label=Allow empty search:If enabled, a search will be executed even if no searchword or filter is given. This will render a list of all records available in the index. Please note: There's also the setting "Show text instead of searchresults if no filter or searchword has been given" in the plugin settings which has to be disabled to see the full list of all index records. If set to false no query is executed when loading a page with a result list and an empty search, this will give a better performance.
allowEmptySearch = 1

# cat=basic//60; type=string; label=Additional word characters:Characters which should be treated as word characters instead of word delimiters, e.g. if you want to search for serial numbers which have dots in them. Does not support unicode (e.g. UTF-8). Does not support space character. Example: ".-," (without the quotes).
additionalWordCharacters =

# cat=notification//10; type=boolean; label= Send notification when finished: If activated, a notification email will be sent when indexing process is finished in CLI / scheduler mode.
finishNotification = 0

Expand Down

0 comments on commit 5f9e6e1

Please sign in to comment.