Skip to content
This repository has been archived by the owner on Dec 25, 2022. It is now read-only.

Changed way to calculate score to php #38

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

"elasticsearch/elasticsearch": "^2.0",
"ramsey/uuid": "^3.5",
"doctrine/dbal": "^2.5"
"doctrine/dbal": "^2.5",
"symfony/property-access": "^3.3"
},
"require-dev": {
"doctrine/doctrine-bundle": "^1.6",
Expand Down
2 changes: 2 additions & 0 deletions src/Component/Client/IndexInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,6 @@ public function search(Search $search, $type);
* @return array
*/
public function get($type, $id);

/**
* Asks the index to run its optimization step.
*
* What happens is up to the implementation; no parameters are taken and no
* return type is declared here.
*/
public function optimize();
}
8 changes: 8 additions & 0 deletions src/Component/Elasticsearch/ElasticsearchIndex.php
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,12 @@ public function get($type, $id)

return $response;
}

/**
* {@inheritdoc}
*
* Intentionally a no-op: this implementation performs no work when asked to optimize.
*/
public function optimize()
{
// nothing to optimize currently.
}
}
4 changes: 2 additions & 2 deletions src/Component/Pucene/Compiler/Element/CompositeElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

class CompositeElement extends BaseElement
{
const OR = 'or';
const AND = 'and';
const OPERATOR_OR = 'or';
const OPERATOR_AND = 'and';

/**
* @var string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ public function visit(QueryInterface $query, StorageInterface $storage)
}

if (count($andElements) === 0) {
return new CompositeElement(CompositeElement:: OR, $shouldElements);
return new CompositeElement(CompositeElement::OPERATOR_OR, $shouldElements);
}

return new BoolElement(
new CompositeElement(CompositeElement:: AND, $andElements),
new CompositeElement(CompositeElement::OPERATOR_AND, $andElements),
array_merge($mustElements, $shouldElements)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ public function visit(QueryInterface $query, StorageInterface $storage)
$terms[] = new TermElement($query->getField(), $token->getEncodedTerm());
}

return new CompositeElement(CompositeElement:: OR, $terms);
return new CompositeElement(CompositeElement:: OPERATOR_OR, $terms);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ public function visit(QueryInterface $query, StorageInterface $storage)

$mustNotElements = $this->getMustNotElements($query->getLike());
if (0 === count($mustNotElements)) {
return new CompositeElement(CompositeElement:: OR, $elements);
return new CompositeElement(CompositeElement:: OPERATOR_OR, $elements);
}

return new BoolElement(
new CompositeElement(
CompositeElement:: AND,
CompositeElement:: OPERATOR_AND,
[
new CompositeElement(CompositeElement:: AND, $mustNotElements),
new CompositeElement(CompositeElement:: OR, $elements),
new CompositeElement(CompositeElement:: OPERATOR_AND, $mustNotElements),
new CompositeElement(CompositeElement:: OPERATOR_OR, $elements),
]
),
$elements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ public function visit(QueryInterface $query, StorageInterface $storage)
return $ids;
}

return new CompositeElement(CompositeElement:: AND, [$ids, new TypeElement($query->getType())]);
return new CompositeElement(CompositeElement:: OPERATOR_AND, [$ids, new TypeElement($query->getType())]);
}
}
5 changes: 5 additions & 0 deletions src/Component/Pucene/Dbal/DbalStorage.php
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ public function termStatistics()
return new DbalTermStatistics($this->connection, $this->getSchema());
}

/**
* Optimizes the storage by delegating to the document persister's optimize().
*/
public function optimize()
{
$this->persister->optimize();
}

public function getConnection()
{
return $this->connection;
Expand Down
59 changes: 50 additions & 9 deletions src/Component/Pucene/Dbal/DocumentPersister.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ class DocumentPersister
/**
* @var Connection
*/
private $connection;
public $connection;

/**
* @var PuceneSchema
*/
private $schema;
public $schema;

/**
* @param Connection $connection
Expand Down Expand Up @@ -56,21 +56,51 @@ public function persist(Document $document, array $fields)
$token->getEncodedTerm(),
ElasticsearchPrecision::fieldNorm($field->getNumberOfTerms())
);

$this->connection->createQueryBuilder()
->update($this->schema->getDocumentTermsTableName())
->set('frequency', 'frequency + 1')
->andWhere('field_name = :fieldName')
->andWhere('term = :term')
->setParameter('fieldName', $field->getName())
->setParameter('term', $token->getEncodedTerm())
->execute();
}

// update term frequency
foreach ($fieldTerms as $term => $frequency) {
$this->connection->update(
$this->schema->getDocumentTermsTableName(),
[
'term_frequency' => $frequency,
],
['document_id' => $document->getId(), 'field_name' => $field->getName(), 'term' => $term]
);
$this->connection->createQueryBuilder()
->update($this->schema->getDocumentTermsTableName())
->set('term_frequency', sqrt($frequency))
->set('score', 'field_norm * ' . sqrt($frequency))
->andWhere('document_ID = :document')
->andWhere('field_name = :fieldName')
->andWhere('term = :term')
->setParameter('document', $document->getId())
->setParameter('fieldName', $field->getName())
->setParameter('term', $term)
->execute();
}
}
}

/**
 * Recalculates the inverse document frequency (idf) of all stored terms.
 *
 * Counts the rows of the documents table and then writes
 * `1 + log(docCount / (frequency + 1))` into the `idf` column of every row
 * of the document-terms table.
 *
 * NOTE(review): single-argument `log` is the natural logarithm on MySQL but
 * base 10 on PostgreSQL - confirm which platform(s) must be supported.
 */
public function optimize()
{
    // TODO recalculate term frequency

    // fetchColumn() yields a string (or false); normalise to an integer
    // before it is embedded into the SQL expression below.
    $docCount = (int) $this->connection->createQueryBuilder()
        ->select('COUNT(id)')
        ->from($this->schema->getDocumentsTableName())
        ->execute()
        ->fetchColumn();

    if ($docCount < 1) {
        // Without documents the expression would evaluate log(0); skip it.
        return;
    }

    // calculate inverse-document-frequency
    // The count is emitted as a decimal literal ("5.0") so that platforms
    // with integer division do not truncate the ratio before log() is applied.
    $this->connection->createQueryBuilder()
        ->update($this->schema->getDocumentTermsTableName())
        ->set('idf', sprintf('1 + log(%d.0 / (frequency + 1))', $docCount))
        ->execute();
}

/**
* @param Document $document
*/
Expand Down Expand Up @@ -101,13 +131,24 @@ protected function insertDocument(Document $document)
*/
protected function insertToken(string $documentId, string $fieldName, $term, $fieldNorm)
{
    $termsTable = $this->schema->getDocumentTermsTableName();

    // Read the frequency already stored for this field/term pair, if any.
    $lookup = $this->connection->createQueryBuilder();
    $lookup
        ->select('frequency')
        ->from($termsTable)
        ->andWhere('field_name = :fieldName')
        ->andWhere('term = :term')
        ->setParameter('fieldName', $fieldName)
        ->setParameter('term', $term);

    $knownFrequency = $lookup->execute()->fetchColumn();
    if (!$knownFrequency) {
        // No usable value was found: start counting at zero.
        $knownFrequency = 0;
    }

    $tokenRow = [
        'document_id' => $documentId,
        'field_name' => $fieldName,
        'term' => $term,
        'field_norm' => $fieldNorm,
        'frequency' => $knownFrequency,
    ];

    $this->connection->insert($termsTable, $tokenRow);
}
Expand Down
35 changes: 16 additions & 19 deletions src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@
use Pucene\Component\Pucene\Model\Document;
use Pucene\Component\QueryBuilder\Search;
use Pucene\Component\QueryBuilder\Sort\IdSort;
use Pucene\Component\QueryBuilder\Sort\ScoreSort;
use Pucene\Component\Symfony\Pool\PoolInterface;
use Pucene\Component\Utils\SortUtils;

class DbalInterpreter
{
public static $sortPaths = [
ScoreSort::class => 'score',
IdSort::class => 'id',
];

/**
* @var PoolInterface
*/
Expand Down Expand Up @@ -42,8 +49,6 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
->select('document.*')
->from($schema->getDocumentsTableName(), 'document')
->where('document.type IN (?)')
->setMaxResults($search->getSize())
->setFirstResult($search->getFrom())
->setParameter(0, implode(',', $types));

/** @var InterpreterInterface $interpreter */
Expand All @@ -54,21 +59,6 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
}

$scoringAlgorithm = new ScoringAlgorithm($queryBuilder, $schema, $this->interpreterPool);
$expression = $interpreter->scoring($element, $scoringAlgorithm);

if ($expression) {
$queryBuilder->addSelect('(' . $expression . ') as score')->orderBy('score', 'desc');
} else {
$queryBuilder->addSelect('1 as score');
}

if (0 < count($search->getSorts())) {
foreach ($search->getSorts() as $sort) {
if ($sort instanceof IdSort) {
$queryBuilder->addOrderBy('id', $sort->getOrder());
}
}
}

$result = [];
foreach ($queryBuilder->execute()->fetchAll() as $row) {
Expand All @@ -77,10 +67,17 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
$row['type'],
$storage->getName(),
json_decode($row['document'], true),
array_key_exists('score', $row) ? (float) $row['score'] : 1
$interpreter->newScoring($element, $scoringAlgorithm, $row)
);
}

return $result;
$paths = [];
foreach ($search->getSorts() as $sort) {
$paths[] = self::$sortPaths[get_class($sort)];
}

$result = SortUtils::multisort($result, $paths);

return array_splice($result, $search->getFrom(), $search->getSize());
}
}
40 changes: 40 additions & 0 deletions src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ public function __construct(PoolInterface $interpreterPool)
*/
public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBuilder)
{
    // Hand every scoring element to its interpreter first; the values they
    // return are not used here.
    foreach ($element->getScoringElements() as $scoringElement) {
        $scoringInterpreter = $this->getInterpreter($scoringElement);
        $scoringInterpreter->interpret($scoringElement, $queryBuilder);
    }

    // The result of this method is the interpretation of the wrapped element.
    $wrappedElement = $element->getElement();

    return $this->getInterpreter($wrappedElement)->interpret($wrappedElement, $queryBuilder);
}

Expand Down Expand Up @@ -74,6 +78,42 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
);
}

/**
 * {@inheritdoc}
 *
 * Scores one result row: the scores of all scoring elements are summed up,
 * then multiplied by the fraction of scoring elements matching the row and
 * by the boost of the element. A single scoring element is scored directly
 * by its own interpreter.
 *
 * @param BoolElement $element
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $scoringElements = $element->getScoringElements();
    $total = count($scoringElements);

    if ($total === 0 || $element->getBoost() === 0) {
        return 0;
    }

    if ($total === 1) {
        // Only one element: delegate without query norm.
        $only = $scoringElements[0];

        return $this->interpreterPool->get(get_class($only))->newScoring($only, $scoring, $row);
    }

    if (!$queryNorm) {
        $queryNorm = $scoring->queryNorm($this->getTerms($scoringElements));
    }

    $sum = 0;
    $coord = 0;
    foreach ($scoringElements as $child) {
        /** @var InterpreterInterface $childInterpreter */
        $childInterpreter = $this->interpreterPool->get(get_class($child));

        $sum += $childInterpreter->newScoring($child, $scoring, $row, $queryNorm);

        // Each matching element contributes an equal share to the coordination factor.
        if ($childInterpreter->matches($child, $row)) {
            $coord += 1 / $total;
        }
    }

    return $sum * $coord * $element->getBoost();
}

private function getTerms(array $elements)
{
$terms = [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBu
$expr = $queryBuilder->expr();

$expression = $expr->orX();
if ($element->getOperator() === CompositeElement:: AND) {
if ($element->getOperator() === CompositeElement:: OPERATOR_AND) {
$expression = $expr->andX();
}

Expand All @@ -53,4 +53,35 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return parent::scoring(new BoolElement($element, $element->getElements()), $scoring, $queryNorm);
}

/**
 * {@inheritdoc}
 *
 * Wraps the composite into a BoolElement, using its inner elements as
 * scoring elements, and lets the parent interpreter score that wrapper.
 *
 * @param CompositeElement $element
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $boolElement = new BoolElement($element, $element->getElements());

    return parent::newScoring($boolElement, $scoring, $row, $queryNorm);
}

/**
 * {@inheritdoc}
 *
 * With the OR operator the first matching inner element decides the result
 * (true); with the AND operator the first non-matching inner element
 * decides it (false). If the loop finishes undecided, the result is true
 * for AND and false otherwise.
 *
 * @param CompositeElement $element
 */
public function matches(ElementInterface $element, array $row)
{
    $operator = $element->getOperator();
    $isOr = $operator === CompositeElement::OPERATOR_OR;
    $isAnd = $operator === CompositeElement::OPERATOR_AND;

    foreach ($element->getElements() as $child) {
        $childMatches = $this->interpreterPool->get(get_class($child))->matches($child, $row);

        if ($childMatches) {
            if ($isOr) {
                return true;
            }

            continue;
        }

        if ($isAnd) {
            return false;
        }
    }

    return $isAnd;
}
}
10 changes: 10 additions & 0 deletions src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return (new MathExpressionBuilder())->value(1);
}

/**
* {@inheritdoc}
*
* Always yields the constant score 1; element, row and query norm are not consulted.
*/
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
return 1;
}

/**
 * Tells whether the id of the given result row is one of the element's ids.
 *
 * Both sides are normalised to strings and compared strictly, so that
 * PHP's loose comparison rules (e.g. `0 == 'abc'` on PHP 7 or
 * `'1e3' == '1000'`) cannot report a match for different ids.
 *
 * @param ElementInterface $element element providing getIds()
 * @param array $row result row; its 'id' entry is read
 *
 * @return bool
 */
public function matches(ElementInterface $element, array $row)
{
    $ids = array_map('strval', $element->getIds());

    return in_array((string) $row['id'], $ids, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,9 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return (new MathExpressionBuilder())->value(1);
}

/**
* {@inheritdoc}
*
* Always yields the constant score 1; element, row and query norm are not consulted.
*/
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
return 1;
}
}
Loading