Skip to content
This repository has been archived by the owner on Dec 25, 2022. It is now read-only.

Commit

Permalink
tmp
Browse files Browse the repository at this point in the history
  • Loading branch information
wachterjohannes committed Jun 4, 2017
1 parent 3371b15 commit b78fda0
Show file tree
Hide file tree
Showing 19 changed files with 305 additions and 35 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@

"elasticsearch/elasticsearch": "^2.0",
"ramsey/uuid": "^3.5",
"doctrine/dbal": "^2.5"
"doctrine/dbal": "^2.5",
"symfony/property-access": "^3.3"
},
"require-dev": {
"doctrine/doctrine-bundle": "^1.6",
Expand Down
4 changes: 2 additions & 2 deletions src/Component/Pucene/Compiler/Element/CompositeElement.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

class CompositeElement extends BaseElement
{
const OR = 'or';
const AND = 'and';
const OPERATOR_OR = 'or';
const OPERATOR_AND = 'and';

/**
* @var string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ public function visit(QueryInterface $query, StorageInterface $storage)
}

if (count($andElements) === 0) {
return new CompositeElement(CompositeElement:: OR, $shouldElements);
return new CompositeElement(CompositeElement::OPERATOR_OR, $shouldElements);
}

return new BoolElement(
new CompositeElement(CompositeElement:: AND, $andElements),
new CompositeElement(CompositeElement::OPERATOR_AND, $andElements),
array_merge($mustElements, $shouldElements)
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,6 @@ public function visit(QueryInterface $query, StorageInterface $storage)
$terms[] = new TermElement($query->getField(), $token->getEncodedTerm());
}

return new CompositeElement(CompositeElement:: OR, $terms);
return new CompositeElement(CompositeElement:: OPERATOR_OR, $terms);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ public function visit(QueryInterface $query, StorageInterface $storage)

$mustNotElements = $this->getMustNotElements($query->getLike());
if (0 === count($mustNotElements)) {
return new CompositeElement(CompositeElement:: OR, $elements);
return new CompositeElement(CompositeElement:: OPERATOR_OR, $elements);
}

return new BoolElement(
new CompositeElement(
CompositeElement:: AND,
CompositeElement:: OPERATOR_AND,
[
new CompositeElement(CompositeElement:: AND, $mustNotElements),
new CompositeElement(CompositeElement:: OR, $elements),
new CompositeElement(CompositeElement:: OPERATOR_AND, $mustNotElements),
new CompositeElement(CompositeElement:: OPERATOR_OR, $elements),
]
),
$elements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ public function visit(QueryInterface $query, StorageInterface $storage)
return $ids;
}

return new CompositeElement(CompositeElement:: AND, [$ids, new TypeElement($query->getType())]);
return new CompositeElement(CompositeElement:: OPERATOR_AND, [$ids, new TypeElement($query->getType())]);
}
}
2 changes: 1 addition & 1 deletion src/Component/Pucene/Dbal/DocumentPersister.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ public function persist(Document $document, array $fields)
$this->connection->update(
$this->schema->getDocumentTermsTableName(),
[
'term_frequency' => $frequency,
'term_frequency' => sqrt($frequency),
],
['document_id' => $document->getId(), 'field_name' => $field->getName(), 'term' => $term]
);
Expand Down
36 changes: 18 additions & 18 deletions src/Component/Pucene/Dbal/Interpreter/DbalInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,17 @@
use Pucene\Component\Pucene\Model\Document;
use Pucene\Component\QueryBuilder\Search;
use Pucene\Component\QueryBuilder\Sort\IdSort;
use Pucene\Component\QueryBuilder\Sort\ScoreSort;
use Pucene\Component\Symfony\Pool\PoolInterface;
use Pucene\Component\Utils\SortUtils;

class DbalInterpreter
{
static $sortPaths = [
ScoreSort::class => 'score',
IdSort::class => 'id',
];

/**
* @var PoolInterface
*/
Expand Down Expand Up @@ -42,8 +49,6 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
->select('document.*')
->from($schema->getDocumentsTableName(), 'document')
->where('document.type IN (?)')
->setMaxResults($search->getSize())
->setFirstResult($search->getFrom())
->setParameter(0, implode(',', $types));

/** @var InterpreterInterface $interpreter */
Expand All @@ -54,21 +59,9 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
}

$scoringAlgorithm = new ScoringAlgorithm($queryBuilder, $schema, $this->interpreterPool);
$expression = $interpreter->scoring($element, $scoringAlgorithm);

if ($expression) {
$queryBuilder->addSelect('(' . $expression . ') as score')->orderBy('score', 'desc');
} else {
$queryBuilder->addSelect('1 as score');
}

if (0 < count($search->getSorts())) {
foreach ($search->getSorts() as $sort) {
if ($sort instanceof IdSort) {
$queryBuilder->addOrderBy('id', $sort->getOrder());
}
}
}
// TODO sorting
// TODO pagination (size, limit)

$result = [];
foreach ($queryBuilder->execute()->fetchAll() as $row) {
Expand All @@ -77,10 +70,17 @@ public function interpret(array $types, Search $search, DbalStorage $storage, El
$row['type'],
$storage->getName(),
json_decode($row['document'], true),
array_key_exists('score', $row) ? (float) $row['score'] : 1
$interpreter->newScoring($element, $scoringAlgorithm, $row)
);
}

return $result;
$paths = [];
foreach ($search->getSorts() as $sort) {
$paths[] = self::$sortPaths[get_class($sort)];
}

$result = SortUtils::multisort($result, $paths);

return array_splice($result, $search->getFrom(), $search->getSize());
}
}
40 changes: 40 additions & 0 deletions src/Component/Pucene/Dbal/Interpreter/Element/BoolInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ public function __construct(PoolInterface $interpreterPool)
*/
public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBuilder)
{
    // Run every scoring element through its own interpreter first. The
    // expressions they return are discarded; the calls are made only for
    // what they do to the shared query builder.
    foreach ($element->getScoringElements() as $scoringElement) {
        $scoringInterpreter = $this->getInterpreter($scoringElement);
        $scoringInterpreter->interpret($scoringElement, $queryBuilder);
    }

    // The expression handed back to the caller comes from the wrapped element only.
    $wrapped = $element->getElement();

    return $this->getInterpreter($wrapped)->interpret($wrapped, $queryBuilder);
}

Expand Down Expand Up @@ -74,6 +78,42 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
);
}

/**
 * {@inheritdoc}
 *
 * Scores a fetched row against the scoring elements of a bool element:
 * the per-element scores are summed, then multiplied by the fraction of
 * scoring elements that match the row (coord) and by the element's boost.
 *
 * @param BoolElement $element
 * @param ScoringAlgorithm $scoring
 * @param array $row Fetched result row, passed through to the inner interpreters.
 * @param float|null $queryNorm Computed from the scoring elements' terms when falsy.
 *
 * @return float|int 0 when there are no scoring elements or the boost is zero.
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $scoringElements = $element->getScoringElements();
    $elementCount = count($scoringElements);
    $boost = $element->getBoost();

    // A zero boost may arrive as int or float; either one short-circuits to 0.
    if (0 === $elementCount || 0 === $boost || 0.0 === $boost) {
        return 0;
    }

    if (1 === $elementCount) {
        // NOTE(review): with a single scoring element neither the boost nor the
        // query norm is applied - unchanged from the previous code; confirm intended.
        $innerElement = $scoringElements[0];

        return $this->interpreterPool->get(get_class($innerElement))->newScoring($innerElement, $scoring, $row);
    }

    if (!$queryNorm) {
        $queryNorm = $scoring->queryNorm($this->getTerms($scoringElements));
    }

    $score = 0;
    $matched = 0;
    foreach ($scoringElements as $innerElement) {
        /** @var InterpreterInterface $interpreter */
        $interpreter = $this->interpreterPool->get(get_class($innerElement));

        $score += $interpreter->newScoring($innerElement, $scoring, $row, $queryNorm);

        if ($interpreter->matches($innerElement, $row)) {
            ++$matched;
        }
    }

    // Divide once instead of adding 1/n per match: repeated float additions
    // drift (ten times 1/10 sums to 0.9999999999999999 rather than 1).
    $coord = $matched / $elementCount;

    return $score * $coord * $boost;
}

private function getTerms(array $elements)
{
$terms = [];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBu
$expr = $queryBuilder->expr();

$expression = $expr->orX();
if ($element->getOperator() === CompositeElement:: AND) {
if ($element->getOperator() === CompositeElement:: OPERATOR_AND) {
$expression = $expr->andX();
}

Expand All @@ -53,4 +53,35 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return parent::scoring(new BoolElement($element, $element->getElements()), $scoring, $queryNorm);
}

/**
 * {@inheritdoc}
 *
 * Builds a BoolElement from the composite and its child elements and
 * delegates the scoring of the row to the parent implementation.
 *
 * @param CompositeElement $element
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $boolElement = new BoolElement($element, $element->getElements());

    return parent::newScoring($boolElement, $scoring, $row, $queryNorm);
}

/**
 * {@inheritdoc}
 *
 * Evaluates the child elements against the row according to the operator:
 * "or" is satisfied by the first matching child, "and" fails on the first
 * child that does not match. Without a deciding child the result is true
 * for "and" and false otherwise.
 *
 * @param CompositeElement $element
 */
public function matches(ElementInterface $element, array $row)
{
    $operator = $element->getOperator();
    $isAnd = CompositeElement::OPERATOR_AND === $operator;
    $isOr = CompositeElement::OPERATOR_OR === $operator;

    foreach ($element->getElements() as $child) {
        $childMatches = $this->interpreterPool->get(get_class($child))->matches($child, $row);

        if ($childMatches && $isOr) {
            return true;
        }

        if (!$childMatches && $isAnd) {
            return false;
        }
    }

    return $isAnd;
}
}
10 changes: 10 additions & 0 deletions src/Component/Pucene/Dbal/Interpreter/Element/IdsInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,14 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return (new MathExpressionBuilder())->value(1);
}

/**
 * Returns the constant score 1 for every row.
 *
 * None of the arguments is consulted; the parameters exist only to
 * satisfy the shared newScoring() signature.
 *
 * @return int Always 1.
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
return 1;
}

/**
 * Tells whether the row's id is one of the ids carried by the element.
 *
 * Both sides are normalised to strings and compared strictly, so an
 * integer id still matches its string form coming from the row, while
 * loose-comparison surprises (e.g. '1e3' == '1000', or any non-numeric
 * string == 0 on PHP 7) cannot produce false positives.
 *
 * @param ElementInterface $element Element exposing getIds().
 * @param array $row Fetched row; its 'id' entry is read.
 *
 * @return bool
 */
public function matches(ElementInterface $element, array $row)
{
    $ids = array_map('strval', $element->getIds());

    return in_array((string) $row['id'], $ids, true);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,9 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return (new MathExpressionBuilder())->value(1);
}

/**
 * Returns the constant score 1 for every row.
 *
 * None of the arguments is consulted; the parameters exist only to
 * satisfy the shared newScoring() signature.
 *
 * @return int Always 1.
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
return 1;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
);
}

/**
 * Scores the wrapped element for the given row and multiplies the result
 * by this element's boost.
 *
 * @param ElementInterface $element Wrapper exposing getElement() and getBoost().
 * @param ScoringAlgorithm $scoring
 * @param array $row Fetched result row, passed through to the inner interpreter.
 * @param float|null $queryNorm Passed through unchanged.
 *
 * @return float|int
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $innerElement = $element->getElement();
    $interpreter = $this->getInterpreter($innerElement);

    // The interpreter was resolved for the wrapped element, so it has to be
    // given that element - not the wrapper it knows nothing about.
    return $element->getBoost() * $interpreter->newScoring($innerElement, $scoring, $row, $queryNorm);
}

/**
* Returns interpreter for element.
*
Expand Down
40 changes: 37 additions & 3 deletions src/Component/Pucene/Dbal/Interpreter/Element/TermInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ class TermInterpreter implements InterpreterInterface
public function interpret(ElementInterface $element, PuceneQueryBuilder $queryBuilder)
{
$expr = $queryBuilder->expr();
$name = $queryBuilder->joinTerm($element->getField(), $element->getTerm());

return $expr->isNotNull(
$queryBuilder->joinTerm($element->getField(), $element->getTerm()) . '.id'
);
$queryBuilder->addSelect(sprintf('(%1$s.term_frequency * %1$s.field_norm) as %1$sValue', $name));

return $expr->isNotNull($name . '.id');
}

/**
Expand All @@ -33,4 +34,37 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return $scoring->scoreTerm($element, $queryNorm, $element->getBoost());
}

/**
 * {@inheritdoc}
 *
 * Multiplies the row's "term<Field><Term>Value" column by idf * boost,
 * and additionally by idf * queryNorm when a query norm is given.
 * Rows that lack the column, or hold NULL in it, score 0.
 *
 * NOTE(review): the column name assumes the alias produced by
 * PuceneQueryBuilder::joinTerm() is 'term' . ucfirst(field) . ucfirst(term)
 * - confirm against that method.
 *
 * @param TermElement $element
 * @param ScoringAlgorithm $scoring
 * @param array $row Fetched result row.
 * @param float|null $queryNorm
 *
 * @return float|int
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
    $termName = 'term' . ucfirst($element->getField()) . ucfirst($element->getTerm()) . 'Value';

    // Look at the row before asking for the idf: rows that do not carry the
    // term score 0 regardless, so the idf lookup would be wasted on them.
    if (!isset($row[$termName])) {
        return 0;
    }

    $idf = $scoring->inverseDocumentFrequency($element);
    $factor = $idf * $element->getBoost();
    if ($queryNorm) {
        $factor *= $idf * $queryNorm;
    }

    return $row[$termName] * $factor;
}

/**
 * {@inheritdoc}
 *
 * A row matches when it has a non-null "term<Field><Term>Value" entry
 * for the element's field and term.
 *
 * @param TermElement $element
 */
public function matches(ElementInterface $element, array $row)
{
    $column = sprintf('term%s%sValue', ucfirst($element->getField()), ucfirst($element->getTerm()));

    // isset() is false for both a missing key and a null value.
    return isset($row[$column]);
}
}
10 changes: 10 additions & 0 deletions src/Component/Pucene/Dbal/Interpreter/Element/TypeInterpreter.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,14 @@ public function scoring(ElementInterface $element, ScoringAlgorithm $scoring, $q
{
return (new MathExpressionBuilder())->value(1);
}

/**
 * Returns the constant score 1 for every row.
 *
 * None of the arguments is consulted; the parameters exist only to
 * satisfy the shared newScoring() signature.
 *
 * @return int Always 1.
 */
public function newScoring(ElementInterface $element, ScoringAlgorithm $scoring, array $row, $queryNorm = null)
{
return 1;
}

/**
 * Tells whether the row belongs to the type requested by the element.
 *
 * @param ElementInterface $element Element exposing getType().
 * @param array $row Fetched row; its 'type' entry is read.
 *
 * @return bool
 */
public function matches(ElementInterface $element, array $row)
{
    // Compare the row's type column - not its id - with the requested type.
    return $row['type'] === $element->getType();
}
}
2 changes: 1 addition & 1 deletion src/Component/Pucene/Dbal/ScoringAlgorithm.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ public function getConnection(): Connection
return $this->queryBuilder->getConnection();
}

private function inverseDocumentFrequency(ElementInterface $element): float
public function inverseDocumentFrequency(ElementInterface $element): float
{
return $this->calculateInverseDocumentFrequency($this->getDocCountForElement($element));
}
Expand Down
3 changes: 3 additions & 0 deletions src/Component/QueryBuilder/Search.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Pucene\Component\QueryBuilder;

use Pucene\Component\QueryBuilder\Query\QueryInterface;
use Pucene\Component\QueryBuilder\Sort\ScoreSort;
use Pucene\Component\QueryBuilder\Sort\SortInterface;

class Search
Expand Down Expand Up @@ -39,6 +40,8 @@ class Search
public function __construct(QueryInterface $query = null)
{
$this->query = $query;

$this->sorts = [new ScoreSort()];
}

/**
Expand Down
Loading

0 comments on commit b78fda0

Please sign in to comment.