Skip to content

Commit

Permalink
fix bug in ItemMatcher collation (#70)
Browse files Browse the repository at this point in the history
* fix bug in ItemMatcher collation

* deep checkout for workflow actions
  • Loading branch information
geo-martino authored Apr 18, 2024
1 parent 3009476 commit 41f25b4
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 24 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/docs_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ jobs:
steps:
- name: Checkout 🛒
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: ⚙️ Setup - Python 🐍
uses: actions/setup-python@v5
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ jobs:
steps:
- name: 🛒 Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: ⚙️ Setup - Python 🐍
uses: actions/setup-python@v5
Expand Down
11 changes: 11 additions & 0 deletions .idea/musify.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions docs/release-history.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ Release History
The format is based on `Keep a Changelog <https://keepachangelog.com/en>`_,
and this project adheres to `Semantic Versioning <https://semver.org/spec/v2.0.0.html>`_

0.9.1
=====

Fixed
-----

* Bug in :py:meth:`.ItemMatcher.match` where operations always returned the last item in the given list of ``results``


0.9.0
=====
Expand Down
59 changes: 37 additions & 22 deletions musify/processors/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,7 @@ def match[T: MusifyObject](
else:
match_on_filtered.add(match_field)

min_score = limit_value(min_score, floor=0.01, ceil=1.0)
max_score = limit_value(max_score, floor=0.01, ceil=1.0)
self._log_algorithm(source=source, extra=[f"max_score={max_score}"])

Expand All @@ -316,34 +317,20 @@ def match[T: MusifyObject](
allow_karaoke=allow_karaoke
)

result = None
best_score = 0

def sum_nested_scores(futures: list[list[Future[float]]]) -> float:
"""Sum the scores from a given list of nested Futures"""
scores_summed = [sum(score.result() for score in nested) / len(nested) for nested in futures]
return sum(scores_summed) / len(scores_summed)

for result, result_scores in scores:
result_scores = [
sum_nested_scores(score) if isinstance(score, list) else score.result()
for score in result_scores.values()
]
best_score = sum(result_scores) / len(result_scores)
if best_score > max_score:
break
result, score = self._get_match_from_scores(scores, max_score=max_score)

min_score = limit_value(min_score, floor=0.01, ceil=1.0)
if best_score > min_score:
if result is not None and score > min_score:
extra = [
f"best score: {'%.2f' % round(best_score, 2)} > {'%.2f' % round(min_score, 2)}"
if best_score < max_score else
f"max score reached: {'%.2f' % round(best_score, 2)} > {'%.2f' % round(max_score, 2)}"
f"best score: {score:.2f} > {min_score:.2f}"
if score < max_score else
f"max score reached: {score:.2f} > {max_score:.2f}"
]
self._log_match(source=source, result=result, extra=extra)
return result
else:
self._log_test(source=source, result=result, test=best_score, extra=[f"NO MATCH: {best_score}<{min_score}"])
self._log_test(
source=source, result=result, test=score, extra=[f"NO MATCH: {score:.2f}<{min_score:.2f}"]
)

def _score[T: MusifyObject](
self,
Expand Down Expand Up @@ -432,6 +419,34 @@ def _get_scores[T: MusifyObject](

return scores

def _get_match_from_scores[T: MusifyObject](
self,
scores: Iterable[tuple[T, dict[TagField, Future[float] | list[list[Future[float]]]]]],
max_score: float,
) -> tuple[T | None, float]:
best_result = None
best_score = 0

def sum_nested_scores(futures: list[list[Future[float]]]) -> float:
"""Sum the scores from a given list of nested Futures"""
scores_summed = [sum(score.result() for score in nested) / len(nested) for nested in futures]
return sum(scores_summed) / len(scores_summed)

for result, result_scores in scores:
result_scores = [
sum_nested_scores(score) if isinstance(score, list) else score.result()
for score in result_scores.values()
]
score = sum(result_scores) / len(result_scores)
if score > best_score:
best_score = score
best_result = result

if best_score > max_score:
break

return best_result, best_score

def as_dict(self) -> dict[str, Any]:
return {
"clean_tags": {
Expand Down
4 changes: 2 additions & 2 deletions tests/processors/test_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def test_match_all(self, matcher: ItemMatcher, track1: LocalTrack, track2: Local
track3.artist = f"artist{sep}nope{sep}other"
track3.year = 2015
assert matcher.match(track1, [track2, track3], min_score=0.2, max_score=0.8) == track3
assert matcher(track1, [track2, track3], min_score=0.2, max_score=0.8) == track3
assert matcher(track1, [track3, track2], min_score=0.2, max_score=0.8) == track3

# track4 score is above max_score causing an early stop
track4 = random_track()
Expand All @@ -193,7 +193,7 @@ def test_match_all(self, matcher: ItemMatcher, track1: LocalTrack, track2: Local
track4.album = "album"
track4._reader.file.info.length = 100
track4.year = 2015
assert matcher.match(track1, [track2, track4, track3], min_score=0.2, max_score=0.8) == track4
assert matcher.match(track1, [track4, track2, track3], min_score=0.2, max_score=0.8) == track4
assert matcher(track1, [track2, track4, track3], min_score=0.2, max_score=0.8) == track4

def test_allows_karaoke(self, matcher: ItemMatcher, track1: LocalTrack, track2: LocalTrack):
Expand Down

0 comments on commit 41f25b4

Please sign in to comment.