Skip to content

Commit

Permalink
Fix mismatching, refactor to use enums for DRY re:#604 and #631
Browse files Browse the repository at this point in the history
  • Loading branch information
machawk1 committed Mar 2, 2020
1 parent bf9aead commit a603cf5
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 9 deletions.
40 changes: 31 additions & 9 deletions ipwb/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from .util import unsurt
from .util import IPWBREPLAY_HOST, IPWBREPLAY_PORT
from .util import INDEX_FILE
from .util import MementoMatch

from . import indexer

Expand Down Expand Up @@ -187,7 +188,10 @@ def bin_search(iter, key, datetime=None):
ln = iter.readline()

surtk, datetimeK, rest = ln.split(maxsplit=2)
if key == surtk and datetime is None and datetimeK == datetime:
datetimeK = datetimeK.decode()

matchDegree = getMatchDegree(surt, datetime, surtk, datetimeK)
if matchDegree == MementoMatch.EXACTMATCH:
return [ln]

# If further searching required...
Expand All @@ -213,27 +217,35 @@ def bin_search(iter, key, datetime=None):
surtk = surtk.rstrip(b"/")
key = key.rstrip(b"/")

if key == surtk:
if datetime and datetime == datetimeK:
# Rm other close matches, exact found
lines.clear()
lines.add(line)
matchDegree = getMatchDegree(key, datetime, surtk, datetimeK)

break
if matchDegree == MementoMatch.RIGHTKEYWRONGDATE:
lines.add(line)
# Iterate further to get lines after selection point
nextLine = iter.readline()
while nextLine:
surtk, rest = nextLine.split(maxsplit=1)
surtk, datetimeK, rest = nextLine.split(maxsplit=2)
surtk = surtk.rstrip(b"/")
datetimeK = datetimeK.decode()

if key == surtk:
matchDegree = getMatchDegree(key, datetime, surtk, datetimeK)
if matchDegree == MementoMatch.RIGHTKEYWRONGDATE:
lines.add(nextLine)
elif matchDegree == MementoMatch.EXACTMATCH:
# Exact match found while iterating
lines.clear()
lines.add(nextLine)
break

nextLine = iter.readline()

# Continue searching until find first instance
right = mid
elif matchDegree == MementoMatch.EXACTMATCH:
lines.clear()
lines.add(line)

break
elif key > surtk:
left = mid
else:
Expand All @@ -245,6 +257,16 @@ def bin_search(iter, key, datetime=None):
return ret


def getMatchDegree(surt, datetime, surtK, datetimeK):
    """Classify how well an index entry matches a requested key/datetime.

    Values are compared with ``==``, so each pair (``surt``/``surtK`` and
    ``datetime``/``datetimeK``) must be of the same type to ever match;
    for example, ``bytes`` never compares equal to ``str``.

    Args:
        surt: Key being searched for.
        datetime: Requested datetime, or ``None`` when no specific
            datetime was requested.
        surtK: Key of the index entry under consideration.
        datetimeK: Datetime of the index entry under consideration.

    Returns:
        ``MementoMatch.WRONGKEY`` when the keys differ;
        ``MementoMatch.EXACTMATCH`` when the keys are equal and a
        datetime was requested that equals ``datetimeK``;
        ``MementoMatch.RIGHTKEYWRONGDATE`` when the keys are equal but
        no datetime was requested or it differs from ``datetimeK``.
    """
    if surt != surtK:
        return MementoMatch.WRONGKEY

    # A missing datetime can never be an exact match, even if the entry's
    # own datetime were also None.
    if datetime is not None and datetime == datetimeK:
        return MementoMatch.EXACTMATCH

    # Every remaining case returns explicitly, so the function can never
    # fall through and yield an implicit None.
    return MementoMatch.RIGHTKEYWRONGDATE


def getCDXJLinesWithURIR(urir, indexPath, datetime=None):
""" Get all CDXJ records corresponding to a URI-R """
if not indexPath:
Expand Down
9 changes: 9 additions & 0 deletions ipwb/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
import logging
import platform

from enum import Enum, auto

from six.moves.urllib.request import urlopen
import json
from .__init__ import __version__ as ipwbVersion
Expand All @@ -38,6 +40,13 @@

INDEX_FILE = 'samples/indexes/salam-home.cdxj'


class MementoMatch(Enum):
    """Degree to which an index entry matches a requested key/datetime."""

    # The entry's key differs from the requested key.
    WRONGKEY = 1
    # The key matches, but the datetime was not requested or differs.
    RIGHTKEYWRONGDATE = 2
    # Both the key and the datetime match.
    EXACTMATCH = 3


log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

Expand Down

0 comments on commit a603cf5

Please sign in to comment.