diff --git a/wordseg/evaluate.py b/wordseg/evaluate.py index 6752a83..8b56de1 100755 --- a/wordseg/evaluate.py +++ b/wordseg/evaluate.py @@ -6,6 +6,7 @@ """ import codecs +import collections from wordseg import utils from wordseg.separator import Separator @@ -206,17 +207,19 @@ def evaluate(text, gold, separator=_DEFAULT_SEPARATOR): _stringpos_boundarypos(text_stringpos), _stringpos_boundarypos(gold_stringpos)) - return { - 'token_precision': token_eval.precision(), - 'token_recall': token_eval.recall(), - 'token_fscore': token_eval.fscore(), - 'type_precision': type_eval.precision(), - 'type_recall': type_eval.recall(), - 'type_fscore': type_eval.fscore(), - 'boundary_precision': boundary_eval.precision(), - 'boundary_recall': boundary_eval.recall(), - 'boundary_fscore': boundary_eval.fscore() - } + # return the scores in a fixed order (the default dict does not + # respect insertion order). This is needed for python<3.6, see + # https://docs.python.org/3.6/whatsnew/3.6.html#new-dict-implementation + return collections.OrderedDict((k, v) for k, v in ( + ('token_precision', token_eval.precision()), + ('token_recall', token_eval.recall()), + ('token_fscore', token_eval.fscore()), + ('type_precision', type_eval.precision()), + ('type_recall', type_eval.recall()), + ('type_fscore', type_eval.fscore()), + ('boundary_precision', boundary_eval.precision()), + ('boundary_recall', boundary_eval.recall()), + ('boundary_fscore', boundary_eval.fscore()))) def _load_text(text):