Lazy formatting in evaluate_word_pairs (#1084)

As requested in #1079.
piskvorky · Jan 10, 2017 · 9112ee7
1 parent fe30541
commit 9112ee7
Showing 1 changed file with 17 additions and 12 deletions.
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
@@ -481,19 +481,20 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, c
 
     @staticmethod
     def log_evaluate_word_pairs(pearson, spearman, oov, pairs):
-        logger.info('Pearson correlation coefficient against %s: %.4f' % (pairs, pearson[0]))
-        logger.info('Spearman rank-order correlation coefficient against %s: %.4f' % (pairs, spearman[0]))
-        logger.info('Pairs with unknown words ratio: %.1f%%' % oov)
+        logger.info('Pearson correlation coefficient against %s: %.4f', pairs, pearson[0])
+        logger.info('Spearman rank-order correlation coefficient against %s: %.4f', pairs, spearman[0])
+        logger.info('Pairs with unknown words ratio: %.1f%%', oov)
 
-    def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, dummy4unknown=False):
+    def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True,
+                            dummy4unknown=False):
         """
         Compute correlation of the model with human similarity judgments. `pairs` is a filename of a dataset where
         lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter'.
-        An example dataset is included in Gensim (test/test_data/wordsim353.tsv). More datasets can be found at 
+        An example dataset is included in Gensim (test/test_data/wordsim353.tsv). More datasets can be found at
         http://technion.ac.il/~ira.leviant/MultilingualVSMdata.html or https://www.cl.cam.ac.uk/~fh295/simlex.html.
 
         The model is evaluated using Pearson correlation coefficient and Spearman rank-order correlation coefficient
-        between the similarities from the dataset and the similarities produced by the model itself.        .
+        between the similarities from the dataset and the similarities produced by the model itself.
         The results are printed to log and returned as a triple (pearson, spearman, ratio of pairs with unknown words).
 
         Use `restrict_vocab` to ignore all word pairs containing a word not in the first `restrict_vocab`
@@ -532,7 +533,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
                         a, b, sim = [word for word in line.split(delimiter)]
                     sim = float(sim)
                 except:
-                    logger.info('skipping invalid line #%d in %s' % (line_no, pairs))
+                    logger.info('skipping invalid line #%d in %s', line_no, pairs)
                     continue
                 if a not in ok_vocab or b not in ok_vocab:
                     oov += 1
@@ -541,7 +542,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
                         similarity_gold.append(sim)
                         continue
                     else:
-                        logger.debug('skipping line #%d with OOV words: %s' % (line_no, line.strip()))
+                        logger.debug('skipping line #%d with OOV words: %s', line_no, line.strip())
                         continue
                 similarity_gold.append(sim)  # Similarity from the dataset
                 similarity_model.append(self.similarity(a, b))  # Similarity from the model
@@ -550,10 +551,14 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
         pearson = stats.pearsonr(similarity_gold, similarity_model)
         oov_ratio = float(oov) / (len(similarity_gold) + oov) * 100
 
-        logger.debug('Pearson correlation coefficient against %s: %f with p-value %f'
-                     % (pairs, pearson[0], pearson[1]))
-        logger.debug('Spearman rank-order correlation coefficient against %s: %f with p-value %f'
-                     % (pairs, spearman[0], spearman[1]))
+        logger.debug(
+            'Pearson correlation coefficient against %s: %f with p-value %f',
+            pairs, pearson[0], pearson[1]
+        )
+        logger.debug(
+            'Spearman rank-order correlation coefficient against %s: %f with p-value %f',
+            pairs, spearman[0], spearman[1]
+        )
         logger.debug('Pairs with unknown words: %d' % oov)
         self.log_evaluate_word_pairs(pearson, spearman, oov_ratio, pairs)
         return pearson, spearman, oov_ratio