Add support to print multi system results as JSON (#213)

* Add support to print multi system results as JSON

Co-authored-by: Matt Post <mattpost@microsoft.com>
mjpost · Oct 6, 2022 · 37de171
1 parent 1d04b6f
commit 37de171
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 8 deletions.
diff --git a/.github/workflows/check-build.yml b/.github/workflows/check-build.yml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.6", "3.7", "3.8", "3.9", "3.10"]
         exclude:
           - os: windows-latest
             python-version: '3.6'   # test fails due to UTF8 stuff

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,19 +1,17 @@
 # Release Notes
 
-- 2.2.2 (2022-XX-XX)
+- 2.3.0 (2022-10-XX)
   Features:
   - (#203) Added `-tok flores101` and `-tok flores200`, a.k.a. `spbleu`.
     These are multilingual tokenizations that make use of the
     multilingual SPM models released by Facebook and described in the
     following papers:
     * Flores-101: https://arxiv.org/abs/2106.03193
     * Flores-200: https://arxiv.org/abs/2207.04672
+  - (#213) Added JSON formatting for multi-system output (thanks to Manikanta Inugurthi @me-manikanta)
   - (#211) You can now list all test sets for a language pair with `--list SRC-TRG`.
     Thanks to Jaume Zaragoza (@ZJaume) for adding this feature.
 
-  Changes:
-  - Removed testing support for Python 3.6 (end-of-lifed: https://endoflife.date/python).
-
 - 2.2.1 (2022-09-13)
   Bugfix: Standard usage was returning (and using) each reference twice.
 

diff --git a/sacrebleu/__init__.py b/sacrebleu/__init__.py
@@ -14,7 +14,7 @@
 # express or implied. See the License for the specific language governing
 # permissions and limitations under the License.
 
-__version__ = '2.2.2'
+__version__ = '2.3.0'
 __description__ = 'Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores'
 
 

diff --git a/sacrebleu/utils.py b/sacrebleu/utils.py
@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import sys
@@ -90,9 +91,26 @@ def _color_p_value(p: float):
 def print_results_table(results: dict, signatures: dict, args: Namespace):
     """Prints out a nicely formatted table for multi-system evaluation mode."""
 
+    if args.format == 'json':
+        proper_json = []
+        dict_keys = list(results.keys())
+        for i in range(len(results['System'])):
+            value = {}
+            value['system'] = results['System'][i]
+            # parse metrics
+            for j in range(1, len(dict_keys)):
+                if isinstance(results[dict_keys[j]][i], str):
+                    value[dict_keys[j]] = results[dict_keys[j]][i]
+                else:
+                    # Values inside object as dict
+                    value[dict_keys[j]] = results[dict_keys[j]][i].__dict__
+            proper_json.append(value)
+
+        print(json.dumps(proper_json, indent=4))
+        return
+
     tablefmt = args.format
-    if tablefmt in ('text', 'json'):
-        # Fallback to simple table if json is given
+    if tablefmt in ('text'):
         tablefmt = 'fancy_grid'
     elif tablefmt == 'latex':
         # Use booktabs