Update gossip gen script
greenya committed Jan 8, 2024
1 parent 164d05e commit 057fcbe
Showing 3 changed files with 83 additions and 25 deletions.
7 changes: 6 additions & 1 deletion scripts/README.md
@@ -15,6 +15,7 @@ All other files are edited manually, e.g. via pull requests.
 1. Download all files from Crowdin:
     - Export terms in TBX (v2) format -> ClassicUA.tbx
     - Export quests via "Build & Download" -> ClassicUA.zip
+    - Download gossip source texts -> ClassicUA_en_gossip.zip
 
 2. Update Terms app:
     - Update file /docs/terms/ClassicUA.tbx
@@ -23,7 +24,11 @@ All other files are edited manually, e.g. via pull requests.
 3. Generate lua files:
     - Clean up folder "translation_from_crowdin"
-    - Copy ClassicUA.tbx and ClassicUA.zip, extract zip
+    - Copy ClassicUA.tbx, ClassicUA.zip and ClassicUA_en_gossip.zip, extract both zips
+    - Expected structure at this point:
+      * /uk/ <- from ClassicUA.zip
+      * /en/gossip/ <- from ClassicUA_en_gossip.zip
+      * /ClassicUA.tbx
     - Run python gen_addon_books_source_from_crowdin.py > translation_from_crowdin/books_stats.txt
     - Run python gen_addon_gossip_source_from_crowdin.py > translation_from_crowdin/gossip_stats.txt
     - Run python gen_addon_npcs_source_from_crowdin.py > translation_from_crowdin/npcs_stats.txt
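Note: the generators assume the layout above is already in place. A quick sanity check before running them (a minimal sketch; the paths come from the README steps, the script itself is not part of the repo):

import os

base = 'translation_from_crowdin'
# /uk/ from ClassicUA.zip, /en/gossip/ from ClassicUA_en_gossip.zip, plus the TBX export
for entry in ['uk', 'en/gossip', 'ClassicUA.tbx']:
    path = os.path.join(base, entry)
    print(f'{path}: {"OK" if os.path.exists(path) else "MISSING"}')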
84 changes: 64 additions & 20 deletions scripts/gen_addon_gossip_source_from_crowdin.py
@@ -1,40 +1,82 @@
-import sys, os
+import sys, os, re
 from xml.etree import ElementTree
 import utils
 
+# TODO: add expansion support when we actually have gossip_[expansion] folders
+
+def get_all_strings_from_xml_file(filename):
+    result = []
+
+    for s in ElementTree.parse(filename).getroot().findall('./string'):
+        if s.text:
+            t = utils.get_clean_text(s.text)
+            result.append(t)
+
+    return result
+
 def collect_gossip():
-    gossip_path = 'translation_from_crowdin/uk/gossip/'
-    print(f'Processing {gossip_path}')
+    source_path = 'translation_from_crowdin/en/gossip/'
+    translation_path = 'translation_from_crowdin/uk/gossip/'
+    filename_pattern = re.compile(r'^([^_]+)_(\d+)\.xml$')
+
+    print(f'Processing {translation_path}')
+    print(f'Using source from {source_path}')
 
     result = {}
     issues = []
 
-    for dirpath, _, filenames in os.walk(gossip_path):
+    for dirpath, _, filenames in os.walk(translation_path):
         for filename in filenames:
             if not filename.lower().endswith('.xml'):
                 continue
 
-            tree = ElementTree.parse(os.path.join(dirpath, filename))
-            root = tree.getroot()
+            npc_name, npc_id = re.search(filename_pattern, filename).groups()
 
-            for s in root.findall('./string'):
-                if s.text:
-                    s_name = s.attrib['name']
-                    npc_id, text_code = utils.unpack_gossip_string_name(s_name)
-                    text_ua = utils.get_clean_text(s.text)
+            if npc_id in result:
+                issues.append(f'[!] Duplicated npc id #{npc_id} via {filename}. File skipped.')
+                continue
 
-                    if not npc_id in result:
-                        result[npc_id] = {}
+            ua_strings = get_all_strings_from_xml_file(os.path.join(dirpath, filename))
 
-                    if text_code in result[npc_id]:
-                        issues.append(f'[!] Replacing text #{text_code} for npc #{npc_id}\n\tOld text: {result[npc_id][text_code]}\n\tNew text: {text_ua}')
+            if not ua_strings:
+                continue
 
+            filename_sub_path = f'{dirpath}/{filename}'.replace(translation_path, '').replace(filename, '')
+            en_strings = get_all_strings_from_xml_file(os.path.join(source_path, filename_sub_path, filename))
+
-                    result[npc_id][text_code] = text_ua
+            # print(f'=========== #{npc_id} {npc_name} ============')
+            # print(ua_strings)
+            # print(en_strings)
+
+            if len(ua_strings) != len(en_strings):
+                issues.append(f'[!] Different number of en->uk strings for {filename}. File skipped.')
+                continue
+
+            npc_gossip = {}
+
+            for i in range(len(ua_strings)):
+                text_ua = ua_strings[i]
+                text_en = en_strings[i]
+
+                if text_ua == text_en:
+                    continue
+
+                text_code = utils.get_text_code(text_en)
+                text_data = (text_ua, text_en)
+
+                if text_code in npc_gossip:
+                    issues.append(f'[!] Text code "{text_code}" collision in {filename} -- New data skipped\n\tOld: {npc_gossip[text_code]}\n\tNew: {text_data}')
+                    continue
+
+                npc_gossip[text_code] = { 'en': text_en, 'ua': text_ua }
+
+            if not npc_gossip:
+                continue
 
-    for npc_id in result:
-        result[npc_id] = dict(sorted(result[npc_id].items()))
+            result[npc_id] = {
+                'name' : npc_name,
+                'strings' : dict(sorted(npc_gossip.items()))
+            }
 
     result = dict(sorted(result.items()))
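For reference, the new filename_pattern splits a translation file name into NPC name and id, e.g. for a hypothetical file Gubber_7853.xml (note that [^_]+ assumes the name part itself contains no underscore):

import re

filename_pattern = re.compile(r'^([^_]+)_(\d+)\.xml$')

npc_name, npc_id = re.search(filename_pattern, 'Gubber_7853.xml').groups()
print(npc_name, npc_id)  # -> Gubber 7853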

@@ -45,8 +87,10 @@ def print_report(gossip, issues):
 
     total_gossip_texts = 0
     for npc_id in gossip:
-        print(f'npc #{npc_id}: {", ".join(gossip[npc_id].keys())}')
-        total_gossip_texts += len(gossip[npc_id])
+        npc_name = gossip[npc_id]['name']
+        npc_strings = gossip[npc_id]['strings']
+        print(f'npc #{npc_id} {npc_name}: {", ".join(npc_strings.keys())}')
+        total_gossip_texts += len(npc_strings)
 
     print('-' * 80)
     print(f'Total npcs with gossip: {len(gossip)}')
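After this change collect_gossip returns one entry per NPC, keyed by id, with the name and an en/uk pair per text code; roughly this shape (ids, codes and texts are hypothetical, and the code format is whatever utils.get_text_code produces):

result = {
    '7853': {
        'name': 'Gubber',
        'strings': {
            'a1b2c3': { 'en': 'Hello there!', 'ua': 'Привіт!' },
        },
    },
}
# print_report would then emit a line like:
# npc #7853 Gubber: a1b2c3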
17 changes: 13 additions & 4 deletions scripts/utils.py
@@ -247,11 +247,20 @@ def write_lua_gossip_file(path, filename, gossip):
     f.write('addonTable.gossip = { -- [npc_id] = { [code1] = text1, [code2] = text2, ... }\n')
 
     for npc_id in gossip:
-        npc_texts = gossip[npc_id]
+        npc_name = gossip[npc_id]['name']
+        npc_strings = gossip[npc_id]['strings']
+
+        f.write(f'[{npc_id}] = ' + '{' + f' -- {npc_name}\n')
+
+        for text_code in npc_strings:
+            text_en = npc_strings[text_code]['en']
+            text_ua = npc_strings[text_code]['ua']
+
+            for s in text_en.split('\n'):
+                f.write(f'--{" " + s if s else ""}\n')
+
+            f.write(f'["{text_code}"] = [===[{text_ua}]===],\n')
 
-        f.write(f'[{npc_id}] = ' + '{\n')
-        for text_code in npc_texts:
-            f.write(f'["{text_code}"] = [===[{npc_texts[text_code]}]===],\n')
         f.write('},\n')
 
     f.write('}\n')
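To illustrate the new output, here is the updated loop applied to a tiny in-memory sample (a sketch via io.StringIO; the npc, code and texts are made up):

import io

gossip = {
    '7853': {
        'name': 'Gubber',
        'strings': { 'a1b2c3': { 'en': 'Hello there!', 'ua': 'Привіт!' } },
    },
}

f = io.StringIO()
for npc_id in gossip:
    npc_name = gossip[npc_id]['name']
    npc_strings = gossip[npc_id]['strings']
    f.write(f'[{npc_id}] = ' + '{' + f' -- {npc_name}\n')
    for text_code in npc_strings:
        text_en = npc_strings[text_code]['en']
        text_ua = npc_strings[text_code]['ua']
        for s in text_en.split('\n'):
            f.write(f'--{" " + s if s else ""}\n')  # English source kept as Lua comments
        f.write(f'["{text_code}"] = [===[{text_ua}]===],\n')
    f.write('},\n')

print(f.getvalue())
# [7853] = { -- Gubber
# -- Hello there!
# ["a1b2c3"] = [===[Привіт!]===],
# },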
