生成mdict词典等

HeavySnowJakarta · Sep 24, 2023 · 63be512 · 63be512
1 parent dfc04c8
commit 63be512
Show file tree

Hide file tree

Showing 5 changed files with 37 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -10,6 +10,8 @@ This project is only in Chinese and i18n would be a super long shot.
 
 + 日期（由新到旧排序） 消息内容
 
++ 9-24 利用 writemdict 模块实现了写入 mdx 的函数（mdict.py）。珍爱生命，拒绝造轮子！
+
 + 9-23 阅读了 MDict txt 格式的初步制作方法。mdx 格式词典的生成要用到 MdxBuilder，这是仅适用于 Windows 的闭源 GUI 软件。如果要用 MdxBuilder，我必须考虑：
 
     + 要不要把 Github Actions 容器换成 Windows 虚拟机？

diff --git a/configure.py b/configure.py
@@ -1,11 +1,9 @@
 # 此文件存储配置性质的全局变量。
+# 警告：不要注释掉下述任何变量，本项目其他部分没有为它们设置默认值。
 
 # 本项目使用 GitHub Rest API 获取最新数据，因此数据源需为 GitHub 仓库。repository 变量指定所使用的 GitHub 仓库，格式为“所有者/仓库名”。
 repository = "EhTagTranslation/Database"
 
-# directory 变量指定 markdown 文件所在的目录，项目将读取该目录所有 markdown 文件
-directory = "database"
-
 # output_name 变量指定输出词典的文件名前缀。最终输出的词典将以“前缀_后缀”的格式命名，其中前缀在这里指定，而后缀将为词典的格式
 output_name = "EhTagTranslation"
 

diff --git a/fetch.py b/fetch.py
@@ -22,7 +22,7 @@ def getDataUrl(repository, noimage):
                 return i["browser_download_url"]
 
 # 此函数为 fetch.py 的主体，从数据源实际地址解析 JSON 格式的数据并返回 Python 字典            
-def getJsonData(repository, noimage):
+def getDictionaryData(repository, noimage):
     # 从 URL 获取 JSON 格式的字典
     url = getDataUrl(repository, noimage)
     stringresult = requests.get(url).text
@@ -32,7 +32,8 @@ def getJsonData(repository, noimage):
     for category in jsonresult["data"]:
         for enname in category["data"].keys():
             result[enname] = category["data"][enname]
-    # 当 female 与 male 标签的词条同时存在时，取 female 标签的词条。为了确保这一点，我们在这里牺牲一些效率把这些相同的词条再一次换为 female 的
+    # 当 female 与 male 标签的词条同时存在时，取 female 标签的词条
+    # 为了确保这一点，我们在这里牺牲一些效率把这些相同的词条再一次换为 female 的
     # 接下来的四行代码虽然能实现业务，但是很烂，求大佬提供更好的实现方式
     for category in jsonresult["data"]:
         if category["namespace"] == "female":

diff --git a/main.py b/main.py
@@ -2,4 +2,5 @@
 
 version = 0
 
-from . import *
+import configure
+import fetch
diff --git a/mdict.py b/mdict.py
@@ -0,0 +1,29 @@
+# 此文件调用 writemdict 库产生 MDict 格式的文件
+# writemdict 库是 MdxBuilder 的一个开源实现，并不包含 mdx 的全部技术细节，也
+# 不能生成新版 mdx 词典，不过这不重要。
+
+from lib.writemdict.writemdict import MDictWriter
+
+# 词典以英文名为词头，释义格式为：中文名<br>描述<br>链接（links 为空时省略链接；
+# nodescription 为真时释义仅含中文名）。词条以 Python 字典的形式传递给 writemdict 库
+# 此函数的 dictionary 参数由 fetch.py 的函数生成
+def generateSourceDictionary(dictionary, nodescription):
+    result = {}
+    if (nodescription):
+        for entry in dictionary.keys():
+            result[entry] = dictionary[entry]["name"]
+    else:
+        for entry in dictionary.keys():
+            if (dictionary[entry]["links"]==""):
+                result[entry] = dictionary[entry]["name"] + "<br>" + dictionary[entry]["intro"]
+            else:
+                result[entry] = dictionary[entry]["name"] + "<br>" + dictionary[entry]["intro"] + "<br>" + dictionary[entry]["links"]
+    return result
+
+# 此函数调用 writemdict 库并向传入的文件对象写入数据。其中 dictionary 参数由
+# fetch.py 的函数生成；outfile 为被写入的文件对象，必须拥有写入权限，且必须以
+# 二进制模式打开
+def writeMdxFile(title, description, dictionary, nodescription, outfile):
+    source_dictionary = generateSourceDictionary(dictionary, nodescription)
+    writer = MDictWriter(source_dictionary, title=title, description=description)
+    writer.write(outfile)