Skip to content

Commit

Permalink
🐛 fix sentence splitter
Browse files Browse the repository at this point in the history
  • Loading branch information
zhzLuke96 committed Jun 22, 2024
1 parent 23023bc commit 5d8937c
Showing 1 changed file with 9 additions and 1 deletion.
10 changes: 9 additions & 1 deletion modules/SentenceSplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@ def split_zhon_sentence(text):
return result


def split_zhon_paragraph(text):
    """Split multi-line text into a flat list of sentences.

    The input is broken apart on newline characters and each resulting
    line is passed to ``split_zhon_sentence``. The per-line results are
    concatenated, in their original order, into one list.
    """
    return [
        sentence
        for line in text.split("\n")
        for sentence in split_zhon_sentence(line)
    ]


# 解析文本 并根据停止符号分割成句子
# 可以设置最大阈值,即如果分割片段小于这个阈值会与下一段合并
class SentenceSplitter:
def __init__(self, threshold=100):
self.sentence_threshold = threshold

def parse(self, text):
sentences = split_zhon_sentence(text)
sentences = split_zhon_paragraph(text)

# 合并小于最大阈值的片段
merged_sentences = []
Expand Down

0 comments on commit 5d8937c

Please sign in to comment.