Skip to content

Commit

Permalink
Merge pull request #2 from SQ-AMD/main
Browse files Browse the repository at this point in the history
为 LLM 应用预处理非结构化数据
  • Loading branch information
6forwater29 committed Jun 14, 2024
2 parents 6da7ebe + a66e920 commit 3c60dad
Show file tree
Hide file tree
Showing 11 changed files with 3,857 additions and 0 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import panel as pn
from dotenv import load_dotenv


# Make sure the Panel extension is loaded to enable web-based visualization.
pn.extension()


class Utils:
    """Utility class for reading DLAI configuration from environment variables.

    Creating an instance calls ``load_dotenv()`` once; the getters then read
    the process environment through ``os.getenv``.
    """

    def __init__(self):
        # Load the environment variables only once, when the instance is created.
        load_dotenv()

    def get_dlai_api_key(self):
        """Retrieve the DLAI API key from the environment.

        Returns:
            The value of ``DLAI_API_KEY`` if it is set; otherwise ``None``.
        """
        return os.getenv("DLAI_API_KEY")

    def get_dlai_url(self):
        """Retrieve the DLAI API URL from the environment.

        Returns:
            The value of ``DLAI_API_URL`` if it is set; otherwise ``None``.
        """
        # The value is returned without being echoed: a getter should not
        # write configuration to stdout as a side effect.
        return os.getenv("DLAI_API_URL")


class UploadFile:
    """Handle a file upload through a Panel ``FileInput`` widget.

    The widget is created with ``accept='.pdf,.ppt,.png,.html'`` and
    ``multiple=False``. Whenever the widget's ``filename`` parameter changes,
    ``save_filename`` is invoked to persist the uploaded bytes.
    """

    # Maximum accepted upload size in bytes (2 MB).
    MAX_FILE_SIZE = 2 * 1024 * 1024
    # Directory that uploaded files are written to.
    OUTPUT_DIR = './example_files'

    def __init__(self):
        self.widget_file_upload = pn.widgets.FileInput(accept='.pdf,.ppt,.png,.html', multiple=False)
        # Watch the 'filename' parameter so that a change triggers save_filename.
        self.widget_file_upload.param.watch(self.save_filename, 'filename')

    def save_filename(self, event):
        """Save the uploaded file if it is within the 2 MB size limit.

        Args:
            event: Details of the change on the file-input widget. Not used
                directly, but required by the watcher callback signature.

        Prints a message and saves nothing when the upload exceeds the size
        limit or its name has no usable file-name component. Does nothing
        when the widget currently holds no value or no filename.
        """
        payload = self.widget_file_upload.value
        raw_name = self.widget_file_upload.filename

        # Nothing to save yet (e.g. the widget was cleared); avoids len(None).
        if payload is None or not raw_name:
            return

        if len(payload) > self.MAX_FILE_SIZE:
            print("文件过大。2 MB 限制!")
            return

        # The filename comes from the client: keep only its final component so
        # names such as '../x' or 'C:\\dir\\x' cannot escape OUTPUT_DIR.
        safe_name = os.path.basename(raw_name.replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            print("文件名无效。")
            return

        # Make sure the target directory exists.
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

        # Write the uploaded bytes into the target directory.
        with open(os.path.join(self.OUTPUT_DIR, safe_name), 'wb') as f:
            f.write(payload)
Binary file not shown.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
chromadb==0.4.22
langchain==0.1.5
langchain-community==0.0.17
langchain-core==0.1.19
langchain-openai==0.0.5
openai==1.30.2
tiktoken==0.5.2
unstructured-client==0.14.0
unstructured==0.11.8
unstructured-inference==0.7.23
unstructured.pytesseract==0.3.12
urllib3==1.26.18
python-dotenv==1.0.1
panel==1.3.0a8
ipython==8.12.3
python-pptx==0.6.23
pdf2image==1.17.0
pdfminer==20191125
opencv-python==4.9.0.80
pikepdf==8.13.0
pypdf==4.0.1
protobuf==4.22
requests==2.29.0
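# urllib3 is already pinned above (urllib3==1.26.18); the duplicate, conflicting pin urllib3==1.25.11 was dropped because pip rejects double requirements.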

0 comments on commit 3c60dad

Please sign in to comment.