-
Notifications
You must be signed in to change notification settings - Fork 24
/
uv-instructions.txt
150 lines (115 loc) · 4.84 KB
/
uv-instructions.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
GLOBAL INSTALLATION OF UV
https://github.com/astral-sh/uv
https://medium.com/bitgrit-data-science-publication/forget-pip-install-use-this-instead-754863c58f1e
➜ pip install uv
# On macOS and Linux.
curl -LsSf https://astral.sh/uv/install.sh | sh
# On Windows.
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
# With pip.
pip install uv
# With pipx.
pipx install uv
# With Homebrew.
brew install uv
# With Pacman.
pacman -S uv
CREATE A VENV
uv venv # Create a virtual environment at .venv.
➜ uv venv # Create a virtual environment at .venv.
Using Python 3.10.0rc2 interpreter at: C:\Users\FabioMatricardi\AppData\Local\Programs\Python\Python310\python.exe
Creating virtualenv at: .venv
Activate with: .venv\Scripts\activate
➜ cpuORPO ⚡
INSTALL PACKAGES
uv pip install flask # Install Flask.
uv pip install -r requirements.txt # Install from a requirements.txt file.
uv pip install -e . # Install the current project in editable mode.
uv pip install "package @ ." # Install the current project from disk
uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra.
uv pip install llama-cpp-python[server]==0.2.60
uv pip install openai
uv pip install streamlit
uv pip install langchain
uv pip install tiktoken
uv pip install sentence_transformers
uv pip install rich
uv pip install --upgrade pymupdf
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/2024-04-09%2007.05.35%20Review%20of%20Abraham%20Flexner%20s%20The%20Usefulness%20of%20Use.txt
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/2024-04-11%2012.52.28%20Kaggle%20s%20wrong%20turn%20when%20AI%20becomes%20a%20teacher%20and.txt
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/28884E00-%20SYSTEM%20OPERATIONAL%20TEST%20PROCEDURE%20PREPARATION%20CUIDELINE.pdf
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/Dark%20Psychology%20and%20Manipulation%20-%20The%20Ultimate%20Guide%20To%20Master%20The%20Art%20Of%20Persuasion.pdf
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/UsefulnessHarpers.pdf
!wget https://github.com/fabiomatricardi/cdQnA/raw/main/2024-04-12%2013.16.08%20Kaggle%20s%20wrong%20turn%20when%20AI%20becomes%20a%20teacher%20and.txt
PREIMPORT
```
# Common imports and shared objects used by the snippets that follow.
from tqdm.rich import trange, tqdm
from rich.markdown import Markdown
import warnings
# Ignore every warning raised from this point on.
warnings.filterwarnings(action='ignore')
import datetime
from rich.console import Console
# Shared rich console, 110 columns wide; the later snippets print through it.
console = Console(width=110)
from llama_cpp import Llama
import tiktoken
# Tokenizer encoding; the later snippets use it to count the tokens of the loaded text.
encoding = tiktoken.get_encoding("r50k_base")
```
LOAD PDF
```
# Load PDF function and its input file.
# NOTE(review): os and fitz are not referenced by the code shown here - confirm they are still needed.
import os
import fitz #pyMuPDF
# PDF to process; the file name matches one of the PDFs fetched by the wget commands above.
miofile = "/content/28884E00- SYSTEM OPERATIONAL TEST PROCEDURE PREPARATION CUIDELINE.pdf"
def LoadPDFandWork(filepath, chunks, overlap):
    """Load a PDF, split it into token-based chunks and collect its text.

    Progress and statistics are printed through the module-level ``console``.

    Args:
        filepath: Path of the PDF file, handed to ``PyMuPDFLoader``.
        chunks: Passed to ``TokenTextSplitter`` as ``chunk_size``.
        overlap: Passed to ``TokenTextSplitter`` as ``chunk_overlap``.

    Returns:
        A ``(data, solotesto)`` tuple. ``data`` is the sequence of chunk
        documents returned by ``loader.load_and_split``; ``solotesto`` is the
        ``page_content`` of every chunk, each prefixed with one space and
        concatenated into a single string.
    """
    import datetime

    from langchain.text_splitter import TokenTextSplitter
    from langchain_community.document_loaders import PyMuPDFLoader

    splitter = TokenTextSplitter(chunk_size=chunks, chunk_overlap=overlap)

    start = datetime.datetime.now()
    console.print('1. loading pdf')
    # On a local Windows setup a bare file name such as 'stl-0000011.pdf' is enough.
    loader = PyMuPDFLoader(filepath)
    data = loader.load_and_split(splitter)
    delta = datetime.datetime.now() - start
    console.print(f'2. Loaded in {delta}')
    console.print(f'3. Number of items: {len(data)}')
    console.print('---')

    # Build the full text in one pass; every chunk keeps its leading space so
    # the result is identical to the former incremental concatenation.
    solotesto = ''.join(' ' + item.page_content for item in data)
    # Character count covers the chunk contents only, not the joining spaces.
    chars = sum(len(item.page_content) for item in data)

    console.print('---')
    console.print(f'> Total lenght of text in characthers: {chars}')
    console.print('---')
    return data, solotesto
# Split the PDF with chunk size 300 and overlap 50; `article` is the full text.
d, article = LoadPDFandWork(miofile, 300, 50)

import tiktoken

encoding = tiktoken.get_encoding("r50k_base")
# Number of tokens in the concatenated text of the PDF.
context_count = len(encoding.encode(article))
console.print(f"Number of Tokens in the Article: {context_count}")

# Plain-text content of every chunk, in document order.
docs = [chunk.page_content for chunk in d]
```
LOAD A TXT FILE
```
# FOR TXT: read a plain-text article, count its tokens and split it into chunks.
import tiktoken
from langchain.text_splitter import TokenTextSplitter

# The with-block closes the file on exit, so no explicit close() is needed.
with open('/content/2024-04-11 12.52.28 Kaggle s wrong turn when AI becomes a teacher and.txt', encoding='utf-8') as f:
    article = f.read()

encoding = tiktoken.get_encoding("r50k_base")
context_count = len(encoding.encode(article))
console.print(f"Number of Tokens in the Article: {context_count}")

TOKENtext_splitter = TokenTextSplitter(chunk_size=700, chunk_overlap=50)
d = TOKENtext_splitter.split_text(article)  # list of text chunks
console.print(f"Number of Document Chunks in the Article: {len(d)}")
```