-
Notifications
You must be signed in to change notification settings - Fork 0
/
aws.py
79 lines (56 loc) · 1.87 KB
/
aws.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import io
import os
import sys
import boto3
import streamlit as st
from botocore.config import Config
from PIL import Image, ImageDraw
# AWS credentials handed to the Textract client below, read once at import time.
# A missing variable raises KeyError as soon as this module is imported.
# NOTE(review): the key id is read from AWS_ACCESS_KEY, not the
# AWS_ACCESS_KEY_ID name boto3 looks up on its own — confirm the deployment
# really exports this exact name.
aws_access_key_id = os.environ['AWS_ACCESS_KEY']
aws_secret_access_key = os.environ['AWS_SECRET_ACCESS_KEY']
def _generate_annotated_image(content, blocks):
    """Return the source image with a red box around every LINE block.

    Args:
        content: Raw bytes of an image in a format Pillow can open.
        blocks: Iterable of Textract-style block dicts. Every block needs a
            'BlockType' key; LINE blocks additionally need
            block['Geometry']['BoundingBox'] holding 'Left', 'Top', 'Width'
            and 'Height' expressed as fractions of the image dimensions
            (they are scaled by the pixel width/height below).

    Returns:
        A PIL image carrying the outlines. Grayscale and bilevel inputs are
        promoted to a colour mode first so the outline is actually red.

    Raises:
        KeyError: If a block lacks one of the keys listed above.
    """
    image = Image.open(io.BytesIO(content))

    # Modes '1', 'L' and 'LA' have no colour channels, so 'red' would be
    # rendered as a shade of gray; promote them (keeping alpha for 'LA').
    colour_mode = {'1': 'RGB', 'L': 'RGB', 'LA': 'RGBA'}.get(image.mode)
    if colour_mode is not None:
        image = image.convert(colour_mode)

    width, height = image.size
    # A single drawing context serves every box; it does not depend on the
    # block being processed, so build it once rather than per iteration.
    draw = ImageDraw.Draw(image)

    for block in blocks:
        if not block['BlockType'].startswith('LINE'):
            continue
        box = block['Geometry']['BoundingBox']
        # Scale the relative box to pixel coordinates.
        left = width * box['Left']
        top = height * box['Top']
        right = left + width * box['Width']
        bottom = top + height * box['Height']
        draw.rectangle([left, top, right, bottom], outline='red')

    return image
def aws_detect_text(content):
    """Run Amazon Textract on a document and render the result in Streamlit.

    Renders an "Amazon" heading, sends ``content`` to Textract's
    ``analyze_document`` call, offers the recognised lines as a downloadable
    transcript, then shows the annotated image and the text side by side in
    two columns.

    Args:
        content: Raw document bytes. They are sent to Textract as
            ``Document.Bytes`` and are also re-opened as an image to draw the
            line outlines.

    Returns:
        None. All output goes to the Streamlit page.
    """
    header_text = """<h3 style="font-family: Monaco">Amazon</h3>"""
    st.markdown(header_text, unsafe_allow_html=True)

    client = boto3.client(
        'textract',
        region_name='us-west-1',
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    # NOTE(review): only LINE blocks are consumed below, so the TABLES/FORMS
    # results requested here go unused in this function. A plain text
    # detection call might suffice — confirm before changing, since it is a
    # different API action.
    response = client.analyze_document(
        Document={"Bytes": content},
        FeatureTypes=["TABLES", "FORMS"],
    )
    blocks = response["Blocks"]

    # Keep the text of LINE blocks only, in the order Textract returned them.
    text = [
        block['Text']
        for block in blocks
        if block['BlockType'].startswith("LINE")
    ]

    st.download_button(
        label="Download transcript",
        data='\n'.join(text),
        key="aws_download_transcript",
    )

    image_col, text_col = st.columns(2)
    image_col.image(_generate_annotated_image(content, blocks))
    for row in text:
        text_col.write(row)