-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils_word.py
37 lines (32 loc) · 1.09 KB
/
utils_word.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import docx
def get_doc_info(path):
    """Collect the core properties of the Word document at ``path``.

    The document is opened with ``docx.Document`` and each property listed
    below is read from its ``core_properties`` object.

    Returns a dict keyed by property name (``'author'``, ``'title'``,
    ``'created'``, ...) whose values are whatever ``core_properties``
    reports for that attribute.
    """
    # Core property names to copy into the result, in output order. See
    # https://python-docx.readthedocs.io/en/latest/api/document.html#coreproperties-objects
    property_names = (
        'author',
        'category',
        'comments',
        'content_status',
        'created',
        'identifier',
        'keywords',
        'language',
        'last_modified_by',
        'last_printed',
        'modified',
        'revision',
        'subject',
        'title',
        'version',
    )
    core = docx.Document(path).core_properties
    return {name: getattr(core, name) for name in property_names}
def get_doc_text(path):
    """Return the text of every paragraph in the Word document at ``path``.

    The document is opened with ``docx.Document``; the result is a list
    holding the ``text`` attribute of each entry in ``paragraphs``, in
    document order.
    """
    document = docx.Document(path)
    return [paragraph.text for paragraph in document.paragraphs]