-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_download.py
76 lines (42 loc) · 1.45 KB
/
run_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
'''
Import required libraries
'''
import os
import sys
from caseolap.download import *
import json as json
'''
Input and output data file path
'''
# Base directory: the PubMed baseline/ and updatefiles/ directories are
# resolved beneath it, and it is passed to download_pubmed().
DATA_DIR = './'
# Log file; opened in "w" mode (truncated) when the script is run directly.
logFilePath = './log/download_log.txt'
# JSON file loaded into `download_config` and passed to download_pubmed().
download_config_file_path = './config/download_config.json'
# JSON file loaded into `ftp_config` and passed to download_pubmed().
ftp_config_file_path = './config/ftp_config.json'
'''
Start the download, verification and extraction process
'''
if __name__ == '__main__':
    # Script entry point: download PubMed files, verify them, then extract
    # them, writing progress to the log file at `logFilePath`.

    # Make sure the log file's parent directory exists; otherwise open() in
    # "w" mode raises FileNotFoundError before anything has been done.
    log_dir = os.path.dirname(logFilePath)
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    # Hold the log file in a context manager so it is flushed and closed even
    # when one of the download / verification / extraction steps raises.
    with open(logFilePath, "w") as logfile:
        # Load the two JSON configuration files handed to download_pubmed().
        with open(download_config_file_path, 'r') as f1:
            download_config = json.load(f1)
        with open(ftp_config_file_path, 'r') as f2:
            ftp_config = json.load(f2)

        # Directories under DATA_DIR that are verified and extracted below.
        BASELINE_DIR = os.path.join(DATA_DIR, 'ftp.ncbi.nlm.nih.gov/pubmed/baseline/')
        UPDATE_FILES_DIR = os.path.join(DATA_DIR, 'ftp.ncbi.nlm.nih.gov/pubmed/updatefiles/')

        # This is only a warning: execution continues either way.
        if not os.path.isdir(DATA_DIR):
            print("Directory not found:", DATA_DIR)

        # Start download ---------------------
        download_pubmed(DATA_DIR, download_config, ftp_config, logfile)

        # Verify download -----------------
        # (presumably an MD5 checksum pass, going by the helper's name)
        check_all_md5_in_dir(BASELINE_DIR, logfile)
        check_all_md5_in_dir(UPDATE_FILES_DIR, logfile)

        # Extract downloaded files --------------
        extract_all_gz_in_dir(BASELINE_DIR, logfile)
        extract_all_gz_in_dir(UPDATE_FILES_DIR, logfile)