-
Notifications
You must be signed in to change notification settings - Fork 18
/
get_latest_file_urls.py
125 lines (101 loc) · 4.47 KB
/
get_latest_file_urls.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/usr/bin/python
#################################################################
# Use and redistribution is source and binary forms is permitted
# subject to the OMG-DDS INTEROPERABILITY TESTING LICENSE found
# at the following URL:
#
# https://github.com/omg-dds/dds-rtps/blob/master/LICENSE.md
#
#################################################################
import os
import sys
import json
from google.oauth2 import service_account
from googleapiclient.discovery import build
class GoogleDriveClient:
    """Find the most recently modified XLSX and ZIP files under a Drive folder.

    Configuration is read from two environment variables:

    * ``GCP_CREDENTIAL_STR``: service-account key, as a JSON string.
    * ``DRIVE_FOLDER_ID``: ID of the Drive folder that is searched
      (recursively, including nested folders).
    """

    # MIME types used to classify the entries returned by the Drive listing.
    _FOLDER_MIME_TYPE = 'application/vnd.google-apps.folder'
    _XLSX_MIME_TYPE = (
        'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    )
    _ZIP_MIME_TYPE = 'application/zip'

    def __init__(self):
        """Build an authenticated Drive v3 service from the environment.

        Raises:
            TypeError: from ``json.loads`` when ``GCP_CREDENTIAL_STR`` is
                unset (``os.getenv`` then returns ``None``).
        """
        # Load Google Drive credentials from environment variable
        self.credentials_str = os.getenv('GCP_CREDENTIAL_STR')
        # Load folder ID from environment variable
        self.folder_id = os.getenv('DRIVE_FOLDER_ID')
        # Create credentials object
        self.credentials = service_account.Credentials.from_service_account_info(
            json.loads(self.credentials_str),
            scopes=['https://www.googleapis.com/auth/drive']
        )
        # Create Google Drive service
        self.drive_service = build('drive', 'v3', credentials=self.credentials)

    def get_latest_files_url(self):
        """Return the links of the newest XLSX and ZIP files.

        Returns:
            A ``(xlsx_url, zip_url)`` tuple. Each element is the
            ``webViewLink`` of the file with the greatest ``modifiedTime``
            found under ``self.folder_id``, or ``None`` when no file of that
            kind exists.
        """
        xlsx_files, zip_files = self.get_subfolder_files(self.folder_id)
        return self._latest_link(xlsx_files), self._latest_link(zip_files)

    @staticmethod
    def _latest_link(files):
        """Return the ``webViewLink`` of the newest entry, or ``None`` if empty."""
        if not files:
            return None
        # NOTE(review): modifiedTime values are compared as plain strings;
        # this assumes the API returns uniformly formatted, sortable
        # timestamps -- confirm against the Drive API documentation.
        latest_file = max(files, key=lambda entry: entry['modifiedTime'])
        return latest_file.get('webViewLink')

    def _list_folder_items(self, folder_id):
        """Return every direct child of *folder_id*, following all result pages."""
        items = []
        page_token = None
        while True:
            response = self.drive_service.files().list(
                q=f"'{folder_id}' in parents",
                includeItemsFromAllDrives=True,
                supportsAllDrives=True,
                fields='nextPageToken, files(id, name, webViewLink, mimeType, modifiedTime)',
                pageToken=page_token
            ).execute()
            # Accumulate rather than overwrite: keeping only the last page
            # would silently drop every entry listed on earlier pages.
            items.extend(response.get('files', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return items

    def get_subfolder_files(self, folder_id):
        """Collect the XLSX and ZIP files under *folder_id*, recursively.

        Args:
            folder_id: ID of the Drive folder to scan.

        Returns:
            A ``(xlsx_files, zip_files)`` tuple of lists. Each element is a
            file entry (dict) as returned by the Drive listing, carrying the
            ``id``, ``name``, ``webViewLink``, ``mimeType`` and
            ``modifiedTime`` fields requested from the API.
        """
        xlsx_files = []
        zip_files = []
        for item in self._list_folder_items(folder_id):
            mime_type = item['mimeType']
            if mime_type == self._FOLDER_MIME_TYPE:
                # Descend into the nested folder and merge what it contains.
                nested_xlsx, nested_zip = self.get_subfolder_files(item['id'])
                xlsx_files.extend(nested_xlsx)
                zip_files.extend(nested_zip)
            elif mime_type == self._XLSX_MIME_TYPE:
                xlsx_files.append(item)
            elif mime_type == self._ZIP_MIME_TYPE:
                zip_files.append(item)
        return xlsx_files, zip_files
def main():
    """Write the URLs of the latest XLSX and ZIP files to a Python file.

    Usage: ``python get_latest_file_urls.py <output_file>``

    The output file name is taken from the first command-line argument and
    must end in ``.py``. The file receives up to two assignments,
    ``xlsx_url = '...'`` and ``zip_url = '...'``; an assignment is omitted
    when no file of that kind was found.

    Exits with status 1 when the argument is missing or does not end in
    ``.py``.
    """
    # Check if the file name is provided as a command-line argument
    if len(sys.argv) < 2:
        print("Usage: python get_latest_file_urls.py <output_file>")
        sys.exit(1)

    # Get the file name from the command-line arguments
    file_name = sys.argv[1]
    if not file_name.endswith('.py'):
        print("Error: File must have .py extension")
        sys.exit(1)

    client = GoogleDriveClient()
    xlsx_file_url, zip_file_url = client.get_latest_files_url()

    assignments = (('xlsx_url', xlsx_file_url), ('zip_url', zip_file_url))
    with open(file_name, 'w', encoding='utf-8') as file:
        # ``!r`` emits a valid Python string literal even if a URL contains
        # quotes or backslashes, so the generated file always stays parseable.
        file.writelines(
            f'{name} = {url!r}\n'
            for name, url in assignments
            if url is not None
        )
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()