-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_video.py
117 lines (88 loc) · 4.86 KB
/
get_video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
This script downloads a video from a specified URL using Selenium and Chrome WebDriver.
It takes the following arguments:
--base_url: Base URL of the video page
--output_folder: Folder to save the downloaded video
The script uses the download_file function to download the video with progress indication.
The video information is saved to a text file with the same name as the video file but with a .txt extension.
The script uses Selenium with Chrome WebDriver to locate the download link on the page; the video file itself is fetched over HTTP with requests.
Author: Pavis
Date: 25/03/2024
"""
import argparse
import os
from urllib.parse import urlparse

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
def download_file(url, filename):
    """Download ``url`` to ``filename`` while showing a tqdm progress bar.

    The response body is streamed to disk in small chunks, so the whole file
    is never held in memory.

    Args:
        url: HTTP(S) address of the file to fetch.
        filename: Path of the local file to create or overwrite.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so an
            error page is never saved as if it were the requested file.
        requests.RequestException: On connection problems or timeouts.
    """
    block_size = 1024  # 1 Kibibyte

    # (connect, read) timeouts: without them a stalled server blocks forever.
    # Using the response as a context manager releases the connection even
    # when writing to disk fails half-way through.
    with requests.get(url, stream=True, timeout=(10, 60)) as response:
        # Check the status before opening the target file so a failed request
        # does not leave an empty or bogus file behind.
        response.raise_for_status()
        total_size_in_bytes = int(response.headers.get('content-length', 0))

        with open(filename, 'wb') as file, tqdm(
            total=total_size_in_bytes, unit='iB', unit_scale=True
        ) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                file.write(data)
            bytes_received = progress_bar.n

    # A size of 0 means the server sent no Content-Length, so nothing to check.
    if total_size_in_bytes != 0 and bytes_received != total_size_in_bytes:
        print("ERROR, something went wrong")
def _build_output_paths(download_url, output_folder):
    """Return ``(video_path, info_path)`` for a download URL and target folder."""
    # Use only the path component so a query string or fragment never ends up
    # in the file name.
    video_name = os.path.basename(urlparse(download_url).path)
    if not video_name:
        raise ValueError(f"Cannot derive a file name from download URL: {download_url!r}")
    video_path = os.path.join(output_folder, video_name)
    # Swap only the final extension. A plain str.replace('.mp4', '.txt') would
    # return the video path unchanged for non-.mp4 files, and the metadata
    # would then overwrite the video that was just downloaded.
    info_path = os.path.splitext(video_path)[0] + '.txt'
    return video_path, info_path


def get_video(base_url, output_folder):
    """Scrape a video page, download its video and save its metadata.

    Opens ``base_url`` in headless Chrome, reads the title (first ``h3``
    element), the description (element with class ``testo``) and the creation
    date (element with class ``data``), then opens the download popup to obtain
    the final download URL. The video is downloaded into ``output_folder`` with
    ``download_file`` and the collected information is written to a ``.txt``
    file with the same base name next to it.

    Args:
        base_url: URL of the video page.
        output_folder: Folder that receives the video and the text file; it is
            created when it does not exist yet.

    Raises:
        RuntimeError: If the download link has no ``href`` attribute.
        ValueError: If no file name can be derived from the download URL.
        selenium.common.exceptions.TimeoutException: If an expected page
            element does not show up within the 10 second wait.
    """
    # Setup Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Ensure Chrome runs in headless mode
    chrome_options.add_argument("--disable-gpu")  # Recommended as needed for headless mode
    # Chrome expects "width,height" for this switch; the "1920x1080" form is not parsed as intended.
    chrome_options.add_argument("--window-size=1920,1080")

    # Create the driver outside the try block: if Chrome fails to start there
    # is nothing to quit, and the finally clause must not hit an unbound name
    # that would mask the real start-up error.
    driver = webdriver.Chrome(options=chrome_options)  # Ensure chromedriver is installed and in your PATH
    try:
        # Navigate to the video page
        driver.get(base_url)

        # Each lookup below waits up to 10 seconds for its element
        wait = WebDriverWait(driver, 10)

        # The title is the text of the first h3 tag
        video_title = wait.until(EC.presence_of_element_located((By.TAG_NAME, "h3"))).text
        print("Title:", video_title)

        # The description is the text of the element with class "testo"
        video_description = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "testo"))).text
        print("Content:", video_description)

        # The creation date is the text of the element with class "data"
        video_creation_date = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "data"))).text
        print("Date:", video_creation_date)

        # Open the download popup
        download_popup_link = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@href='#download-box-main']")))
        download_popup_link.click()

        # Wait for the download link within the popup to be clickable
        download_link_within_popup = wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@id='download-box-main']//a[contains(@class, 'btn-link') and @target='_blank']")))

        # Get the download URL
        final_download_url = download_link_within_popup.get_attribute('href')
        print("Download URL:", final_download_url)
    finally:
        # The browser is only needed to discover the URL; release it before
        # the potentially long download starts.
        driver.quit()

    if not final_download_url:
        raise RuntimeError("Download link has no href attribute.")

    video_information = (
        f"Title: {video_title}\n"
        f"Content: {video_description}\n"
        f"Date: {video_creation_date}\n"
    )

    # Create the file names from the final download URL, like
    # https://webtv.camera.it/download/flash_7/2024/AI_20240207_ch28_24496.mp4
    video_filename, text_filename = _build_output_paths(final_download_url, output_folder)

    # An empty folder means "current directory"; os.makedirs('') would raise.
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    # Download the video with progress indication
    download_file(final_download_url, video_filename)
    print(f"Video successfully downloaded as {video_filename}.")

    # Save the video information next to the video. UTF-8 is explicit so
    # accented characters are written the same way on every platform.
    with open(text_filename, 'w', encoding='utf-8') as file:
        file.write(video_information)
    print(f"Video information saved to {text_filename}.")
def main():
    """Parse the command-line options and download the requested video."""
    cli = argparse.ArgumentParser(description="Download videos from a specified URL.")
    cli.add_argument(
        "--base_url",
        type=str,
        default="https://webtv.camera.it/evento/24495",
        help="Base URL of the video page.",
    )
    cli.add_argument(
        "--output_folder",
        type=str,
        default=".",
        help="Folder to save the downloaded video.",
    )
    options = cli.parse_args()
    get_video(base_url=options.base_url, output_folder=options.output_folder)


# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()