-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper.py
83 lines (73 loc) · 3.17 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from selenium.common.exceptions import TimeoutException
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
import logging
from bs4 import BeautifulSoup
from constants import UserNotFoundException, ExitCode
# Webdriver options
chrome_options = webdriver.ChromeOptions()
caps = DesiredCapabilities().CHROME
caps["pageLoadStrategy"] = "none"
def scrape(username, show_window=False, load_images=False, debug=False):
if debug:
show_window = True
load_images = True
if not show_window:
# Go headless :)
chrome_options.add_argument("--headless=new")
if not load_images:
# Disable images to save time
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
driver = None
for attempt in range(3):
try:
driver = webdriver.Chrome(options=chrome_options)
driver.get(f"https://krunker.io/social.html?p=profile&q={username}")
if debug:
input("Press [Enter] to continue.")
else:
# Accept privacy policy bc it breaks the script if not :sob:
button = WebDriverWait(driver, 10).until(
EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
)
button.click()
logging.info('website loaded successfully!')
break
except TimeoutException:
print(f"Attempt {attempt + 1}: Browser driver has timed out. Retrying...")
if attempt == 2:
print(f"ERROR: Timeout while trying to fetch stats for {username}.")
return None
except Exception as e:
if driver:
driver.quit()
print(f"ERROR: An unexpected error occurred: {e}")
return None
html_content = driver.page_source
soup = BeautifulSoup(html_content, 'html.parser')
if soup.find(text="Profile doesn't exist"):
print(f"ERROR: Krunker profile '{username}' was not found!")
driver.quit()
return None
try:
meta_div = soup.body.find("div", {"class": "leftTopHolder"})
xp_div = soup.body.find("div", {"class": "xpBar"})
main_stat_divs = soup.body.find_all("div", {"class": "pSt"})
class_xp_divs = soup.body.find_all("div", {"class": "classCard"})
driver.quit()
return {
"meta": meta_div.get_text() if meta_div else "",
"xpbar": xp_div.get_text() if xp_div else "",
"main_stats": [div.get_text() for div in main_stat_divs],
"class_stats": [div.get_text() for div in class_xp_divs]
}
except AttributeError:
print("ERROR: The HTML could not be parsed.\n"
"Most likely, there is a captcha. Please solve it manually, and try again.\n"
"You can do this by adding the `headless=False` flag in the args of `krunker_api`.")
driver.quit()
return None