
Commit

update rit linkedin search jobs implementation messages
Signed-off-by: GuillaumeFalourd <guillaume.falourd@zup.com.br>
GuillaumeFalourd committed Jun 2, 2021
1 parent 77a7a12 commit c1d2f0e
Showing 2 changed files with 9 additions and 8 deletions.
15 changes: 8 additions & 7 deletions linkedin/search/jobs/src/classes/scrap.py
@@ -2,33 +2,34 @@
 import requests
 import os
 import time
+import random

 from bs4 import BeautifulSoup as soup

 def get_datas(job, city, job_link):
     job_datas = [job_link]
     try:
         for retry in range(5):
-            time.sleep(5)
+            time.sleep(random.randint(1, 3))
             page_req = requests.get(
                 url = job_link,
                 headers = {'User-agent': f'{job}_{city} bot'}
             )
             if page_req.status_code == "429":
                 print(f"\033[1;36m\n⚠️ Too many requests - Retrying with other IP...\033[0m")
                 change_ip(random.randint(1, 30))
-                time.sleep(3)
+                time.sleep(random.randint(1, 3))
                 continue
             else:
                 page_req.raise_for_status()
                 # Parse HTML
                 job_soup = soup(page_req.text, 'html.parser')
                 contents = job_soup.findAll('div', {'class': 'topcard__content-left'})[0:]
                 if len(contents) == 0:
-                    time.sleep(3)
+                    time.sleep(random.randint(1, 3))
                     continue
                 else:
-                    print(f"\033[1;36m\n⚠️ Couldn't retrieve all datas for the job link: {job_link}\033[0m")
+                    # Couldn't retrieve all datas for the job
                     break

         if len(contents) != 0:
@@ -48,7 +49,7 @@ def get_datas(job, city, job_link):

             # Scraping Job Title
             for title in content.findAll('h1', {'class': 'topcard__title'})[0:]:
-                print(f'\033[0;32m📌 {title.text}\033[0m', f'\033[1;33m- {org}\033[0m')
+                print(f'\n\033[0;32m📌 {title.text}\033[0m', f'\033[1;33m- {org}\033[0m')
                 job_datas.append(title.text.replace(',', '.'))

             for location in content.findAll('span', {'class': 'topcard__flavor topcard__flavor--bullet'})[0:]:
@@ -80,9 +81,9 @@ def get_datas(job, city, job_link):
             for criteria in job_soup.findAll('span', {'class': 'job-criteria__text job-criteria__text--criteria'})[:4]:
                 job_datas.append(criteria.text)
         else:
-            print(f"\033[1;36m⚠️ Saving (only) the job link on the CSV file.\033[0m")
+            print(f"\n\033[1;36m⚠️ Saving (only) the job link on the CSV file.\033[0m")

-        print(f"\033[0;34mExtracted Datas: {job_datas} \033[0m")
+        # print(f"\033[0;34mExtracted Datas: {job_datas} \033[0m")

         if len(job_datas) < 10:
             fill_number = 10 - len(job_datas)
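For context, the substantive change in scrap.py swaps the fixed time.sleep(5) / time.sleep(3) pauses for jittered random.randint(1, 3)-second delays, so retries no longer hit LinkedIn on a regular, bot-like cadence. Below is a minimal standalone sketch of that retry pattern, assuming only requests; the change_ip helper is stubbed for illustration, and the sketch compares status_code against the integer 429 (requests exposes status_code as an int, so the string comparison "429" left in the diff's context lines never matches):

import random
import time

import requests


def change_ip(seed):
    # Illustrative stub: in the formula this rotates the outgoing IP.
    pass


def fetch_with_retries(url, user_agent, max_retries=5):
    # Wait a jittered 1-3 seconds before each attempt; on HTTP 429
    # (rate limited) rotate IP and retry, otherwise raise on error statuses.
    for _ in range(max_retries):
        time.sleep(random.randint(1, 3))
        resp = requests.get(url, headers={'User-agent': user_agent})
        if resp.status_code == 429:
            change_ip(random.randint(1, 30))
            time.sleep(random.randint(1, 3))
            continue
        resp.raise_for_status()
        return resp
    return None  # every attempt was rate limited

With this shape, a successful request costs at most a few seconds of jitter instead of the previous fixed 5-second wait before every fetch.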
2 changes: 1 addition & 1 deletion linkedin/search/jobs/src/formula/formula.py
@@ -29,7 +29,7 @@ def run(city, profession, send_email, email_receiver, sendgrid_api_key, sendgrid
     if len(job_links) == 0:
         print(f"\033[1;36m\n⚠️ Couldn't extract job links list from LinkedIn, try again later!\033[0m")
     else:
-        print(f'\033[1;33m\n🕵️ There are {len(job_links)} available {job} jobs in {city.capitalize()}.\n\033[0m')
+        print(f'\033[1;33m\n🕵️ {len(job_links)} recent {job} jobs identified in {city.capitalize()}.\n\033[0m')

     # Extract Datas into a CSV file
     csv_filename = csv.filename(job, city)
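For reference, these messages (and the prints in scrap.py above) color the console with raw ANSI escape sequences of the form \033[<style>;<color>m ... \033[0m, where style 1 means bold. A small sketch of the palette in use, with illustrative constant names and sample values not taken from the formula:

RESET = '\033[0m'           # reset all attributes
GREEN = '\033[0;32m'        # job titles
BOLD_YELLOW = '\033[1;33m'  # org names and the jobs summary
BOLD_CYAN = '\033[1;36m'    # warnings
BLUE = '\033[0;34m'         # the now commented-out debug print

# Hypothetical values, reproducing the reworded summary line above:
job_links, job, city = ['link-1', 'link-2'], 'devops', 'recife'
print(f'{BOLD_YELLOW}\n🕵️ {len(job_links)} recent {job} jobs identified in {city.capitalize()}.\n{RESET}')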
