From 8b8df01b41e4d66e0f9fc8c191b6f4331f16b21c Mon Sep 17 00:00:00 2001 From: Yassine Ait Jeddi <48790841+Altimis@users.noreply.github.com> Date: Wed, 20 Jan 2021 11:37:35 +0100 Subject: [PATCH] fix #37 --- .env | 4 +-- Scweet/Example.ipynb | 76 ++++++++++++++++++++++++-------------------- Scweet/debug.log | 4 +++ Scweet/utils.py | 6 ++-- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/.env b/.env index 56a589a..7702ae1 100644 --- a/.env +++ b/.env @@ -1,2 +1,2 @@ -SCWEET_USERNAME= -SCWEET_PASSWORD= \ No newline at end of file +SCWEET_USERNAME=@arobask1 +SCWEET_PASSWORD=mmm010203 \ No newline at end of file diff --git a/Scweet/Example.ipynb b/Scweet/Example.ipynb index 064b7c3..67df7fa 100644 --- a/Scweet/Example.ipynb +++ b/Scweet/Example.ipynb @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -518,7 +518,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "scrolled": true }, @@ -528,14 +528,13 @@ "output_type": "stream", "text": [ "Scraping on headless mode.\n", - "Crawling @NAgouzil following\n", + "Crawling @nagouzil following\n", "Found 9 following\n", "Found 38 following\n", "Found 60 following\n", - "Found 79 following\n", - "Found 81 following\n", - "Found 81 following\n", - "Crawling @Yassineaitjeddi following\n", + "Found 80 following\n", + "Found 80 following\n", + "Crawling @@yassineaitjeddi following\n", "Found 10 following\n", "Found 40 following\n", "Found 60 following\n", @@ -560,20 +559,20 @@ "Found 279 following\n", "Found 299 following\n", "Found 318 following\n", - "Found 339 following\n", + "Found 338 following\n", "Found 359 following\n", "Found 378 following\n", - "Found 380 following\n", - "Found 380 following\n", - "Crawling @LolitaPoupat following\n", + "Found 381 following\n", + "Found 381 following\n", + "Crawling @lolitapoupat following\n", "Found 10 following\n", "Found 39 following\n", "Found 39 following\n", - "Crawling @Jade_happiness following\n", + "Crawling @@Jade_happiness following\n", "Found 9 following\n", "Found 38 following\n", "Found 58 following\n", - "Found 79 following\n", + "Found 78 following\n", "Found 99 following\n", "Found 120 following\n", "Found 139 following\n", @@ -581,9 +580,9 @@ "Found 180 following\n", "Found 182 following\n", "Found 182 following\n", - "Crawling @nabila_gl following\n", + "Crawling @@Nabila_Gl following\n", "Found 9 following\n", - "Found 38 following\n", + "Found 39 following\n", "Found 60 following\n", "Found 80 following\n", "Found 100 following\n", @@ -593,15 +592,32 @@ "Found 180 following\n", "Found 200 following\n", "Found 220 following\n", - "Found 239 following\n", - "Found 260 following\n", + "Found 240 following\n", "Found 260 following\n", - "Found 294 following\n", - "Found 315 following\n", - "Found 334 following\n", - "Found 353 following\n", - "Found 363 following\n", - "Found 363 following\n" + "Found 280 following\n", + "Found 298 following\n", + "Found 318 following\n", + "Found 338 following\n", + "Found 357 following\n", + "Found 368 following\n", + "Found 368 following\n", + "Crawling @geceeekusuu following\n", + "Found 9 following\n", + "Found 38 following\n", + "Found 59 following\n", + "Found 78 following\n", + "Found 99 following\n", + "Found 120 following\n", + "Found 139 following\n", + "Found 159 following\n", + "Found 179 following\n", + "Found 200 following\n", + "Found 219 following\n", + "Found 240 following\n", + "Found 259 following\n", + "Found 280 following\n", + "Found 296 following\n", + "Found 296 following\n" ] } ], @@ -611,19 +627,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['@tweetsauce', '@jockowillink', '@brfootball', '@433', '@Ibra_official', '@HSHQ', '@Snowden', '@johnkrasinski', '@fireship_dev', '@zinebmouchrik', '@Dannmace', '@Kurz_Gesagt', '@reactjs', '@dribbble', '@UpLabs', '@sketch', '@materialdesign', '@GoogleDesign', '@ayoubagouzil', '@garyvee', '@368', '@brielarson', '@Tesla', '@MehdiElIdriss23', '@Spotify', '@DavidDobrik', '@ddlovato', '@Dave2D', '@verge', '@StephenCurry30', '@KingJames', '@olivia_holt', '@saradietschy', '@petermckinnon', '@colesprouse', '@jakerawr', '@Casey', '@hkasulka', '@MandyPandyLeigh', '@Phil_Coutinho', '@KevinHart4real', '@BillGates', '@elonmusk', '@oneplus', '@dbrand', '@MKBHD', '@UnboxTherapy', '@TheRock', '@JDMorgan', '@AADaddario', '@jimmyfallon', '@ConanOBrien', '@alexandrabreck1', '@RealHughJackman', '@MelissaBenoist', '@HARDWELL', '@TahaAlamIdrissi', '@Eminem', '@SnoopDogg', '@GiGiHadid', '@Ucefmab', '@SalmaRach', '@ultra', '@MartinGarrix', '@YouTube', '@BadoAbdo', '@iWillSmith', '@ivanrakitic', '@Adele', '@SergiRoberto10', '@taylorswift13', '@LuisSuarez9', '@selenagomez', '@KendallJenner', '@jtimberlake', '@CHANEL', '@justinbieber', '@davidguetta', '@TeamMessi', '@neymarjr', '@BarackObama']\n" - ] - } - ], + "outputs": [], "source": [ - "print(following['NAgouzil'])" + "print(following['nagouzil'])" ] }, { diff --git a/Scweet/debug.log b/Scweet/debug.log index dd64d07..4542965 100644 --- a/Scweet/debug.log +++ b/Scweet/debug.log @@ -79,3 +79,7 @@ [0119/010217.540:ERROR:directory_reader_win.cc(43)] FindFirstFile: The system cannot find the path specified. (0x3) [0119/094952.552:ERROR:directory_reader_win.cc(43)] FindFirstFile: The system cannot find the path specified. (0x3) [0119/133131.458:ERROR:directory_reader_win.cc(43)] FindFirstFile: The system cannot find the path specified. (0x3) +[0120/113020.986:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0120/113021.124:ERROR:exception_snapshot_win.cc(99)] thread ID 19800 not found in process +[0120/113020.967:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022) +[0120/113021.124:ERROR:exception_snapshot_win.cc(99)] thread ID 8272 not found in process diff --git a/Scweet/utils.py b/Scweet/utils.py index 4f72999..3f82888 100644 --- a/Scweet/utils.py +++ b/Scweet/utils.py @@ -159,8 +159,6 @@ def log_search_page(driver, start_date, end_date, lang, display_type, words, to_ end_date = "until%3A" + end_date + "%20" start_date = "since%3A" + start_date + "%20" - # to_from = str('%20'.join([from_account,to_account]))+"%20" - if display_type == "Latest" or display_type == "latest": display_type = "&f=live" elif display_type == "Image" or display_type == "image": @@ -261,7 +259,7 @@ def get_users_follow(users, headless, follow=None, verbose=1, wait=2): driver.get('https://twitter.com/' + user) sleep(random.uniform(wait-0.5, wait+0.5)) # find the following or followers button - driver.find_element_by_xpath('//a[contains(@href,"/' + user + '/' + follow + '")]/span[1]/span[1]').click() + driver.find_element_by_xpath('//a[contains(@href,"/' + follow + '")]/span[1]/span[1]').click() sleep(random.uniform(wait-0.5, wait+0.5)) # if the log in fails, find the new log in button and log in again. if check_exists_by_link_text("Log in", driver): @@ -271,7 +269,7 @@ def get_users_follow(users, headless, follow=None, verbose=1, wait=2): sleep(random.uniform(wait-0.5, wait+0.5)) driver.get('https://twitter.com/' + user) sleep(random.uniform(wait-0.5, wait+0.5)) - driver.find_element_by_xpath('//a[contains(@href,"/' + user + '/' + follow + '")]/span[1]/span[1]').click() + driver.find_element_by_xpath('//a[contains(@href,"/' + follow + '")]/span[1]/span[1]').click() sleep(random.uniform(wait-0.5, wait+0.5)) # check if we must keep scrolling scrolling = True