Skip to content

Commit

Permalink
OikotieApartment fix (#76)
Browse files: browse the repository at this point in the history
* Update listing card xpath

* Add last page wait

* Print benchmark stats in case of xfail
  • Loading branch information
jmyrberg authored Jan 6, 2023
1 parent da77abe commit 4ae1080
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 3 deletions.
4 changes: 3 additions & 1 deletion finscraper/scrapy_spiders/oikotieapartment.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -173,6 +173,8 @@ def start_requests(self):
def _get_last_page(self, driver):
logger.debug('Getting last page...')
last_page_xpath = '//span[contains(@ng-bind, "ctrl.totalPages")]'
WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, last_page_xpath)))
last_page_element = driver.find_element(By.XPATH, last_page_xpath)
last_page = int(last_page_element.text.split('/')[-1].strip())
logger.debug(f'Last page found: {last_page}')
Expand Down Expand Up @@ -211,7 +213,7 @@ def _handle_pagination_page(self, request, spider, driver):
driver.get(request.url)

logger.debug('Scrolling pagination page to bottom...')
listings_xpath = '//div[contains(@class, "cards__card")]'
listings_xpath = '//div[contains(@class, "ot-card-v2__wrapper")]'
driver.execute_script("window.scrollTo(0,document.body.scrollHeight)")

logger.debug('Waiting for listings to be available...')
Expand Down
4 changes: 2 additions & 2 deletions tests/test_spiders.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -112,9 +112,9 @@ def test_functionality(spider_cls, spider_params):
@pytest.mark.xfail(reason='Benchmark')
def test_benchmark_scrape_1_min(spider_cls, spider_params, capsys):
df = spider_cls(**spider_params).scrape(0, 60).get()
assert len(df) >= 60
with capsys.disabled():
print(f'-- {len(df)} items')
assert len(df) >= 60


@pytest.mark.parametrize('spider_cls, spider_params', other_cases,
Expand All @@ -124,6 +124,6 @@ def test_benchmark_scrape_100_items(spider_cls, spider_params, capsys):
start = time.perf_counter()
df = spider_cls(**spider_params).scrape(100).get()
elapsed_time = int(time.perf_counter() - start)
assert len(df) >= 100
with capsys.disabled():
print(f'-- {elapsed_time} seconds ({len(df)} items)')
assert len(df) >= 100

0 comments on commit 4ae1080

Please sign in to comment.