From 752ac8e65c65382731492ba7d5177d063bade3aa Mon Sep 17 00:00:00 2001 From: chenfei Date: Sat, 4 Jun 2016 19:31:34 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fixbug.=20=20=E4=BF=AE=E6=AD=A3get=5Ftopics?= =?UTF-8?q?=E6=95=B0=E7=BB=84=E8=B6=8A=E7=95=8C=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- zhihu.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/zhihu.py b/zhihu.py index e00e939..99df2a3 100755 --- a/zhihu.py +++ b/zhihu.py @@ -850,15 +850,12 @@ def get_topics(self): } r = requests.get(topics_url, headers=headers, verify=False) soup = BeautifulSoup(r.content, "lxml") - for i in xrange((topics_num - 1) / 20 + 1): - if i == 0: + for offset in xrange(0, topics_num, 20): + if offset == 0: # 初始化请求 topic_list = soup.find_all("div", class_="zm-profile-section-item zg-clear") - for j in xrange(min(topics_num, 20)): - yield topic_list[j].find("strong").string.encode("utf-8") - else: + else: # 下拉刷新 post_url = topics_url _xsrf = soup.find("input", attrs={'name': '_xsrf'})["value"] - offset = i * 20 data = { '_xsrf': _xsrf, 'offset': offset, @@ -874,8 +871,10 @@ def get_topics(self): topic_data = r_post.json()["msg"][1] topic_soup = BeautifulSoup(topic_data, "lxml") topic_list = topic_soup.find_all("div", class_="zm-profile-section-item zg-clear") - for j in xrange(min(topics_num - i * 20, 20)): - yield topic_list[j].find("strong").string.encode("utf-8") + + # get topic + for topic in topic_list: + yield topic.find('strong').string.encode('utf-8') def get_asks(self): """ From 9260f74d29087efdbe4a97fe42a2bc0af1bba8f0 Mon Sep 17 00:00:00 2001 From: chenfei Date: Sat, 4 Jun 2016 20:30:35 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fixbug.=20=E4=BF=AE=E6=AD=A3=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E5=85=B3=E6=B3=A8=E8=AF=9D=E9=A2=98=E6=95=B0=E9=94=99?= =?UTF-8?q?=E8=AF=AF=E7=9A=84=E9=97=AE=E9=A2=98.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- zhihu.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/zhihu.py b/zhihu.py index 99df2a3..1740282 100755 --- a/zhihu.py +++ b/zhihu.py @@ -662,13 +662,14 @@ def get_topics_num(self): if self.soup == None: self.parser() soup = self.soup - topics_num = soup.find_all("div", class_="zm-profile-side-section-title")[1].strong.string.encode("utf-8") - I='' - for i in topics_num: - if i.isdigit(): - I=I+i - topics_num=int(I) - return topics_num + + all_tag = soup.find_all("div", class_="zm-profile-side-section-title") + for tag in all_tag: + a = tag.find('a') + if a and re.match(r"/people/.*/topics", a.attrs.get("href", '')): + return int(a.get_text().split()[0]) # a.get_text()返回 '131 个话题' + else: + return 0 def get_agree_num(self): if self.user_url == None: