forked from h43z/rssify
-
Notifications
You must be signed in to change notification settings - Fork 5
/
rssify.py
77 lines (64 loc) · 2.16 KB
/
rssify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
from urllib.parse import urljoin
from datetime import datetime
import requests
from feedgen.feed import FeedGenerator
from bs4 import BeautifulSoup
from pytz import timezone
# --- Feed-level settings, read from the environment -------------------------
# Every value is None when its variable is not set.
title = os.getenv('TITLE')
subtitle = os.getenv('SUBTITLE')
url = os.getenv('URL')  # page that gets scraped; also used as the feed id
author_name = os.getenv('AUTHOR_NAME')
author_email = os.getenv('AUTHOR_EMAIL')
language = os.getenv('LANGUAGE')

# --- CSS selectors locating each per-item field on the page -----------------
item_title_selector = os.getenv('ITEM_TITLE_CSS')
item_url_selector = os.getenv('ITEM_URL_CSS')
item_author_selector = os.getenv('ITEM_AUTHOR_CSS')
item_description_selector = os.getenv('ITEM_DESCRIPTION_CSS')
item_date_selector = os.getenv('ITEM_DATE_CSS')

# --- How item dates are parsed (strptime format) and which zone they are in -
item_date_format = os.getenv('ITEM_DATE_FORMAT')
item_timezone = os.getenv('ITEM_TIMEZONE')
# Download the page that is to be turned into a feed.
# - timeout: without one, requests waits indefinitely on an unresponsive host.
# - raise_for_status(): abort on an HTTP error (4xx/5xx) rather than parsing
#   the error page, which would otherwise produce an empty feed.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'lxml')

# Title and link elements are always queried; entries are later built by
# pairing them up in document order.
titles = soup.select(item_title_selector)
urls = soup.select(item_url_selector)

# Description, author and date are optional: when the selector is not
# configured the list stays empty and the field is left out of every entry.
descriptions = (
    soup.select(item_description_selector) if item_description_selector else []
)
authors = soup.select(item_author_selector) if item_author_selector else []
dates = soup.select(item_date_selector) if item_date_selector else []
# Create the feed and fill in its top-level metadata. The scraped page's URL
# serves as the feed id.
fg = FeedGenerator()
fg.id(url)
fg.title(title)
# A non-empty SUBTITLE wins; otherwise fall back to the stock credit line.
fg.description(
    subtitle or 'Generated by TabHub Rssify(https://tabhub.github.io/)'
)
fg.link(href='https://tabhub.github.io/', rel='alternate')
fg.language(language)
fg.author({
    'name': author_name,
    'email': author_email,
})
# Resolve the item timezone once. pytz's timezone() rejects None, so fall
# back to UTC when ITEM_TIMEZONE is not configured.
localtz = timezone(item_timezone) if item_timezone else timezone('UTC')

# One feed entry per (title, link) pair. zip() stops at the shorter list, so
# a title without a matching link element is dropped.
for i, (title_tag, url_tag) in enumerate(zip(titles, urls)):
    fe = fg.add_entry()
    fe.title(title_tag.text)

    # Links on the page may be relative; resolve them against the page URL.
    item_url = urljoin(url, url_tag.get('href'))
    fe.id(item_url)
    fe.link(href=item_url, rel='alternate')

    # The optional selectors may match fewer elements than there are titles;
    # only use a value when one exists at this position.
    if i < len(descriptions):
        fe.description(descriptions[i].text)
    if i < len(authors):
        fe.author(name=authors[i].text)

    if item_date_format and i < len(dates):
        date = datetime.strptime(dates[i].text.strip(), item_date_format)
        # A format containing %z yields an aware datetime, which localize()
        # refuses; only attach the configured zone to naive values.
        if date.tzinfo is None:
            date = localtz.localize(date)
    else:
        # No usable date on the page: stamp the entry with the current
        # instant, expressed in the configured zone.
        date = datetime.now(localtz)

    fe.published(date)
    fe.updated(date)

# Serialize the finished feed as Atom into the working directory.
fg.atom_file('atom.xml')