Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve sensor based on feeds provided by the community in GH issues #92

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
*.pyc
build/
*.egg-info/
custom_components/hacs
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ repos:
- id: mypy
additional_dependencies:
[
homeassistant-stubs,
homeassistant-stubs==2023.8.1,
voluptuous-stubs,
types-python-dateutil,
types-PyYAML,
Expand Down
84 changes: 57 additions & 27 deletions custom_components/feedparser/sensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import email.utils
import logging
import re
from datetime import datetime, timedelta, timezone
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -173,11 +174,19 @@ def _generate_sensor_entry(
if key in ["published", "updated", "created", "expired"]:
parsed_date: datetime = self._parse_date(value)
sensor_entry[key] = parsed_date.strftime(self._date_format)
elif key == "image":
sensor_entry["image"] = value.get("href")
else:
sensor_entry[key] = value

self._process_image(feed_entry, sensor_entry)

if "image" in self._inclusions and "image" not in sensor_entry:
sensor_entry["image"] = self._process_image(feed_entry)
if (
"link" in self._inclusions
and "link" not in sensor_entry
and (processed_link := self._process_link(feed_entry))
):
sensor_entry["link"] = processed_link
_LOGGER.debug("Feed %s: Generated sensor entry: %s", self.name, sensor_entry)
return sensor_entry

Expand All @@ -194,42 +203,63 @@ def _parse_date(self: FeedParserSensor, date: str) -> datetime:
self.name,
date,
)
# best effort to parse the date using dateutil
parsed_time = parser.parse(date)

if not parsed_time.tzinfo:
# best effort to parse the date using dateutil
parsed_time = parser.parse(date)
if not parsed_time.tzname():
# replace tzinfo with UTC offset if tzinfo does not contain a TZ name
parsed_time = parsed_time.replace(
tzinfo=timezone(parsed_time.utcoffset()), # type: ignore[arg-type]
if not parsed_time.tzinfo:
msg = (
f"Feed {self.name}: Unable to parse date {date}, "
"caused by an incorrect date format"
)
raise ValueError(msg)
if not parsed_time.tzname():
# replace tzinfo with UTC offset if tzinfo does not contain a TZ name
parsed_time = parsed_time.replace(
tzinfo=timezone(parsed_time.utcoffset()), # type: ignore[arg-type]
)

if self._local_time:
parsed_time = dt.as_local(parsed_time)
_LOGGER.debug("Feed %s: Parsed date: %s", self.name, parsed_time)
return parsed_time

def _process_image(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the URL of the first image found for a feed entry.

    Lookup order:

    1. the first enclosure whose MIME type starts with ``image/``,
    2. the ``src`` of the first ``<img>`` tag in the entry summary,
    3. ``DEFAULT_THUMBNAIL`` when neither of the above yields a URL.

    Args:
        feed_entry: Parsed feed entry to inspect.

    Returns:
        The image URL, or ``DEFAULT_THUMBNAIL`` if no image was found.
    """
    for enclosure in feed_entry.get("enclosures") or []:
        # An enclosure may lack a type or an href; skip it instead of raising.
        mime_type = enclosure.get("type") or ""
        href = enclosure.get("href")
        if href and mime_type.startswith("image/"):
            # pick the first image found
            return href

    # The summary is inspected even when the entry has (non-image) enclosures,
    # e.g. a podcast entry with an audio enclosure and an <img> in its summary.
    # The pattern stays inside a single tag, requires whitespace before "src"
    # (so "data-src" is not picked up) and accepts both quote styles.
    match = re.search(
        r"<img\b[^>]*?\ssrc\s*=\s*([\"'])(.+?)\1",
        feed_entry.get("summary") or "",
        re.IGNORECASE,
    )
    if match:
        # pick the first image found
        return match.group(2)

    _LOGGER.debug(
        "Feed %s: Image is in inclusions, but no image was found for %s",
        self.name,
        feed_entry,
    )
    return DEFAULT_THUMBNAIL  # use default image if no image found

def _process_link(self: FeedParserSensor, feed_entry: FeedParserDict) -> str:
    """Return the URL of the first link of a feed entry.

    Args:
        feed_entry: Parsed feed entry to inspect.

    Returns:
        The ``href`` of the first entry in ``feed_entry["links"]``, or an
        empty string when the entry has no links or the first link has no
        ``href``. A warning is logged when more than one link is present.
    """
    links = feed_entry.get("links") or []
    if not links:
        # Covers both a missing "links" key and an empty list of links.
        return ""
    if len(links) > 1:
        _LOGGER.warning(
            "Feed %s: More than one link found for %s. Using the first link.",
            self.name,
            feed_entry,
        )
    # A link without an href yields "" rather than a KeyError; the caller
    # treats a falsy result as "no link".
    return links[0].get("href") or ""

@property
def feed_entries(self: FeedParserSensor) -> list[dict[str, str]]:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dependencies = ["python-dateutil", "feedparser==6.0.10", "homeassistant"]
[project.optional-dependencies]
dev = [
"black",
"homeassistant-stubs",
"homeassistant-stubs==2023.8.1",
"pytest==7.4.0",
"mypy",
"ruff",
Expand Down
37 changes: 37 additions & 0 deletions tests/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,43 @@
"show_topn": 1,
},
},
{
"has_images": True,
"all_entries_have_images": False,
"has_unique_links": False,
"sensor_config": {
"name": "buienradar_nl",
"feed_url": "https://data.buienradar.nl/1.0/feed/xml/rssbuienradar",
"date_format": "%Y-%m-%d %H:%M:%S.%f",
},
},
{
"has_images": False,
"has_unique_links": False,
"sensor_config": {
"name": "skolmaten_se_ede_skola",
"feed_url": "https://skolmaten.se/ede-skola/rss/weeks/?limit=2",
"inclusions": ["title", "link", "published", "summary"],
},
},
{
"has_images": False,
"sensor_config": {
"name": "api_met_no_metalerts",
"feed_url": "https://api.met.no/weatherapi/metalerts/1.1/",
"inclusions": ["title", "link", "published", "summary"],
},
},
{
"has_images": True,
"has_unique_images": False,
"has_unique_titles": False,
"sensor_config": {
"name": "anp_nieuws",
"feed_url": "https://www.omnycontent.com/d/playlist/56ccbbb7-0ff7-4482-9d99-a88800f49f6c/a49c87f6-d567-4189-8692-a8e2009eaf86/9fea2041-fccd-4fcf-8cec-a8e2009eeca2/podcast.rss",
"inclusions": ["title", "link", "published", "summary"],
},
},
]

DEFAULT_EXCLUSIONS: list[str] = []
Expand Down
16 changes: 16 additions & 0 deletions tests/data/anp_nieuws.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"has_images": true,
"has_unique_images": false,
"has_unique_titles": false,
"sensor_config": {
"name": "anp_nieuws",
"feed_url": "https://www.omnycontent.com/d/playlist/56ccbbb7-0ff7-4482-9d99-a88800f49f6c/a49c87f6-d567-4189-8692-a8e2009eaf86/9fea2041-fccd-4fcf-8cec-a8e2009eeca2/podcast.rss",
"inclusions": [
"title",
"link",
"published",
"summary"
]
},
"download_date": "2023-08-18T09:22:14.164244+00:00"
}
Loading