Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove old feed metrics and add a new one to track recently failing feeds. #997

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/997.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a new metric `hookshot_feeds_failing_recent` which only includes feeds that have recently started failing.
1 change: 1 addition & 0 deletions changelog.d/997.removal
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Remove legacy & deprecated feed metrics.
7 changes: 1 addition & 6 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,14 @@ Below is the generated list of Prometheus metrics for Hookshot.
| hookshot_feeds_count | Number of RSS feeds that hookshot is subscribed to | |
| hookshot_feeds_fetch_ms | Time taken for hookshot to fetch all feeds | |
| hookshot_feeds_failing | Number of RSS feeds that hookshot is failing to read | reason |
| hookshot_feeds_failing_recent | Number of RSS feeds that hookshot is failing to read that have begun to fail recently | reason |
## matrix
| Metric | Help | Labels |
|--------|------|--------|
| matrix_api_calls | Number of Matrix client API calls made | method |
| matrix_api_calls_failed | Number of Matrix client API calls which failed | method |
| matrix_appservice_events | Number of events sent over the AS API | |
| matrix_appservice_decryption_failed | Number of events sent over the AS API that failed to decrypt | |
## feed
| Metric | Help | Labels |
|--------|------|--------|
| feed_count | (Deprecated) Number of RSS feeds that hookshot is subscribed to | |
| feed_fetch_ms | (Deprecated) Time taken for hookshot to fetch all feeds | |
| feed_failing | (Deprecated) Number of RSS feeds that hookshot is failing to read | reason |
## process
| Metric | Help | Labels |
|--------|------|--------|
Expand Down
8 changes: 2 additions & 6 deletions src/Metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@ export class Metrics {
public readonly feedsCount;
public readonly feedFetchMs;
public readonly feedsFailing;
public readonly feedsCountDeprecated;
public readonly feedsFetchMsDeprecated;
public readonly feedsFailingDeprecated;
public readonly feedsFailingRecent;


constructor(private registry: Registry = register) {
Expand All @@ -55,9 +53,7 @@ export class Metrics {
this.feedsCount = new Gauge({ name: "hookshot_feeds_count", help: "Number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
this.feedFetchMs = new Gauge({ name: "hookshot_feeds_fetch_ms", help: "Time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
this.feedsFailing = new Gauge({ name: "hookshot_feeds_failing", help: "Number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
this.feedsCountDeprecated = new Gauge({ name: "feed_count", help: "(Deprecated) Number of RSS feeds that hookshot is subscribed to", labelNames: [], registers: [this.registry]});
this.feedsFetchMsDeprecated = new Gauge({ name: "feed_fetch_ms", help: "(Deprecated) Time taken for hookshot to fetch all feeds", labelNames: [], registers: [this.registry]});
this.feedsFailingDeprecated = new Gauge({ name: "feed_failing", help: "(Deprecated) Number of RSS feeds that hookshot is failing to read", labelNames: ["reason"], registers: [this.registry]});
this.feedsFailingRecent = new Gauge({ name: "hookshot_feeds_failing_recent", help: "Number of RSS feeds that hookshot is failing to read that have begun to fail recently", labelNames: ["reason"], registers: [this.registry]});

collectDefaultMetrics({
register: this.registry,
Expand Down
26 changes: 14 additions & 12 deletions src/feeds/FeedReader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ const BACKOFF_TIME_MAX_MS = 24 * 60 * 60 * 1000;
const BACKOFF_POW = 1.05;
const BACKOFF_TIME_MS = 5 * 1000;

/**
* If a feed fails this many times or more, we consider it effectively dead.
* We might still retry it, and it is still counted in `hookshot_feeds_failing`,
* but it is no longer counted in the `hookshot_feeds_failing_recent` metric
* (only feeds with a fail count strictly below this value are).
*/
const FEEDS_FAILING_METRIC_MAX_DORMANT = 25;

export class FeedError extends Error {
constructor(
public url: string,
Expand Down Expand Up @@ -88,9 +94,9 @@ export class FeedReader {
// A set of last modified times for each url.
private cacheTimes: Map<string, { etag?: string, lastModified?: string}> = new Map();

// Reason failures to url map.
private feedsFailingHttp = new Set();
private feedsFailingParsing = new Set();
// Failing feeds, one map per failure reason: url -> fail count.
private feedsFailingHttp = new Map<string, number>();
private feedsFailingParsing = new Map<string, number>();

static readonly seenEntriesEventType = "uk.half-shot.matrix-hookshot.feed.reader.seenEntries";

Expand Down Expand Up @@ -128,7 +134,6 @@ export class FeedReader {
this.feedQueue.push(normalisedUrl);
feeds.add(normalisedUrl);
Metrics.feedsCount.inc();
Metrics.feedsCountDeprecated.inc();
}
});
connectionManager.on('connection-removed', removed => {
Expand Down Expand Up @@ -156,7 +161,6 @@ export class FeedReader {
this.feedsFailingHttp.delete(normalisedUrl);
this.feedsFailingParsing.delete(normalisedUrl);
Metrics.feedsCount.dec();
Metrics.feedsCountDeprecated.dec();
});

log.debug('Loaded feed URLs:', [...feeds].join(', '));
Expand Down Expand Up @@ -187,7 +191,6 @@ export class FeedReader {
}
this.feedQueue.populate([...observedFeedUrls]);
Metrics.feedsCount.set(observedFeedUrls.size);
Metrics.feedsCountDeprecated.set(observedFeedUrls.size);
return observedFeedUrls;
}

Expand Down Expand Up @@ -285,9 +288,9 @@ export class FeedReader {
} catch (err: unknown) {
// TODO: Proper Rust Type error.
if ((err as Error).message.includes('Failed to fetch feed due to HTTP')) {
this.feedsFailingHttp.add(url);
this.feedsFailingHttp.set(url, (this.feedsFailingHttp.get(url) ?? 0) + 1);
} else {
this.feedsFailingParsing.add(url);
this.feedsFailingParsing.set(url, (this.feedsFailingParsing.get(url) ?? 0) + 1);
}
const backoffDuration = this.feedQueue.backoff(url);
const error = err instanceof Error ? err : new Error(`Unknown error ${err}`);
Expand All @@ -304,10 +307,10 @@ export class FeedReader {
public async pollFeeds(workerId: number): Promise<void> {

// Update on each iteration
Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size );
Metrics.feedsFailing.set({ reason: "http" }, this.feedsFailingHttp.size);
Metrics.feedsFailing.set({ reason: "parsing" }, this.feedsFailingParsing.size);
Metrics.feedsFailingDeprecated.set({ reason: "http" }, this.feedsFailingHttp.size );
Metrics.feedsFailingDeprecated.set({ reason: "parsing" }, this.feedsFailingParsing.size);
Metrics.feedsFailingRecent.set({ reason: "http" }, [...this.feedsFailingHttp.values()].filter(v => v < FEEDS_FAILING_METRIC_MAX_DORMANT).length);
Metrics.feedsFailingRecent.set({ reason: "parsing" }, [...this.feedsFailingParsing.values()].filter(v => v < FEEDS_FAILING_METRIC_MAX_DORMANT).length);

log.debug(`Checking for updates in ${this.feedQueue.length()} RSS/Atom feeds (worker: ${workerId})`);

Expand All @@ -322,7 +325,6 @@ export class FeedReader {
}
const elapsed = Date.now() - fetchingStarted;
Metrics.feedFetchMs.set(elapsed);
Metrics.feedsFetchMsDeprecated.set(elapsed);
sleepFor = Math.max(this.sleepingInterval - elapsed, 0);
log.debug(`Feed fetching took ${elapsed / 1000}s, sleeping for ${sleepFor / 1000}s`);

Expand Down
Loading