-
Notifications
You must be signed in to change notification settings - Fork 559
/
DomainRateLimiter.cs
119 lines (96 loc) · 4.51 KB
/
DomainRateLimiter.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
using Abot2.Util;
using System;
using System.Collections.Concurrent;
using System.Diagnostics;
using Serilog;
namespace Abot2.Core
{
/// <summary>
/// Rate limits (throttles) on a per domain basis. Domains are identified by the uri passed to each member.
/// </summary>
public interface IDomainRateLimiter
{
/// <summary>
/// If the domain of the given uri has been flagged for rate limiting, it will be rate limited according to the configured minimum crawl delay.
/// </summary>
/// <param name="uri">Uri whose domain should be rate limited</param>
void RateLimit(Uri uri);
/// <summary>
/// Adds a domain entry so that the domain may be rate limited according to the given minimum crawl delay.
/// </summary>
/// <param name="uri">Uri whose domain should be registered</param>
/// <param name="minCrawlDelayInMillisecs">Minimum crawl delay for the domain, in milliseconds</param>
void AddDomain(Uri uri, long minCrawlDelayInMillisecs);
/// <summary>
/// Adds or updates a domain entry so that the domain may be rate limited according to the given minimum crawl delay.
/// </summary>
/// <param name="uri">Uri whose domain should be registered or updated</param>
/// <param name="minCrawlDelayInMillisecs">Minimum crawl delay for the domain, in milliseconds</param>
void AddOrUpdateDomain(Uri uri, long minCrawlDelayInMillisecs);
/// <summary>
/// Removes a domain entry so that it will no longer be rate limited.
/// </summary>
/// <param name="uri">Uri whose domain entry should be removed</param>
void RemoveDomain(Uri uri);
}
/// <summary>
/// Default <see cref="IDomainRateLimiter"/>. Keeps one <see cref="IRateLimiter"/> per domain,
/// keyed by <see cref="Uri.Authority"/>, and makes callers wait on it before proceeding.
/// </summary>
public class DomainRateLimiter : IDomainRateLimiter
{
    /// <summary>
    /// Extra milliseconds added to a non-zero default delay; IRateLimiter is always a little under,
    /// so a little more time is added to compensate.
    /// </summary>
    private const long DefaultDelayPaddingInMillisecs = 20;

    /// <summary>
    /// Waits of this many milliseconds or fewer are not logged by <see cref="RateLimit"/>.
    /// </summary>
    private const long MaxUnloggedWaitInMillisecs = 10;

    /// <summary>
    /// Rate limiter for each known domain, keyed by the uri's authority.
    /// </summary>
    protected ConcurrentDictionary<string, IRateLimiter> _rateLimiterLookup = new ConcurrentDictionary<string, IRateLimiter>();

    /// <summary>
    /// Delay applied to domains that were never explicitly added; 0 means such domains are not limited.
    /// </summary>
    private readonly long _defaultMinCrawlDelayInMillisecs;

    /// <summary>
    /// Creates a limiter with the given default delay.
    /// </summary>
    /// <param name="minCrawlDelayMillisecs">
    /// Default minimum crawl delay in milliseconds. 0 disables rate limiting for domains that have
    /// not been added explicitly; a positive value is padded slightly before use.
    /// </param>
    /// <exception cref="ArgumentException">When <paramref name="minCrawlDelayMillisecs"/> is negative</exception>
    public DomainRateLimiter(long minCrawlDelayMillisecs)
    {
        if (minCrawlDelayMillisecs < 0)
        {
            throw new ArgumentException("minCrawlDelayMillisecs must not be negative", "minCrawlDelayMillisecs");
        }

        if (minCrawlDelayMillisecs > 0)
        {
            _defaultMinCrawlDelayInMillisecs = minCrawlDelayMillisecs + DefaultDelayPaddingInMillisecs;
        }
    }

    /// <summary>
    /// Waits on the rate limiter of the uri's domain. A domain seen for the first time is registered
    /// with the default delay; when there is no default delay and no entry, returns immediately.
    /// </summary>
    /// <param name="uri">Uri whose domain should be rate limited</param>
    /// <exception cref="ArgumentNullException">When <paramref name="uri"/> is null</exception>
    public void RateLimit(Uri uri)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        var rateLimiter = GetRateLimiter(uri, _defaultMinCrawlDelayInMillisecs);
        if (rateLimiter == null)
        {
            return;
        }

        var timer = Stopwatch.StartNew();
        rateLimiter.WaitToProceed();
        timer.Stop();

        // Only report waits long enough to be worth a log entry.
        if (timer.ElapsedMilliseconds > MaxUnloggedWaitInMillisecs)
        {
            Log.Debug("Rate limited [{0}] [{1}] milliseconds", uri.AbsoluteUri, timer.ElapsedMilliseconds);
        }
    }

    /// <summary>
    /// Registers the uri's domain using the larger of the given delay and the default delay.
    /// A domain that already has an entry keeps its existing rate limiter.
    /// </summary>
    /// <param name="uri">Uri whose domain should be registered</param>
    /// <param name="minCrawlDelayInMillisecs">Minimum crawl delay in milliseconds; must be at least 1</param>
    /// <exception cref="ArgumentNullException">When <paramref name="uri"/> is null</exception>
    /// <exception cref="ArgumentException">When <paramref name="minCrawlDelayInMillisecs"/> is less than 1</exception>
    public void AddDomain(Uri uri, long minCrawlDelayInMillisecs)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        if (minCrawlDelayInMillisecs < 1)
        {
            throw new ArgumentException("minCrawlDelayInMillisecs must be at least 1", "minCrawlDelayInMillisecs");
        }

        // Just calling this method adds the new domain.
        GetRateLimiter(uri, Math.Max(minCrawlDelayInMillisecs, _defaultMinCrawlDelayInMillisecs));
    }

    /// <summary>
    /// Registers the uri's domain, replacing any existing entry, using the larger of the given
    /// delay and the default delay.
    /// </summary>
    /// <param name="uri">Uri whose domain should be registered or updated</param>
    /// <param name="minCrawlDelayInMillisecs">Minimum crawl delay in milliseconds; must be at least 1</param>
    /// <exception cref="ArgumentNullException">When <paramref name="uri"/> is null</exception>
    /// <exception cref="ArgumentException">When <paramref name="minCrawlDelayInMillisecs"/> is less than 1</exception>
    public void AddOrUpdateDomain(Uri uri, long minCrawlDelayInMillisecs)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        if (minCrawlDelayInMillisecs < 1)
        {
            throw new ArgumentException("minCrawlDelayInMillisecs must be at least 1", "minCrawlDelayInMillisecs");
        }

        // The validated argument is at least 1, so the effective delay is always positive.
        var delayToUse = Math.Max(minCrawlDelayInMillisecs, _defaultMinCrawlDelayInMillisecs);

        var rateLimiter = new RateLimiter(1, TimeSpan.FromMilliseconds(delayToUse));
        _rateLimiterLookup.AddOrUpdate(uri.Authority, rateLimiter, (key, oldValue) => rateLimiter);
        Log.Debug("Added/updated domain [{0}] with minCrawlDelayInMillisecs of [{1}] milliseconds", uri.Authority, delayToUse);
    }

    /// <summary>
    /// Removes the entry of the uri's domain, if there is one.
    /// </summary>
    /// <param name="uri">Uri whose domain entry should be removed</param>
    /// <exception cref="ArgumentNullException">When <paramref name="uri"/> is null</exception>
    public void RemoveDomain(Uri uri)
    {
        if (uri == null)
        {
            throw new ArgumentNullException("uri");
        }

        IRateLimiter rateLimiter;
        _rateLimiterLookup.TryRemove(uri.Authority, out rateLimiter);
    }

    /// <summary>
    /// Returns the rate limiter registered for the uri's domain, creating and registering one when
    /// the domain is unknown and <paramref name="minCrawlDelayInMillisecs"/> is positive.
    /// </summary>
    /// <param name="uri">Uri whose authority identifies the domain; must not be null</param>
    /// <param name="minCrawlDelayInMillisecs">Delay for a newly created limiter; 0 or less means do not create one</param>
    /// <returns>The registered rate limiter, or null when the domain has none and none was created</returns>
    protected virtual IRateLimiter GetRateLimiter(Uri uri, long minCrawlDelayInMillisecs)
    {
        IRateLimiter existing;
        _rateLimiterLookup.TryGetValue(uri.Authority, out existing);
        if (existing != null || minCrawlDelayInMillisecs <= 0)
        {
            return existing;
        }

        IRateLimiter candidate = new RateLimiter(1, TimeSpan.FromMilliseconds(minCrawlDelayInMillisecs));

        // GetOrAdd hands every concurrent caller the single registered instance, so simultaneous
        // first requests to a domain are throttled together rather than by separate limiters.
        var registered = _rateLimiterLookup.GetOrAdd(uri.Authority, candidate);
        if (ReferenceEquals(registered, candidate))
        {
            Log.Debug("Added new domain [{0}] with minCrawlDelayInMillisecs of [{1}] milliseconds", uri.Authority, minCrawlDelayInMillisecs);
            return registered;
        }

        // Another caller registered the domain first. Nobody else has seen the candidate, so it can
        // be released safely if the implementation is disposable.
        var disposable = candidate as IDisposable;
        if (disposable != null)
        {
            disposable.Dispose();
        }

        Log.Debug("Domain [{0}] was added concurrently, using its existing rate limiter", uri.Authority);
        return registered;
    }
}
}