From f21752ae2d61aebb1443b1d827e37ef3facfb3fb Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 19 Apr 2021 17:56:17 -0400 Subject: [PATCH 1/7] Use DNS record TTL for caching duration --- pkg/network/dns_cache.go | 74 +++++++--------- pkg/network/dns_cache_test.go | 149 ++++++++++++++++---------------- pkg/network/dns_parser.go | 7 +- pkg/network/dns_snooper.go | 14 +-- pkg/network/dns_snooper_test.go | 16 +--- 5 files changed, 118 insertions(+), 142 deletions(-) diff --git a/pkg/network/dns_cache.go b/pkg/network/dns_cache.go index 5c8cf5f7b62be..d3c6af195557f 100644 --- a/pkg/network/dns_cache.go +++ b/pkg/network/dns_cache.go @@ -2,7 +2,6 @@ package network import ( "sort" - "strings" "sync" "sync/atomic" "time" @@ -27,7 +26,6 @@ type reverseDNSCache struct { mux sync.Mutex data map[util.Address]*dnsCacheVal exit chan struct{} - ttl time.Duration size int // maxDomainsPerIP is the maximum number of domains mapped to a single IP @@ -35,11 +33,10 @@ type reverseDNSCache struct { oversizedLogLimit *util.LogLimit } -func newReverseDNSCache(size int, ttl, expirationPeriod time.Duration) *reverseDNSCache { +func newReverseDNSCache(size int, expirationPeriod time.Duration) *reverseDNSCache { cache := &reverseDNSCache{ data: make(map[util.Address]*dnsCacheVal), exit: make(chan struct{}), - ttl: ttl, size: size, oversizedLogLimit: util.NewLogLimit(10, time.Minute*10), maxDomainsPerIP: 1000, @@ -60,7 +57,7 @@ func newReverseDNSCache(size int, ttl, expirationPeriod time.Duration) *reverseD return cache } -func (c *reverseDNSCache) Add(translation *translation, now time.Time) bool { +func (c *reverseDNSCache) Add(translation *translation) bool { if translation == nil { return false } @@ -71,17 +68,15 @@ func (c *reverseDNSCache) Add(translation *translation, now time.Time) bool { return false } - exp := now.Add(c.ttl).UnixNano() - for _, addr := range translation.ips { + for addr, deadline := range translation.ips { val, ok := c.data[addr] if ok { - val.expiration = exp - if rejected := val.merge(translation.dns, c.maxDomainsPerIP); rejected && c.oversizedLogLimit.ShouldLog() { + if rejected := val.merge(translation.dns, deadline, c.maxDomainsPerIP); rejected && c.oversizedLogLimit.ShouldLog() { log.Warnf("%s mapped to too many domains, DNS information will be dropped (this will be logged the first 10 times, and then at most every 10 minutes)", addr) } } else { atomic.AddInt64(&c.added, 1) - c.data[addr] = &dnsCacheVal{names: []string{translation.dns}, expiration: exp} + c.data[addr] = &dnsCacheVal{names: map[string]time.Time{translation.dns: deadline}} } } @@ -91,7 +86,7 @@ func (c *reverseDNSCache) Add(translation *translation, now time.Time) bool { return true } -func (c *reverseDNSCache) Get(conns []ConnectionStats, now time.Time) map[util.Address][]string { +func (c *reverseDNSCache) Get(conns []ConnectionStats) map[util.Address][]string { if len(conns) == 0 { return nil } @@ -100,7 +95,6 @@ func (c *reverseDNSCache) Get(conns []ConnectionStats, now time.Time) map[util.A resolved = make(map[util.Address][]string) unresolved = make(map[util.Address]struct{}) oversized = make(map[util.Address]struct{}) - expiration = now.Add(c.ttl).UnixNano() ) collectNamesForIP := func(addr util.Address) { @@ -116,7 +110,7 @@ func (c *reverseDNSCache) Get(conns []ConnectionStats, now time.Time) map[util.A return } - names := c.getNamesForIP(addr, expiration) + names := c.getNamesForIP(addr) if len(names) == 0 { unresolved[addr] = struct{}{} } else if len(names) == c.maxDomainsPerIP { @@ -170,14 +164,18 @@ func (c *reverseDNSCache) Close() { } func (c *reverseDNSCache) Expire(now time.Time) { - deadline := now.UnixNano() expired := 0 c.mux.Lock() for addr, val := range c.data { - if val.expiration > deadline { - continue + for ip, deadline := range val.names { + if deadline.Before(now) { + delete(val.names, ip) + } } + if len(val.names) != 0 { + continue + } expired++ delete(c.data, addr) } @@ -192,61 +190,47 @@ func (c *reverseDNSCache) Expire(now time.Time) { ) } -func (c *reverseDNSCache) getNamesForIP(ip util.Address, updatedTTL int64) []string { +func (c *reverseDNSCache) getNamesForIP(ip util.Address) []string { val, ok := c.data[ip] if !ok { return nil } - - val.expiration = updatedTTL return val.copy() } type dnsCacheVal struct { - // opting for a []string instead of map[string]struct{} since common case is len(names) == 1 - names []string - expiration int64 + names map[string]time.Time } -func (v *dnsCacheVal) merge(name string, maxSize int) (rejected bool) { - normalized := strings.ToLower(name) - if i := sort.SearchStrings(v.names, normalized); i < len(v.names) && v.names[i] == normalized { +func (v *dnsCacheVal) merge(name string, deadline time.Time, maxSize int) (rejected bool) { + if _, ok := v.names[name]; ok { return false } - if len(v.names) == maxSize { return true } - v.names = append(v.names, normalized) - sort.Strings(v.names) + v.names[name] = deadline return false } func (v *dnsCacheVal) copy() []string { - cpy := make([]string, len(v.names)) - copy(cpy, v.names) + cpy := make([]string, 0, len(v.names)) + for n := range v.names { + cpy = append(cpy, n) + } + sort.Strings(cpy) return cpy } type translation struct { dns string - ips []util.Address + ips map[util.Address]time.Time } -func newTranslation(domain []byte) *translation { - return &translation{ - dns: string(domain), - ips: nil, +func (t *translation) add(addr util.Address, ttl time.Duration) { + if _, ok := t.ips[addr]; ok { + return } -} - -func (t *translation) add(addr util.Address) { - for _, other := range t.ips { - if other == addr { - return - } - } - - t.ips = append(t.ips, addr) + t.ips[addr] = time.Now().Add(ttl) } diff --git a/pkg/network/dns_cache_test.go b/pkg/network/dns_cache_test.go index 68be38b8fee63..48d836f08f776 100644 --- a/pkg/network/dns_cache_test.go +++ b/pkg/network/dns_cache_test.go @@ -3,6 +3,7 @@ package network import ( "fmt" "math/rand" + "strings" "testing" "time" @@ -16,12 +17,12 @@ func TestMultipleIPsForSameName(t *testing.T) { datadog1 := util.AddressFromString("52.85.98.155") datadog2 := util.AddressFromString("52.85.98.143") - datadogIPs := newTranslation([]byte("datadoghq.com")) - datadogIPs.add(datadog1) - datadogIPs.add(datadog2) + datadogIPs := newTranslation("datadoghq.com") + datadogIPs.add(datadog1, 1*time.Minute) + datadogIPs.add(datadog2, 1*time.Minute) - cache := newReverseDNSCache(100, 1*time.Minute, disableAutomaticExpiration) - cache.Add(datadogIPs, time.Now()) + cache := newReverseDNSCache(100, disableAutomaticExpiration) + cache.Add(datadogIPs) localhost := util.AddressFromString("127.0.0.1") connections := []ConnectionStats{ @@ -29,7 +30,7 @@ func TestMultipleIPsForSameName(t *testing.T) { {Source: localhost, Dest: datadog2}, } - actual := cache.Get(connections, time.Now()) + actual := cache.Get(connections) expected := map[util.Address][]string{ datadog1: {"datadoghq.com"}, datadog2: {"datadoghq.com"}, @@ -38,49 +39,48 @@ func TestMultipleIPsForSameName(t *testing.T) { } func TestMultipleNamesForSameIP(t *testing.T) { - cache := newReverseDNSCache(100, 1*time.Minute, disableAutomaticExpiration) + cache := newReverseDNSCache(100, disableAutomaticExpiration) raddr := util.AddressFromString("172.022.116.123") - tr1 := newTranslation([]byte("i-03e46c9ff42db4abc")) - tr1.add(raddr) - tr2 := newTranslation([]byte("ip-172-22-116-123.ec2.internal")) - tr2.add(raddr) + tr1 := newTranslation("i-03e46c9ff42db4abc") + tr1.add(raddr, 1*time.Minute) + tr2 := newTranslation("ip-172-22-116-123.ec2.internal") + tr2.add(raddr, 1*time.Minute) - now := time.Now() - cache.Add(tr1, now) - cache.Add(tr2, now) + cache.Add(tr1) + cache.Add(tr2) localhost := util.AddressFromString("127.0.0.1") connections := []ConnectionStats{{Source: localhost, Dest: raddr}} - names := cache.Get(connections, now) + names := cache.Get(connections) expected := []string{"i-03e46c9ff42db4abc", "ip-172-22-116-123.ec2.internal"} assert.ElementsMatch(t, expected, names[raddr]) } func TestDNSCacheExpiration(t *testing.T) { ttl := 100 * time.Millisecond - cache := newReverseDNSCache(1000, ttl, disableAutomaticExpiration) + cache := newReverseDNSCache(1000, disableAutomaticExpiration) t1 := time.Now() laddr1 := util.AddressFromString("127.0.0.1") raddr1 := util.AddressFromString("192.168.0.1") // host-a - hostA := newTranslation([]byte("host-a")) - hostA.add(raddr1) + hostA := newTranslation("host-a") + hostA.add(raddr1, ttl+20*time.Millisecond) laddr2 := util.AddressFromString("127.0.0.1") raddr2 := util.AddressFromString("192.168.0.2") // host-b - hostB := newTranslation([]byte("host-b")) - hostB.add(raddr2) + hostB := newTranslation("host-b") + hostB.add(raddr2, ttl+20*time.Millisecond) laddr3 := util.AddressFromString("127.0.0.1") raddr3 := util.AddressFromString("192.168.0.3") // host-c - hostC := newTranslation([]byte("host-c")) - hostC.add(raddr3) + hostC := newTranslation("host-c") + hostC.add(raddr3, ttl) - cache.Add(hostA, t1) - cache.Add(hostB, t1) - cache.Add(hostC, t1) + cache.Add(hostA) + cache.Add(hostB) + cache.Add(hostC) assert.Equal(t, 3, cache.Len()) // All entries should remain present (t2 < t1 + ttl) @@ -93,7 +93,7 @@ func TestDNSCacheExpiration(t *testing.T) { {Source: laddr1, Dest: raddr1}, {Source: laddr2, Dest: raddr2}, } - cache.Get(stats, t2) + cache.Get(stats) // Only IP from host-c should have expired t3 := t1.Add(ttl + 10*time.Millisecond) @@ -105,7 +105,7 @@ func TestDNSCacheExpiration(t *testing.T) { {Source: laddr2, Dest: raddr2}, {Source: laddr3, Dest: raddr3}, } - names := cache.Get(stats, t2) + names := cache.Get(stats) assert.Contains(t, names[raddr1], "host-a") assert.Contains(t, names[raddr2], "host-b") assert.Nil(t, names[raddr3]) @@ -118,12 +118,12 @@ func TestDNSCacheExpiration(t *testing.T) { func TestDNSCacheTelemetry(t *testing.T) { ttl := 100 * time.Millisecond - cache := newReverseDNSCache(1000, ttl, disableAutomaticExpiration) + cache := newReverseDNSCache(1000, disableAutomaticExpiration) t1 := time.Now() - translation := newTranslation([]byte("host-a")) - translation.add(util.AddressFromString("192.168.0.1")) - cache.Add(translation, t1) + translation := newTranslation("host-a") + translation.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(translation) expected := map[string]int64{ "lookups": 0, @@ -147,7 +147,7 @@ func TestDNSCacheTelemetry(t *testing.T) { } // Attempt to resolve IPs - cache.Get(conns, t1) + cache.Get(conns) expected = map[string]int64{ "lookups": 3, // 127.0.0.1, 192.168.0.1, 192.168.0.2 "resolved": 1, // 192.168.0.1 @@ -174,9 +174,8 @@ func TestDNSCacheTelemetry(t *testing.T) { func TestDNSCacheMerge(t *testing.T) { ttl := 100 * time.Millisecond - cache := newReverseDNSCache(1000, ttl, disableAutomaticExpiration) + cache := newReverseDNSCache(1000, disableAutomaticExpiration) - ts := time.Now() conns := []ConnectionStats{ { Source: util.AddressFromString("127.0.0.1"), @@ -184,66 +183,62 @@ func TestDNSCacheMerge(t *testing.T) { }, } - t1 := newTranslation([]byte("host-b")) - t1.add(util.AddressFromString("192.168.0.1")) - cache.Add(t1, ts) - res := cache.Get(conns, ts) + t1 := newTranslation("host-b") + t1.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(t1) + res := cache.Get(conns) assert.Equal(t, []string{"host-b"}, res[util.AddressFromString("192.168.0.1")]) - t2 := newTranslation([]byte("host-a")) - t2.add(util.AddressFromString("192.168.0.1")) - cache.Add(t2, ts) + t2 := newTranslation("host-a") + t2.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(t2) - t3 := newTranslation([]byte("host-b")) - t3.add(util.AddressFromString("192.168.0.1")) - cache.Add(t3, ts) + t3 := newTranslation("host-b") + t3.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(t3) - res = cache.Get(conns, ts) + res = cache.Get(conns) assert.Equal(t, []string{"host-a", "host-b"}, res[util.AddressFromString("192.168.0.1")]) } func TestDNSCacheMerge_MixedCaseNames(t *testing.T) { ttl := 100 * time.Millisecond - cache := newReverseDNSCache(1000, ttl, disableAutomaticExpiration) + cache := newReverseDNSCache(1000, disableAutomaticExpiration) - ts := time.Now() conns := []ConnectionStats{ { Dest: util.AddressFromString("192.168.0.1"), }, } - cache.Add(&translation{ - dns: "host.name.com", - ips: []util.Address{util.AddressFromString("192.168.0.1")}, - }, ts) + tr := newTranslation("host.name.com") + tr.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(tr) - cache.Add(&translation{ - dns: "host.NaMe.com", - ips: []util.Address{util.AddressFromString("192.168.0.1")}, - }, ts) + tr = newTranslation("host.NaMe.com") + tr.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(tr) - cache.Add(&translation{ - dns: "HOST.NAME.CoM", - ips: []util.Address{util.AddressFromString("192.168.0.1")}, - }, ts) + tr = newTranslation("HOST.NAME.CoM") + tr.add(util.AddressFromString("192.168.0.1"), ttl) + cache.Add(tr) - res := cache.Get(conns, ts) + res := cache.Get(conns) assert.Equal(t, []string{"host.name.com"}, res[util.AddressFromString("192.168.0.1")]) } func TestGetOversizedDNS(t *testing.T) { - cache := newReverseDNSCache(1000, time.Hour, time.Minute) + cache := newReverseDNSCache(1000, time.Minute) cache.maxDomainsPerIP = 10 addr := util.AddressFromString("192.168.0.1") - now := time.Now() + exp := time.Now().Add(1 * time.Hour) for i := 0; i < 5; i++ { cache.Add(&translation{ dns: fmt.Sprintf("%d.host.com", i), - ips: []util.Address{addr}, - }, now) + ips: map[util.Address]time.Time{addr: exp}, + }) } conns := []ConnectionStats{ @@ -252,15 +247,15 @@ func TestGetOversizedDNS(t *testing.T) { }, } - result := cache.Get(conns, now) + result := cache.Get(conns) assert.Len(t, result[addr], 5) assert.Len(t, cache.data[addr].names, 5) for i := 5; i < 100; i++ { cache.Add(&translation{ dns: fmt.Sprintf("%d.host.com", i), - ips: []util.Address{addr}, - }, now) + ips: map[util.Address]time.Time{addr: exp}, + }) } conns = []ConnectionStats{ @@ -269,7 +264,7 @@ func TestGetOversizedDNS(t *testing.T) { }, } - result = cache.Get(conns, now) + result = cache.Get(conns) assert.Len(t, result[addr], 0) assert.Len(t, cache.data[addr].names, 10) } @@ -279,17 +274,16 @@ func BenchmarkDNSCacheGet(b *testing.B) { // Instantiate cache and add numIPs to it var ( - cache = newReverseDNSCache(numIPs, 100*time.Millisecond, disableAutomaticExpiration) + cache = newReverseDNSCache(numIPs, disableAutomaticExpiration) added = make([]util.Address, 0, numIPs) addrGen = randomAddressGen() - now = time.Now() ) for i := 0; i < numIPs; i++ { address := addrGen() added = append(added, address) - translation := newTranslation([]byte("foo.local")) - translation.add(address) - cache.Add(translation, now) + translation := newTranslation("foo.local") + translation.add(address, 100*time.Millisecond) + cache.Add(translation) } // Benchmark Get operation with different resolve ratios @@ -299,7 +293,7 @@ func BenchmarkDNSCacheGet(b *testing.B) { b.ResetTimer() b.ReportAllocs() for i := 0; i < b.N; i++ { - _ = cache.Get(stats, now) + _ = cache.Get(stats) } }) } @@ -337,3 +331,10 @@ func payloadGen(size int, resolveRatio float64, added []util.Address) []Connecti return stats } + +func newTranslation(domain string) *translation { + return &translation{ + dns: strings.ToLower(domain), + ips: make(map[util.Address]time.Time), + } +} diff --git a/pkg/network/dns_parser.go b/pkg/network/dns_parser.go index 19a8946978a56..45c514a374199 100644 --- a/pkg/network/dns_parser.go +++ b/pkg/network/dns_parser.go @@ -2,6 +2,8 @@ package network import ( "bytes" + "strings" + "time" "github.com/DataDog/datadog-agent/pkg/process/util" "github.com/google/gopacket" @@ -110,7 +112,6 @@ func (p *dnsParser) ParseInto(data []byte, t *translation, pktInfo *dnsPacketInf pktInfo.key.clientPort = uint16(p.udpPayload.SrcPort) } else { pktInfo.key.clientPort = uint16(p.udpPayload.DstPort) - } pktInfo.key.protocol = UDP case layers.LayerTypeTCP: @@ -172,7 +173,7 @@ func (p *dnsParser) parseAnswerInto( // Get IPs p.extractIPsInto(alias, domainQueried, dns.Answers, t) p.extractIPsInto(alias, domainQueried, dns.Additionals, t) - t.dns = string(domainQueried) + t.dns = strings.ToLower(string(domainQueried)) pktInfo.pktType = SuccessfulResponse return nil @@ -200,7 +201,7 @@ func (*dnsParser) extractIPsInto(alias, domainQueried []byte, records []layers.D if bytes.Equal(domainQueried, record.Name) || (alias != nil && bytes.Equal(alias, record.Name)) { - t.add(util.AddressFromNetIP(record.IP)) + t.add(util.AddressFromNetIP(record.IP), time.Duration(record.TTL)*time.Second) } } } diff --git a/pkg/network/dns_snooper.go b/pkg/network/dns_snooper.go index c0ee585960050..b9ba918bbf848 100644 --- a/pkg/network/dns_snooper.go +++ b/pkg/network/dns_snooper.go @@ -12,7 +12,6 @@ import ( ) const ( - dnsCacheTTL = 3 * time.Minute dnsCacheExpirationPeriod = 1 * time.Minute dnsCacheSize = 100000 ) @@ -64,7 +63,7 @@ type PacketSource interface { // NewSocketFilterSnooper returns a new SocketFilterSnooper func NewSocketFilterSnooper(cfg *config.Config, source PacketSource) (*SocketFilterSnooper, error) { - cache := newReverseDNSCache(dnsCacheSize, dnsCacheTTL, dnsCacheExpirationPeriod) + cache := newReverseDNSCache(dnsCacheSize, dnsCacheExpirationPeriod) var statKeeper *dnsStatKeeper if cfg.CollectDNSStats { statKeeper = newDNSStatkeeper(cfg.DNSTimeout, cfg.MaxDNSStats) @@ -103,7 +102,7 @@ func NewSocketFilterSnooper(cfg *config.Config, source PacketSource) (*SocketFil // Resolve IPs to DNS addresses func (s *SocketFilterSnooper) Resolve(connections []ConnectionStats) map[util.Address][]string { - return s.cache.Get(connections, time.Now()) + return s.cache.Get(connections) } // GetDNSStats gets the latest DNSStats keyed by unique DNSKey, and domain @@ -174,7 +173,7 @@ func (s *SocketFilterSnooper) processPacket(data []byte, ts time.Time) error { } if pktInfo.pktType == SuccessfulResponse { - s.cache.Add(t, time.Now()) + s.cache.Add(t) atomic.AddInt64(&s.successes, 1) } else if pktInfo.pktType == FailedResponse { atomic.AddInt64(&s.errors, 1) @@ -232,9 +231,10 @@ func (s *SocketFilterSnooper) getCachedTranslation() *translation { // Recycle buffer if necessary if t.ips == nil || len(t.ips) > maxIPBufferSize { - t.ips = make([]util.Address, 30) + t.ips = make(map[util.Address]time.Time, 30) + } + for k := range t.ips { + delete(t.ips, k) } - t.ips = t.ips[:0] - return t } diff --git a/pkg/network/dns_snooper_test.go b/pkg/network/dns_snooper_test.go index 77aa6a26d601b..cd0a95aca5f64 100644 --- a/pkg/network/dns_snooper_test.go +++ b/pkg/network/dns_snooper_test.go @@ -109,19 +109,9 @@ func checkSnooping(t *testing.T, destIP string, reverseDNS *SocketFilterSnooper) destAddr := util.AddressFromString(destIP) srcAddr := util.AddressFromString("127.0.0.1") - timeout := time.After(1 * time.Second) -Loop: - // Wait until DNS entry becomes available (with a timeout) - for { - select { - case <-timeout: - break Loop - default: - if reverseDNS.cache.Len() >= 1 { - break Loop - } - } - } + require.Eventually(t, func() bool { + return reverseDNS.cache.Len() >= 1 + }, 1*time.Second, 10*time.Millisecond) // Verify that the IP from the connections above maps to the right name payload := []ConnectionStats{{Source: srcAddr, Dest: destAddr}} From a9a657993e3150bfc98146084d961c7b8cd8527e Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 20 Apr 2021 13:21:06 -0400 Subject: [PATCH 2/7] Add inUse flag that keeps cache entries around until they are no longer used --- pkg/network/dns_cache.go | 18 +++++++++++++++--- pkg/network/dns_cache_test.go | 10 ++++++++-- pkg/network/state.go | 2 +- 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/pkg/network/dns_cache.go b/pkg/network/dns_cache.go index d3c6af195557f..6e8929e5f874f 100644 --- a/pkg/network/dns_cache.go +++ b/pkg/network/dns_cache.go @@ -76,7 +76,8 @@ func (c *reverseDNSCache) Add(translation *translation) bool { } } else { atomic.AddInt64(&c.added, 1) - c.data[addr] = &dnsCacheVal{names: map[string]time.Time{translation.dns: deadline}} + // flag as in use, so mapping survives until next time connections are queried, in case TTL is shorter + c.data[addr] = &dnsCacheVal{names: map[string]time.Time{translation.dns: deadline}, inUse: true} } } @@ -87,6 +88,13 @@ func (c *reverseDNSCache) Add(translation *translation) bool { } func (c *reverseDNSCache) Get(conns []ConnectionStats) map[util.Address][]string { + c.mux.Lock() + defer c.mux.Unlock() + + for _, val := range c.data { + val.inUse = false + } + if len(conns) == 0 { return nil } @@ -120,12 +128,10 @@ func (c *reverseDNSCache) Get(conns []ConnectionStats) map[util.Address][]string } } - c.mux.Lock() for _, conn := range conns { collectNamesForIP(conn.Source) collectNamesForIP(conn.Dest) } - c.mux.Unlock() // Update stats for telemetry atomic.AddInt64(&c.lookups, int64(len(resolved)+len(unresolved))) @@ -167,6 +173,10 @@ func (c *reverseDNSCache) Expire(now time.Time) { expired := 0 c.mux.Lock() for addr, val := range c.data { + if val.inUse { + continue + } + for ip, deadline := range val.names { if deadline.Before(now) { delete(val.names, ip) @@ -195,11 +205,13 @@ func (c *reverseDNSCache) getNamesForIP(ip util.Address) []string { if !ok { return nil } + val.inUse = true return val.copy() } type dnsCacheVal struct { names map[string]time.Time + inUse bool } func (v *dnsCacheVal) merge(name string, deadline time.Time, maxSize int) (rejected bool) { diff --git a/pkg/network/dns_cache_test.go b/pkg/network/dns_cache_test.go index 48d836f08f776..47a959cf78fe2 100644 --- a/pkg/network/dns_cache_test.go +++ b/pkg/network/dns_cache_test.go @@ -88,7 +88,7 @@ func TestDNSCacheExpiration(t *testing.T) { cache.Expire(t2) assert.Equal(t, 3, cache.Len()) - // Bump host-a and host-b expiration + // Bump host-a and host-b in-use flag stats := []ConnectionStats{ {Source: laddr1, Dest: raddr1}, {Source: laddr2, Dest: raddr2}, @@ -110,9 +110,14 @@ func TestDNSCacheExpiration(t *testing.T) { assert.Contains(t, names[raddr2], "host-b") assert.Nil(t, names[raddr3]) - // All entries should have expired by now + // entries should still be around after expiration that are referenced t4 := t3.Add(ttl) cache.Expire(t4) + assert.Equal(t, 2, cache.Len()) + + // All entries should be allowed to expire now + cache.Get([]ConnectionStats{}) + cache.Expire(t4) assert.Equal(t, 0, cache.Len()) } @@ -160,6 +165,7 @@ func TestDNSCacheTelemetry(t *testing.T) { // Expire IP t2 := t1.Add(ttl + 1*time.Millisecond) + cache.Get([]ConnectionStats{}) cache.Expire(t2) expected = map[string]int64{ "lookups": 3, diff --git a/pkg/network/state.go b/pkg/network/state.go index 1a07415723574..62927bcee562d 100644 --- a/pkg/network/state.go +++ b/pkg/network/state.go @@ -140,7 +140,7 @@ func (ns *networkState) getClients() []string { return clients } -// Connections returns the connections for the given client +// GetDelta returns the connections for the given client // If the client is not registered yet, we register it and return the connections we have in the global state // Otherwise we return both the connections with last stats and the closed connections for this client func (ns *networkState) GetDelta( From 52dd9c5322d27196a0bb354087fb0a52a349b1ef Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 20 Apr 2021 14:36:08 -0400 Subject: [PATCH 3/7] Fix UDP snoop test to be less flaky --- pkg/network/dns_snooper_test.go | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pkg/network/dns_snooper_test.go b/pkg/network/dns_snooper_test.go index cd0a95aca5f64..772a3bf7ad1f3 100644 --- a/pkg/network/dns_snooper_test.go +++ b/pkg/network/dns_snooper_test.go @@ -131,25 +131,23 @@ func TestDNSOverUDPSnooping(t *testing.T) { // skipping for now as test seems to be flaky. Should be reinserted when cause // is discovered t.Skip() - cfg := testConfig() - buf, err := netebpf.ReadBPFModule(cfg.BPFDir, false) - require.NoError(t, err) - defer buf.Close() - - m, reverseDNS := getSnooper(t, buf, false, false, 15*time.Second, false) + m, reverseDNS := initDNSTestsWithDomainCollection(t, false) defer m.Stop(manager.CleanAll) defer reverseDNS.Close() // Connect to golang.org. This will result in a DNS lookup which will be captured by SocketFilterSnooper - conn, err := net.DialTimeout("tcp", "golang.org:80", 1*time.Second) - require.NoError(t, err) - - // Get destination IP to compare against snooped DNS - destIP, _, err := net.SplitHostPort(conn.RemoteAddr().String()) - conn.Close() - require.NoError(t, err) + _, _, reps := sendDNSQueries(t, []string{"golang.org"}, validDNSServerIP, UDP) + rep := reps[0] + require.NotNil(t, rep) + require.Equal(t, rep.Rcode, mdns.RcodeSuccess) - checkSnooping(t, destIP, reverseDNS) + for _, r := range rep.Answer { + aRecord, ok := r.(*mdns.A) + require.True(t, ok) + require.True(t, mdns.NumField(aRecord) >= 1) + destIP := mdns.Field(aRecord, 1) + checkSnooping(t, destIP, reverseDNS) + } } func TestDNSOverTCPSnooping(t *testing.T) { From 856819e6d6237967e80e60d047c3522a6b6ce5cd Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Tue, 20 Apr 2021 15:45:30 -0400 Subject: [PATCH 4/7] Ensure record deadline gets updated if it already exists --- pkg/network/dns_cache.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pkg/network/dns_cache.go b/pkg/network/dns_cache.go index 6e8929e5f874f..a6272a7d7178f 100644 --- a/pkg/network/dns_cache.go +++ b/pkg/network/dns_cache.go @@ -215,7 +215,10 @@ type dnsCacheVal struct { } func (v *dnsCacheVal) merge(name string, deadline time.Time, maxSize int) (rejected bool) { - if _, ok := v.names[name]; ok { + if exp, ok := v.names[name]; ok { + if deadline.After(exp) { + v.names[name] = deadline + } return false } if len(v.names) == maxSize { From 79cd18b3c01e3158a5e4b66eb167b3909f9c803d Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Thu, 27 May 2021 14:59:54 -0700 Subject: [PATCH 5/7] Add comment about lifecycle of inUse flag --- pkg/network/dns_cache.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/network/dns_cache.go b/pkg/network/dns_cache.go index a6272a7d7178f..12b270ba43a20 100644 --- a/pkg/network/dns_cache.go +++ b/pkg/network/dns_cache.go @@ -211,6 +211,10 @@ func (c *reverseDNSCache) getNamesForIP(ip util.Address) []string { type dnsCacheVal struct { names map[string]time.Time + // inUse keeps track of whether this dns cache record is currently in use by a connection. + // This flag is reset to false every time reverseDnsCache.Get is called. + // This flag is only set to true if reverseDNSCache.getNamesForIP returns this struct. + // If inUse is set, then this record will not be expired out. inUse bool } From 9463a9adb1ff8a0f2b88f0c647c4842fd9ba94c0 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 14 Jun 2021 11:21:09 -0700 Subject: [PATCH 6/7] Ensure new TTLs or additional names keep record around until next conns query --- pkg/network/dns_cache.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/network/dns_cache.go b/pkg/network/dns_cache.go index 12b270ba43a20..629a5502cb552 100644 --- a/pkg/network/dns_cache.go +++ b/pkg/network/dns_cache.go @@ -222,6 +222,7 @@ func (v *dnsCacheVal) merge(name string, deadline time.Time, maxSize int) (rejec if exp, ok := v.names[name]; ok { if deadline.After(exp) { v.names[name] = deadline + v.inUse = true } return false } @@ -230,6 +231,7 @@ func (v *dnsCacheVal) merge(name string, deadline time.Time, maxSize int) (rejec } v.names[name] = deadline + v.inUse = true return false } From e348948e3bdad138e859b1383e62eecd4bf29712 Mon Sep 17 00:00:00 2001 From: Bryce Kahle Date: Mon, 28 Jun 2021 13:11:31 -0700 Subject: [PATCH 7/7] Remove skip for UDP DNS test --- pkg/network/dns_snooper_test.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/network/dns_snooper_test.go b/pkg/network/dns_snooper_test.go index 772a3bf7ad1f3..98f96b7b5fef3 100644 --- a/pkg/network/dns_snooper_test.go +++ b/pkg/network/dns_snooper_test.go @@ -127,10 +127,6 @@ func checkSnooping(t *testing.T, destIP string, reverseDNS *SocketFilterSnooper) } func TestDNSOverUDPSnooping(t *testing.T) { - // - // skipping for now as test seems to be flaky. Should be reinserted when cause - // is discovered - t.Skip() m, reverseDNS := initDNSTestsWithDomainCollection(t, false) defer m.Stop(manager.CleanAll) defer reverseDNS.Close()