Skip to content

Commit

Permalink
Fix amqp output block on write if disconnected (influxdata#2727)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielnelson authored and jeichorn committed Jul 24, 2017
1 parent eac44be commit 823323d
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ be deprecated eventually.
- [#2450](https://github.com/influxdata/telegraf/issues/2450): Network statistics not collected when system has alias interfaces
- [#1911](https://github.com/influxdata/telegraf/issues/1911): Sysstat plugin needs LANG=C or similar locale
- [#2528](https://github.com/influxdata/telegraf/issues/2528): File output closes standard streams on reload.
- [#2603](https://github.com/influxdata/telegraf/issues/2603): AMQP output disconnect blocks all outputs

## v1.2.1 [2017-02-01]

Expand Down
4 changes: 4 additions & 0 deletions plugins/outputs/amqp/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ For an introduction to AMQP see:
## InfluxDB database
# database = "telegraf"
## Connection (dial) timeout, formatted as a string. If not provided, will
## default to 5s. 0s means no timeout (not recommended).
# timeout = "5s"
## Optional SSL Config
# ssl_ca = "/etc/telegraf/ca.pem"
# ssl_cert = "/etc/telegraf/cert.pem"
Expand Down
72 changes: 57 additions & 15 deletions plugins/outputs/amqp/amqp.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package amqp
import (
"fmt"
"log"
"net"
"strings"
"sync"
"time"
Expand All @@ -15,6 +16,12 @@ import (
"github.com/streadway/amqp"
)

// client groups the state that belongs to one established broker session so
// that it can be installed or cleared on the AMQP output as a single value.
type client struct {
// conn is the broker connection; Close calls conn.Close on it.
conn *amqp.Connection
// channel is the channel opened on conn that Write publishes through.
channel *amqp.Channel
// headers is attached as the Headers of every message published by Write.
headers amqp.Table
}

type AMQP struct {
// AMQP brokers to send metrics to
URL string
Expand All @@ -30,6 +37,8 @@ type AMQP struct {
RetentionPolicy string
// InfluxDB precision (DEPRECATED)
Precision string
// Connection timeout
Timeout internal.Duration

// Path to CA file
SSLCA string `toml:"ssl_ca"`
Expand All @@ -40,10 +49,8 @@ type AMQP struct {
// Use SSL but skip chain & host verification
InsecureSkipVerify bool

conn *amqp.Connection
channel *amqp.Channel
sync.Mutex
headers amqp.Table
c *client

serializer serializers.Serializer
}
Expand Down Expand Up @@ -81,6 +88,10 @@ var sampleConfig = `
## InfluxDB database
# database = "telegraf"
## Write timeout, formatted as a string. If not provided, will default
## to 5s. 0s means no timeout (not recommended).
# timeout = "5s"
## Optional SSL Config
# ssl_ca = "/etc/telegraf/ca.pem"
# ssl_cert = "/etc/telegraf/cert.pem"
Expand All @@ -100,10 +111,7 @@ func (a *AMQP) SetSerializer(serializer serializers.Serializer) {
}

func (q *AMQP) Connect() error {
q.Lock()
defer q.Unlock()

q.headers = amqp.Table{
headers := amqp.Table{
"database": q.Database,
"retention_policy": q.RetentionPolicy,
}
Expand All @@ -126,13 +134,15 @@ func (q *AMQP) Connect() error {
amqpConf := amqp.Config{
TLSClientConfig: tls,
SASL: sasl, // if nil, it will be PLAIN
Dial: func(network, addr string) (net.Conn, error) {
return net.DialTimeout(network, addr, q.Timeout.Duration)
},
}

connection, err = amqp.DialConfig(q.URL, amqpConf)
if err != nil {
return err
}
q.conn = connection

channel, err := connection.Channel()
if err != nil {
Expand All @@ -151,25 +161,38 @@ func (q *AMQP) Connect() error {
if err != nil {
return fmt.Errorf("Failed to declare an exchange: %s", err)
}
q.channel = channel

q.setClient(&client{
conn: connection,
channel: channel,
headers: headers,
})

go func() {
err := <-connection.NotifyClose(make(chan *amqp.Error))
if err == nil {
return
}

q.setClient(nil)

log.Printf("I! Closing: %s", err)
log.Printf("I! Trying to reconnect")
for err := q.Connect(); err != nil; err = q.Connect() {
log.Println("E! ", err.Error())
time.Sleep(10 * time.Second)
}

}()
return nil
}

func (q *AMQP) Close() error {
err := q.conn.Close()
c := q.getClient()
if c == nil {
return nil
}

err := c.conn.Close()
if err != nil && err != amqp.ErrClosed {
log.Printf("E! Error closing AMQP connection: %s", err)
return err
Expand All @@ -186,11 +209,15 @@ func (q *AMQP) Description() string {
}

func (q *AMQP) Write(metrics []telegraf.Metric) error {
q.Lock()
defer q.Unlock()
if len(metrics) == 0 {
return nil
}

c := q.getClient()
if c == nil {
return fmt.Errorf("connection is not open")
}

outbuf := make(map[string][]byte)

for _, metric := range metrics {
Expand All @@ -210,13 +237,15 @@ func (q *AMQP) Write(metrics []telegraf.Metric) error {
}

for key, buf := range outbuf {
err := q.channel.Publish(
// Note that since the channel is not in confirm mode, the absence of
// an error does not indicate successful delivery.
err := c.channel.Publish(
q.Exchange, // exchange
key, // routing key
false, // mandatory
false, // immediate
amqp.Publishing{
Headers: q.headers,
Headers: c.headers,
ContentType: "text/plain",
Body: buf,
})
Expand All @@ -227,12 +256,25 @@ func (q *AMQP) Write(metrics []telegraf.Metric) error {
return nil
}

// getClient returns the client currently stored on the output, or nil if
// none is set. The field is read while holding the AMQP mutex.
func (q *AMQP) getClient() *client {
	q.Lock()
	current := q.c
	q.Unlock()
	return current
}

// setClient stores c as the output's current client while holding the AMQP
// mutex. Passing nil clears the stored client.
func (q *AMQP) setClient(c *client) {
	q.Lock()
	defer q.Unlock()
	q.c = c
}

func init() {
outputs.Add("amqp", func() telegraf.Output {
return &AMQP{
AuthMethod: DefaultAuthMethod,
Database: DefaultDatabase,
RetentionPolicy: DefaultRetentionPolicy,
Timeout: internal.Duration{Duration: time.Second * 5},
}
})
}

0 comments on commit 823323d

Please sign in to comment.