diff --git a/bin/check-marathon-task.rb b/bin/check-marathon-task.rb
index d82eb40..4843423 100755
--- a/bin/check-marathon-task.rb
+++ b/bin/check-marathon-task.rb
@@ -67,34 +67,15 @@ def run
       h.request(req)
     end
 
-    tasks = JSON.parse(r.body)['tasks']
+    ok_count, unhealthy = check_tasks r.body
 
-    tasks.select! do |t|
-      t['appId'] == "/#{config[:task]}"
-    end
-
-    unhealthy = []
-
-    # Collect last error message for all health checks that are not alive
-    tasks.each do |task|
-      checks = task['healthCheckResults'] || []
-      checks.each do |check|
-        if check['alive']
-          next
-        end
-        message = check['lastFailureCause'] ||
-                  'Health check not alive'
-        unhealthy << message
-      end
-    end
-
-    message = "#{tasks.length}/#{config[:instances]} #{config[:task]} tasks running"
+    message = "#{ok_count}/#{config[:instances]} #{config[:task]} tasks running"
 
     if unhealthy.any?
       message << ":\n" << unhealthy.join("\n")
     end
 
-    if unhealthy.any? || tasks.length < config[:instances]
+    if unhealthy.any? || ok_count < config[:instances]
       critical message
     end
 
@@ -108,4 +89,41 @@ def run
 
     unknown "marathon task state could not be retrieved:\n" << failures.join("\n")
   end
+
+  # Parses JSON data as returned from Marathon's tasks API
+  # @param data [String] Server response
+  # @return [Array(Numeric, Array<String>)] Number of running tasks and a list
+  #   of error messages from unhealthy tasks
+  def check_tasks(data)
+    begin
+      tasks = JSON.parse(data)['tasks']
+    rescue JSON::ParserError
+      raise "Could not parse JSON response: #{data}"
+    end
+
+    if tasks.nil?
+      raise "No tasks in server response: #{data}"
+    end
+
+    tasks.select! do |t|
+      t['appId'] == "/#{config[:task]}"
+    end
+
+    unhealthy = []
+
+    # Collect last error message for all health checks that are not alive
+    tasks.each do |task|
+      checks = task['healthCheckResults'] || []
+      checks.each do |check|
+        if check['alive']
+          next
+        end
+        message = check['lastFailureCause'] ||
+                  'Health check not alive'
+        unhealthy << message
+      end
+    end
+
+    [tasks.length, unhealthy]
+  end
 end
diff --git a/test/check_marathon_task_spec.rb b/test/check_marathon_task_spec.rb
new file mode 100644
index 0000000..67c9392
--- /dev/null
+++ b/test/check_marathon_task_spec.rb
@@ -0,0 +1,55 @@
+require_relative './spec_helper.rb'
+require_relative '../bin/check-marathon-task.rb'
+require_relative './fixtures.rb'
+
+# rubocop:disable Style/ClassVars
+class MarathonTaskCheck
+  at_exit do
+    @@autorun = false
+  end
+
+  def critical(*); end
+
+  def warning(*); end
+
+  def ok(*); end
+
+  def unknown(*); end
+end
+
+def check_results(parameters)
+  check = MarathonTaskCheck.new parameters.split(' ')
+  check.check_tasks marathon_response
+end
+
+describe 'MarathonTaskCheck' do
+  before do
+    @default_parameters = '--server localhost --task foo/bar --instances 1'
+    @check = MarathonTaskCheck.new @default_parameters.split(' ')
+  end
+
+  describe '#check_tasks' do
+    it 'counts a single running task with no errors' do
+      tasks_ok, unhealthy = check_results @default_parameters
+      expect(tasks_ok).to be 1
+      expect(unhealthy).to be == []
+    end
+
+    it 'returns zero for a task that is not running' do
+      tasks_running, unhealthy = check_results '--server s --task non/existing --instances 1'
+      expect(tasks_running).to be 0
+      expect(unhealthy).to be == []
+    end
+
+    it 'collects error messages from unhealthy tasks' do
+      tasks_running, unhealthy = check_results '--server s --task broken/app --instances 1'
+      expect(tasks_running).to be 2
+      expect(unhealthy.count).to eq 2
+    end
+
+    it 'raises an error for empty or unparseable server responses' do
+      expect { @check.check_tasks '{}' }.to raise_error(/No tasks/)
+      expect { @check.check_tasks '' }.to raise_error(/Could not parse JSON/)
+    end
+  end
+end
diff --git a/test/fixtures.rb b/test/fixtures.rb
new file mode 100644
index 0000000..ba3cb13
--- /dev/null
+++ b/test/fixtures.rb
@@ -0,0 +1,116 @@
+def marathon_response
+  <<-EOF
+{
+  "tasks": [
+    {
+      "healthCheckResults": [
+        {
+          "taskId": "foo_bar.0",
+          "lastFailureCause": null,
+          "lastSuccess": "2016-07-14T15:27:48.286Z",
+          "lastFailure": null,
+          "firstSuccess": "2016-07-14T14:10:57.583Z",
+          "consecutiveFailures": 0,
+          "alive": true
+        }
+      ],
+      "servicePorts": [
+        1234
+      ],
+      "appId": "/foo/bar",
+      "ipAddresses": [
+        {
+          "protocol": "IPv4",
+          "ipAddress": "123.123.123.123"
+        }
+      ],
+      "id": "foo_bar.0",
+      "slaveId": "0-S0",
+      "host": "host.example.com",
+      "state": "TASK_RUNNING",
+      "startedAt": "2016-07-14T14:09:43.350Z",
+      "stagedAt": "2016-07-14T14:09:41.953Z",
+      "ports": [
+        123450
+      ],
+      "version": "2016-07-14T14:09:41.270Z"
+    },
+    {
+      "healthCheckResults": [
+        {
+          "taskId": "broken_app.0",
+          "lastFailureCause": null,
+          "lastSuccess": null,
+          "lastFailure": null,
+          "firstSuccess": null,
+          "consecutiveFailures": 0,
+          "alive": false
+        },
+        {
+          "taskId": "broken_app.0",
+          "lastFailureCause": null,
+          "lastSuccess": "2016-07-14T15:27:48.286Z",
+          "lastFailure": null,
+          "firstSuccess": "2016-07-14T14:10:57.583Z",
+          "consecutiveFailures": 0,
+          "alive": true
+        }
+      ],
+      "servicePorts": [
+        1234
+      ],
+      "appId": "/broken/app",
+      "ipAddresses": [
+        {
+          "protocol": "IPv4",
+          "ipAddress": "123.123.123.123"
+        }
+      ],
+      "id": "broken_app.0",
+      "slaveId": "0-S0",
+      "host": "host.example.com",
+      "state": "TASK_RUNNING",
+      "startedAt": "2016-07-14T14:09:43.350Z",
+      "stagedAt": "2016-07-14T14:09:41.953Z",
+      "ports": [
+        123450
+      ],
+      "version": "2016-07-14T14:09:41.270Z"
+    },
+    {
+      "healthCheckResults": [
+        {
+          "taskId": "broken_app.1",
+          "lastFailureCause": "I broke",
+          "lastSuccess": null,
+          "lastFailure": "2016-07-14T14:09:41.270Z",
+          "firstSuccess": null,
+          "consecutiveFailures": 1,
+          "alive": false
+        }
+      ],
+      "servicePorts": [
+        1234
+      ],
+      "appId": "/broken/app",
+      "ipAddresses": [
+        {
+          "protocol": "IPv4",
+          "ipAddress": "123.123.123.123"
+        }
+      ],
+      "id": "broken_app.1",
+      "slaveId": "0-S0",
+      "host": "host.example.com",
+      "state": "TASK_RUNNING",
+      "startedAt": "2016-07-14T14:09:43.350Z",
+      "stagedAt": "2016-07-14T14:09:41.953Z",
+      "ports": [
+        123450
+      ],
+      "version": "2016-07-14T14:09:41.270Z"
+    }
+  ]
+}
+EOF
+end
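
For reference, a minimal standalone sketch of the counting contract that `check_tasks` implements (stdlib `json` only, no Sensu; the response string is a trimmed-down variant of the fixture, not real Marathon output):

```ruby
require 'json'

# Two tasks for the same app: one healthy, one with a failed health check.
response = <<~JSON
  {"tasks": [
    {"appId": "/foo/bar", "healthCheckResults": [{"alive": true}]},
    {"appId": "/foo/bar", "healthCheckResults": [{"alive": false, "lastFailureCause": "I broke"}]}
  ]}
JSON

# Keep only the tasks belonging to the watched app.
tasks = JSON.parse(response)['tasks'].select { |t| t['appId'] == '/foo/bar' }

# Collect one message per health check that is not alive.
unhealthy = tasks
            .flat_map { |t| (t['healthCheckResults'] || []).reject { |c| c['alive'] } }
            .map { |c| c['lastFailureCause'] || 'Health check not alive' }

puts "#{tasks.length} tasks, #{unhealthy.length} unhealthy" # => 2 tasks, 1 unhealthy
```

As in the check itself, a task with a failing health check still counts toward the running total; the failure surfaces only through the list of unhealthy messages, which is what drives the CRITICAL status.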