From d05125e84bffb587825308dae80d176a70ac2b0e Mon Sep 17 00:00:00 2001 From: Ayrat Badykov Date: Thu, 19 Nov 2020 19:19:23 +0300 Subject: [PATCH 1/3] retry datadog errors --- .../perf/apps/load_test/lib/service/datadog/api.ex | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/priv/perf/apps/load_test/lib/service/datadog/api.ex b/priv/perf/apps/load_test/lib/service/datadog/api.ex index 5ccc0341f4..26109bc005 100644 --- a/priv/perf/apps/load_test/lib/service/datadog/api.ex +++ b/priv/perf/apps/load_test/lib/service/datadog/api.ex @@ -36,7 +36,7 @@ defmodule LoadTest.Service.Datadog.API do assert_metrics(environment, start_unix, end_unix) end - def do_assert_metrics(environment, start_unix, end_unix, poll_count \\ 200) do + def do_assert_metrics(environment, start_unix, end_unix, poll_count \\ 60) do case fetch_events(start_unix, end_unix, environment) do {:ok, []} -> case poll_count do @@ -60,7 +60,7 @@ defmodule LoadTest.Service.Datadog.API do end end - def fetch_events(start_time, end_time, environment) do + def fetch_events(start_time, end_time, environment, retries \\ 5) do params = %{ start: start_time, end: end_time, @@ -80,10 +80,16 @@ defmodule LoadTest.Service.Datadog.API do {:ok, events} {:ok, %{body: body}} -> - {:error, body} + case retries do + 0 -> {:error, body} + _ -> fetch_events(start_time, end_time, environment, retries - 1) + end {:error, error} -> - {:error, error} + case retries do + 0 -> {:error, error} + _ -> fetch_events(start_time, end_time, environment, retries - 1) + end end end From 78a3558481d75b584fb6a93061f0dfc0401c6ffe Mon Sep 17 00:00:00 2001 From: Ayrat Badykov Date: Thu, 19 Nov 2020 20:00:48 +0300 Subject: [PATCH 2/3] use header function to fail --- .../apps/load_test/lib/service/datadog/api.ex | 64 +++++++++---------- 1 file changed, 31 insertions(+), 33 deletions(-) diff --git a/priv/perf/apps/load_test/lib/service/datadog/api.ex b/priv/perf/apps/load_test/lib/service/datadog/api.ex index 
26109bc005..e18ca421e1 100644 --- a/priv/perf/apps/load_test/lib/service/datadog/api.ex +++ b/priv/perf/apps/load_test/lib/service/datadog/api.ex @@ -36,17 +36,15 @@ defmodule LoadTest.Service.Datadog.API do assert_metrics(environment, start_unix, end_unix) end - def do_assert_metrics(environment, start_unix, end_unix, poll_count \\ 60) do + defp do_assert_metrics(environment, start_unix, end_unix, poll_count \\ 60) + + defp do_assert_metrics(_environment, _start_unix, _end_unix, 0), do: :ok + + defp do_assert_metrics(environment, start_unix, end_unix, poll_count) do case fetch_events(start_unix, end_unix, environment) do {:ok, []} -> - case poll_count do - 0 -> - :ok - - _ -> - Process.sleep(1_000) - do_assert_metrics(environment, start_unix, end_unix, poll_count - 1) - end + Process.sleep(1_000) + do_assert_metrics(environment, start_unix, end_unix, poll_count - 1) {:ok, events} -> # failed monitors with the same tags do not emit events, so we're resolving @@ -60,7 +58,15 @@ defmodule LoadTest.Service.Datadog.API do end end - def fetch_events(start_time, end_time, environment, retries \\ 5) do + defp fetch_events(start_time, end_time, environment, retries \\ 5) + + defp fetch_events(start_time, end_time, environment, 0) do + Logger.error("failed to fetch events #{inspect({start_time, end_time, environment})}") + + {:error, :failed_to_fetch_event} + end + + defp fetch_events(start_time, end_time, environment, retries) do params = %{ start: start_time, end: end_time, @@ -80,16 +86,12 @@ defmodule LoadTest.Service.Datadog.API do {:ok, events} {:ok, %{body: body}} -> - case retries do - 0 -> {:error, body} - _ -> fetch_events(start_time, end_time, environment, retries - 1) - end + Logger.error("failed to fetch events #{inspect(body)}") + fetch_events(start_time, end_time, environment, retries - 1) {:error, error} -> - case retries do - 0 -> {:error, error} - _ -> fetch_events(start_time, end_time, environment, retries - 1) - end + Logger.error("failed to fetch events 
#{inspect(error)}") + fetch_events(start_time, end_time, environment, retries - 1) end end @@ -125,6 +127,12 @@ defmodule LoadTest.Service.Datadog.API do defp do_resolve_monitors([], _), do: :ok + defp do_resolve_monitors(params, 0) do + Logger.error("failed to resolve monitors #{inspect(params)}") + + {:error, :failed_to_resolve_monitors} + end + defp do_resolve_monitors(params, retries) do payload = Jason.encode!(%{"resolve" => params}) @@ -137,26 +145,16 @@ defmodule LoadTest.Service.Datadog.API do {:ok, %{body: body}} -> Logger.error("failed to resolve monitors #{inspect(body)}") - case retries do - 0 -> - {:error, body} + Process.sleep(1_000) - _ -> - Process.sleep(1_000) - do_resolve_monitors(params, retries - 1) - end + do_resolve_monitors(params, retries - 1) - {:error, error} = other -> + {:error, error} -> Logger.error("failed to resolve monitors #{inspect(error)}") - case retries do - 0 -> - other + Process.sleep(1_000) - _ -> - Process.sleep(1_000) - do_resolve_monitors(params, retries - 1) - end + do_resolve_monitors(params, retries - 1) end end From 684b6d543edfd78e32e0b9256e764eb9112bfb20 Mon Sep 17 00:00:00 2001 From: Ayrat Badykov Date: Fri, 20 Nov 2020 09:44:22 +0300 Subject: [PATCH 3/3] add `retrying` to log messages --- priv/perf/apps/load_test/lib/service/datadog/api.ex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/priv/perf/apps/load_test/lib/service/datadog/api.ex b/priv/perf/apps/load_test/lib/service/datadog/api.ex index e18ca421e1..7407de5864 100644 --- a/priv/perf/apps/load_test/lib/service/datadog/api.ex +++ b/priv/perf/apps/load_test/lib/service/datadog/api.ex @@ -86,11 +86,11 @@ defmodule LoadTest.Service.Datadog.API do {:ok, events} {:ok, %{body: body}} -> - Logger.error("failed to fetch events #{inspect(body)}") + Logger.warn("failed to fetch events #{inspect(body)}.
retrying") fetch_events(start_time, end_time, environment, retries - 1) {:error, error} -> - Logger.error("failed to fetch events #{inspect(error)}") + Logger.warn("failed to fetch events #{inspect(error)}. retrying") fetch_events(start_time, end_time, environment, retries - 1) end end @@ -143,14 +143,14 @@ defmodule LoadTest.Service.Datadog.API do :ok {:ok, %{body: body}} -> - Logger.error("failed to resolve monitors #{inspect(body)}") + Logger.warn("failed to resolve monitors #{inspect(body)}. retrying") Process.sleep(1_000) do_resolve_monitors(params, retries - 1) {:error, error} -> - Logger.error("failed to resolve monitors #{inspect(error)}") + Logger.warn("failed to resolve monitors #{inspect(error)}. retrying") Process.sleep(1_000)