Skip to content

Commit

Permalink
Merge pull request #281 from Shopify/cbruckmayer/leader-elected
Browse files Browse the repository at this point in the history
Add logs around leader election
  • Loading branch information
ChrisBr authored Aug 28, 2024
2 parents fa5fc85 + 8773e71 commit e4804de
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 28 deletions.
6 changes: 6 additions & 0 deletions ruby/lib/ci/queue/redis/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ def max_test_failed?

attr_reader :redis, :redis_url

def measure
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
yield
Process.clock_gettime(Process::CLOCK_MONOTONIC) - starting
end

def key(*args)
['build', build_id, *args].join(':')
end
Expand Down
6 changes: 0 additions & 6 deletions ruby/lib/ci/queue/redis/supervisor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,6 @@ def wait_for_workers

private

def measure
starting = Process.clock_gettime(Process::CLOCK_MONOTONIC)
yield
Process.clock_gettime(Process::CLOCK_MONOTONIC) - starting
end

def active_workers?
# if there are running jobs we assume there are still agents active
redis.zrangebyscore(key('running'), CI::Queue.time_now.to_f - config.timeout, "+inf", limit: [0,1]).count > 0
Expand Down
23 changes: 15 additions & 8 deletions ruby/lib/ci/queue/redis/worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,22 @@ def push(tests)
@total = tests.size

if @master = redis.setnx(key('master-status'), 'setup')
redis.multi do |transaction|
transaction.lpush(key('queue'), tests) unless tests.empty?
transaction.set(key('total'), @total)
transaction.set(key('master-status'), 'ready')

transaction.expire(key('queue'), config.redis_ttl)
transaction.expire(key('total'), config.redis_ttl)
transaction.expire(key('master-status'), config.redis_ttl)
puts "Worker electected as leader, pushing #{@total} tests to the queue."
puts

duration = measure do
redis.multi do |transaction|
transaction.lpush(key('queue'), tests) unless tests.empty?
transaction.set(key('total'), @total)
transaction.set(key('master-status'), 'ready')

transaction.expire(key('queue'), config.redis_ttl)
transaction.expire(key('total'), config.redis_ttl)
transaction.expire(key('master-status'), config.redis_ttl)
end
end

puts "Finished pushing #{@total} tests to the queue in #{duration.round(2)}s."
end
register
redis.expire(key('workers'), config.redis_ttl)
Expand Down
5 changes: 3 additions & 2 deletions ruby/lib/minitest/queue/runner.rb
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def bisect_command

failing_order = queue.candidates
step("Final validation")
status = if run_tests_in_fork(failing_order)
if run_tests_in_fork(failing_order)
step(yellow("The bisection was inconclusive, there might not be any leaky test here."))
File.write('log/test_order.log', "")
exit! 1
Expand Down Expand Up @@ -314,7 +314,8 @@ def report_grind_command
private

attr_reader :queue_config, :options, :command, :argv
attr_accessor :queue, :queue_url, :grind_list, :grind_count, :load_paths, :verbose
attr_writer :queue_url
attr_accessor :queue, :grind_list, :grind_count, :load_paths, :verbose

def require_worker_id!
if queue.distributed?
Expand Down
2 changes: 1 addition & 1 deletion ruby/test/ci/queue/redis_supervisor_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def test_wait_for_workers_timeout
thread.wakeup
worker(1)
thread.join
assert_includes io, "Aborting, it seems all workers died.\n"
assert_includes io.join, "Aborting, it seems all workers died.\n"
end

def test_num_workers
Expand Down
9 changes: 1 addition & 8 deletions ruby/test/integration/minitest_redis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,6 @@ def test_max_test_failed
assert_equal 'Ran 47 tests, 47 assertions, 3 failures, 0 errors, 0 skips, 44 requeues in X.XXs', output

# Run the reporter
exit_code = nil
out, err = capture_subprocess_io do
system(
@exe, 'report',
Expand Down Expand Up @@ -862,12 +861,6 @@ def test_redis_reporter
)
end

warning = <<~END
[WARNING] Atest#test_bar was picked up by another worker because it didn't complete in the allocated 2 seconds.
You may want to either optimize this test or bump ci-queue timeout.
It's also possible that the worker that was processing it was terminated without being able to report back.
END

warnings_file.rewind
content = JSON.parse(warnings_file.read)
assert_equal 1, content.size
Expand Down Expand Up @@ -953,7 +946,7 @@ def test_application_error

assert_equal 42, $?.exitstatus

out, err = capture_subprocess_io do
out, _ = capture_subprocess_io do
system(
@exe, 'report',
'--queue', @redis_url,
Expand Down
20 changes: 18 additions & 2 deletions ruby/test/integration/rspec_redis_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ def test_redis_runner

assert_empty err
expected_output = strip_heredoc <<-EOS
Worker electected as leader, pushing 3 tests to the queue.
Finished pushing 3 tests to the queue in X.XXs.
Randomized with seed 123
..*.
Expand Down Expand Up @@ -88,6 +91,9 @@ def test_redis_runner_retry

assert_empty err
expected_output = strip_heredoc <<-EOS
Worker electected as leader, pushing 3 tests to the queue.
Finished pushing 3 tests to the queue in X.XXs.
Randomized with seed 123
..*.
Expand Down Expand Up @@ -267,6 +273,9 @@ def test_before_suite_errors

assert_empty err
expected_output = strip_heredoc <<-EOS
Worker electected as leader, pushing 2 tests to the queue.
Finished pushing 2 tests to the queue in X.XXs.
Randomized with seed 123
Expand Down Expand Up @@ -308,6 +317,9 @@ def test_report

assert_empty err
expected_output = strip_heredoc <<-EOS
Worker electected as leader, pushing 3 tests to the queue.
Finished pushing 3 tests to the queue in X.XXs.
Randomized with seed 123
..F
Expand Down Expand Up @@ -379,11 +391,12 @@ def test_world_wants_to_quit
end

assert_empty err

expected_output = strip_heredoc <<-EOS
Finished in X.XXXXX seconds (files took X.XXXXX seconds to load)
0 examples, 0 failures
Finished in X.XXXXX seconds (files took X.XXXXX seconds to load)
0 examples, 0 failures
EOS
assert_equal expected_output, normalize(out)
Expand All @@ -405,6 +418,9 @@ def test_world_wants_to_quit

assert_empty err
expected_output = strip_heredoc <<-EOS
Worker electected as leader, pushing 1 tests to the queue.
Finished pushing 1 tests to the queue in X.XXs.
Randomized with seed 123
F
Expand Down
2 changes: 1 addition & 1 deletion ruby/test/support/shared_queue_assertions.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ module SharedQueueAssertions
include QueueHelper

def setup
@queue = populate(build_queue)
capture_io { @queue = populate(build_queue) }
end

def test_progess
Expand Down

0 comments on commit e4804de

Please sign in to comment.