# Patch summary (unified diff touching lib/kubeclient/informer.rb and test/test_informer.rb).
#
# lib/kubeclient/informer.rb
#   * initialize / start_worker: introduce a @stopped flag, set to false in both places.
#   * start_worker: the loop body is wrapped in an explicit begin/rescue/ensure and the loop
#     now exits via `break if @stopped`; the caller-side wait becomes
#     `sleep(0.01) until @cache || @stopped`.
#   * stop_worker: `@worker&.kill` is replaced by setting @stopped = true and then, for each
#     non-nil thread in [@waiter, @worker], calling `run` (ThreadError is rescued) and `join`.
#   * watch_to_update_cache: the reconcile-timeout thread is kept in @waiter; after the
#     "watch restarted" log line it is woken with `run` (ThreadError is rescued) and joined.
#
# test/test_informer.rb
#   * The test class is renamed from RetryTest to TestInformer and the comment above it removed.
#   * test_restarts_on_error: stubbed sleep goes from 0.01 to 0.02, the wait from 0.05 to 0.16,
#     and the assertion from `slept.size >= 2` to `slept.size >= 4`.
#   * test_timeout: the list and watch stubs are captured and asserted as requested.
#
# NOTE(review): this text looks whitespace-collapsed (many diff lines joined on one physical
#   line); line breaks and indentation presumably need restoring before it can be applied.
# NOTE(review): in start_worker the `ensure` sleep(1) runs before `break if @stopped`; if the
#   worker reaches that sleep after stop_worker already called `run`, the join presumably
#   waits up to a second - confirm this is acceptable.
# NOTE(review): @waiter is assigned from the worker thread and read in stop_worker; presumably
#   `.compact` plus the ThreadError rescue cover a nil or already-finished waiter - verify.
diff --git a/lib/kubeclient/informer.rb b/lib/kubeclient/informer.rb index bfe523b1..47fa3ff2 100644 --- a/lib/kubeclient/informer.rb +++ b/lib/kubeclient/informer.rb @@ -8,6 +8,7 @@ def initialize(client, resource_name, reconcile_timeout: 15 * 60, logger: nil) @logger = logger @cache = nil @started = nil + @stopped = false @watching = [] end @@ -21,22 +22,34 @@ def watch(&block) # not implicit so users know they have to `stop` def start_worker + @stopped = false @worker = Thread.new do loop do - fill_cache - watch_to_update_cache - rescue StandardError => e - # need to keep retrying since we work in the background - @logger&.error("ignoring error during background work #{e}") - ensure - sleep(1) # do not overwhelm the api-server if we are somehow broken + begin + fill_cache + watch_to_update_cache + rescue StandardError => e + # need to keep retrying since we work in the background + @logger&.error("ignoring error during background work #{e}") + ensure + sleep(1) # do not overwhelm the api-server if we are somehow broken + end + break if @stopped end end - sleep(0.01) until @cache + sleep(0.01) until @cache || @stopped end def stop_worker - @worker&.kill # TODO: be nicer ? 
+ @stopped = true + [@waiter, @worker].compact.each do |thread| + begin + thread.run # cancel sleep so either the loop sleep or the timeout sleep are interrupted + rescue ThreadError + # thread was already dead + end + thread.join + end end private @@ -69,7 +82,7 @@ def watch_to_update_cache stop_reason = 'disconnect' # stop watcher without using timeout - Thread.new do + @waiter = Thread.new do sleep(@reconcile_timeout) stop_reason = 'reconcile' @watcher.finish @@ -88,6 +101,13 @@ def watch_to_update_cache @watching.each { |q| q << notice } end @logger&.info("watch restarted: #{stop_reason}") + + # wake the waiter unless it's dead so it does not hang around + begin + @waiter.run + rescue ThreadError # rubocop:disable Lint/SuppressedException + end + @waiter.join end end end diff --git a/test/test_informer.rb b/test/test_informer.rb index bf0dbd99..fd9bd686 100644 --- a/test/test_informer.rb +++ b/test/test_informer.rb @@ -4,8 +4,7 @@ require 'stringio' require 'logger' -# tests with_retries in kubeclient.rb -class RetryTest < MiniTest::Test +class TestInformer < MiniTest::Test def setup super skip if RUBY_ENGINE == 'truffleruby' # TODO: race condition in truffle-ruby fails random tests @@ -87,14 +86,14 @@ def test_restarts_on_error status: 200 ) slept = [] - informer.stubs(:sleep).with { |x| slept << x; sleep(0.01) } + informer.stubs(:sleep).with { |x| slept << x; sleep(0.02) } with_worker do assert_equal(['a'], informer.list.map { |p| p.metadata.name }) - sleep(0.05) + sleep(0.16) # should give us 4+ restarts (each timeout is 1 sleep and 1 sleep before restart) end - assert slept.size >= 2, slept + assert slept.size >= 4, slept assert_requested(list, at_least_times: 2) assert_requested(watch, at_least_times: 2) end @@ -131,10 +130,14 @@ def test_can_watch_watches def test_timeout timeout = 0.1 informer.instance_variable_set(:@reconcile_timeout, timeout) - stub_list + list = stub_list Kubeclient::Common::WatchStream.any_instance.expects(:finish) - 
stub_request(:get, %r{/v1/watch/pods}) + watch = stub_request(:get, %r{/v1/watch/pods}) + with_worker { sleep(timeout * 1.9) } + + assert_requested(list) + assert_requested(watch) end private