Skip to content

Commit

Permalink
Make user lookup resilient and consistent (#3529)
Browse files Browse the repository at this point in the history
* Make user lookup resilient and consistent

* Address PR feedback
  • Loading branch information
iaftab-alam authored Dec 13, 2023
1 parent d9eab13 commit 3afc75e
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 40 deletions.
4 changes: 4 additions & 0 deletions app/controllers/v3/roles_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def index
message: message,
decorators: decorators
)
rescue UaaUnavailable
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
end

def show
Expand All @@ -43,6 +45,8 @@ def show
resource_not_found!(:role) unless role

render status: :ok, json: Presenters::V3::RolePresenter.new(role, decorators:)
rescue UaaUnavailable
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
end

def create
Expand Down
2 changes: 2 additions & 0 deletions app/controllers/v3/users_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ def show
user_not_found! unless user

render status: :ok, json: Presenters::V3::UserPresenter.new(user, uaa_users: User.uaa_users_info([user.guid]))
rescue VCAP::CloudController::UaaUnavailable
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
end

def create
Expand Down
56 changes: 39 additions & 17 deletions lib/cloud_controller/uaa/uaa_client.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ def token_info
end

def users_for_ids(user_ids)
fetch_users(user_ids)
rescue UaaUnavailable, CF::UAA::UAAError => e
logger.error("Failed to retrieve users from UAA: #{e.inspect}")
{}
with_request_error_handling { fetch_users(user_ids) }
end

def usernames_for_ids(user_ids)
Expand All @@ -64,22 +61,21 @@ def id_for_username(username, origin: nil)
end

def ids_for_usernames_and_origins(usernames, origins, precise_username_match=true)
operator = precise_username_match ? 'eq' : 'co'
username_filter_string = usernames&.map { |u| "username #{operator} \"#{u}\"" }&.join(' or ')
origin_filter_string = origins&.map { |o| "origin eq \"#{o}\"" }&.join(' or ')
with_request_error_handling do
operator = precise_username_match ? 'eq' : 'co'
username_filter_string = usernames&.map { |u| "username #{operator} \"#{u}\"" }&.join(' or ')
origin_filter_string = origins&.map { |o| "origin eq \"#{o}\"" }&.join(' or ')

filter_string = construct_filter_string(username_filter_string, origin_filter_string)
filter_string = construct_filter_string(username_filter_string, origin_filter_string)

results = if precise_username_match
query(:user_id, includeInactive: true, filter: filter_string)
else
query(:user, filter: filter_string, attributes: 'id')
end
results = if precise_username_match
query(:user_id, includeInactive: true, filter: filter_string)
else
query(:user, filter: filter_string, attributes: 'id')
end

results['resources'].pluck('id')
rescue CF::UAA::UAAError => e
logger.error("Failed to retrieve user ids from UAA: #{e.inspect}")
raise UaaUnavailable
results['resources'].pluck('id')
end
end

def construct_filter_string(username_filter_string, origin_filter_string)
Expand All @@ -104,6 +100,32 @@ def info
CF::UAA::Info.new(uaa_target, uaa_connection_opts)
end

def with_request_error_handling
delay = 0.25
max_delay = 5
retry_until = Time.now.utc + 60 # retry for 1 minute from now
factor = 2

begin
yield
rescue CF::UAA::InvalidToken => e
logger.error("UAA request for token failed: #{e.inspect}")
raise
rescue UaaUnavailable, CF::UAA::UAAError => e
if Time.now.utc > retry_until
logger.error('Unable to establish a connection to UAA, no more retries, raising an exception.')
raise UaaUnavailable
else
sleep_time = [delay, max_delay].min
logger.error("Failed to retrieve details from UAA: #{e.inspect}")
logger.info("Attempting to connect to the UAA. Total #{(retry_until - Time.now.utc).round(2)} seconds remaining. Next retry after #{sleep_time} seconds.")
sleep(sleep_time)
delay *= factor
retry
end
end
end

private

def query(type, **)
Expand Down
130 changes: 107 additions & 23 deletions spec/unit/lib/uaa/uaa_client_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -315,10 +315,11 @@ module VCAP::CloudController
context 'when UAA is unavailable' do
before do
allow(uaa_client).to receive(:token_info).and_raise(UaaUnavailable)
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
end

it 'returns an empty hash' do
expect(uaa_client.users_for_ids([userid_1])).to eq({})
it 'raises an exception' do
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
end
end

Expand Down Expand Up @@ -378,21 +379,23 @@ module VCAP::CloudController

context 'when the endpoint returns an error' do
let(:uaa_error) { CF::UAA::UAAError.new('some error') }
let(:mock_logger) { double(:steno_logger, error: nil) }
let(:mock_logger) { double(:steno_logger, error: nil, info: nil) }

before do
scim = instance_double(CF::UAA::Scim)
allow(scim).to receive(:query).and_raise(uaa_error)
allow(uaa_client).to receive_messages(scim: scim, logger: mock_logger)
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
end

it 'returns an empty hash' do
expect(uaa_client.users_for_ids([userid_1])).to eq({})
it 'raises an exception' do
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
end

it 'logs the error' do
uaa_client.users_for_ids([userid_1])
expect(mock_logger).to have_received(:error).with("Failed to retrieve users from UAA: #{uaa_error.inspect}")
it 'retries, raises an exception after 17 attempts' do
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
expect(uaa_client).to have_received(:scim).exactly(17).times
expect(uaa_client).to have_received(:sleep).exactly(16).times
end
end

Expand Down Expand Up @@ -439,8 +442,9 @@ module VCAP::CloudController
context 'when token is invalid or expired twice' do
let(:auth_header) { 'bearer invalid' }

it 'retries once and then returns no usernames' do
expect(uaa_client.users_for_ids([userid_1, userid_2])).to eq({})
it 'fails immediately without retries' do
expect { uaa_client.users_for_ids([userid_1, userid_2]) }.to raise_error(CF::UAA::InvalidToken)
expect(uaa_client).not_to receive(:sleep)
end
end
end
Expand Down Expand Up @@ -548,6 +552,7 @@ module VCAP::CloudController
let(:username1) { '[email protected]' }
let(:username2) { '[email protected]' }
let(:partial_username) { 'user' }
let(:mock_logger) { double(:steno_logger, error: nil, info: nil) }

context 'with usernames but no origins' do
it 'returns the ids for the usernames' do
Expand Down Expand Up @@ -675,13 +680,14 @@ module VCAP::CloudController

context 'when UAA is unavailable' do
before do
allow(uaa_client).to receive(:token_info).and_raise(UaaUnavailable)
allow(uaa_client).to receive(:query).and_raise(UaaUnavailable)
allow(uaa_client).to receive(:sleep) { |n| Timecop.travel(n) }
end

it 'raises UaaUnavailable' do
expect do
uaa_client.ids_for_usernames_and_origins([username1], nil)
end.to raise_error(UaaUnavailable)
it 'retries, raises an exception after 17 attempts' do
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
expect(uaa_client).to have_received(:query).exactly(17).times
expect(uaa_client).to have_received(:sleep).exactly(16).times
end
end

Expand All @@ -690,12 +696,13 @@ module VCAP::CloudController
scim = double('scim')
allow(scim).to receive(:query).and_raise(CF::UAA::TargetError)
allow(uaa_client).to receive(:scim).and_return(scim)
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
end

it 'raises UaaUnavailable' do
expect do
uaa_client.ids_for_usernames_and_origins([username1], nil)
end.to raise_error(UaaUnavailable)
it 'retries, raises an exception after 17 attempts' do
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
expect(uaa_client).to have_received(:scim).exactly(17).times
expect(uaa_client).to have_received(:sleep).exactly(16).times
end
end

Expand All @@ -704,12 +711,13 @@ module VCAP::CloudController
scim = double('scim')
allow(scim).to receive(:query).and_raise(CF::UAA::BadTarget)
allow(uaa_client).to receive(:scim).and_return(scim)
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
end

it 'raises UaaUnavailable' do
expect do
uaa_client.ids_for_usernames_and_origins([username1], nil)
end.to raise_error(UaaUnavailable)
it 'retries, raises an exception after 17 attempts' do
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
expect(uaa_client).to have_received(:scim).exactly(17).times
expect(uaa_client).to have_received(:sleep).exactly(16).times
end
end
end
Expand Down Expand Up @@ -814,5 +822,81 @@ module VCAP::CloudController
end
end
end

describe '#with_request_error_handling' do
before do
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
end

context 'when the block succeeds immediately' do
it 'does not sleep or raise an exception' do
expect { uaa_client.with_request_error_handling {} }.not_to raise_error
end
end

context 'when the block raises an exception' do
let(:successful_block) do
proc {
@count ||= 0
@count += 1
@count == 2 ? true : raise(UaaUnavailable)
}
end

it 'retries once and eventually succeeds' do
expect { subject.with_request_error_handling(&successful_block) }.not_to raise_error
end

it 'fails immediately if invalidToken exception has been thrown' do
expect { subject.with_request_error_handling { raise CF::UAA::InvalidToken } }.to raise_error(CF::UAA::InvalidToken)
expect(uaa_client).not_to receive(:sleep)
end

it 'retries and eventually raises an error when the block fails' do
attempts = 0

expect do
subject.with_request_error_handling do
attempts += 1
Timecop.travel(Time.now.utc + 50)
raise UaaUnavailable
end
end.to raise_error(UaaUnavailable)
expect(uaa_client).to have_received(:sleep).exactly(1).times
expect(attempts).to eq(2)
end

it 'stops retrying after 60 seconds and raises an exception' do
start_time = Time.now.utc

Timecop.freeze do
expect do
subject.with_request_error_handling do
Timecop.travel(start_time + 61)
raise UaaUnavailable
end
end.to raise_error(UaaUnavailable)
end
expect(uaa_client).not_to receive(:sleep)
end

it 'raises an error after 17 attempts in approximately 1 minute when each yield call immediately' do
attempts = 0
start_time = Time.now.utc

expect do
subject.with_request_error_handling do
attempts += 1
raise UaaUnavailable
end
end.to raise_error(UaaUnavailable)
end_time = Time.now.utc
duration = end_time.to_f - start_time.to_f
expect(attempts).to be_within(1).of(17)
expect(duration).to be_within(1).of(62)
expect(uaa_client).to have_received(:sleep).exactly(16).times
end
end
end
end
end

0 comments on commit 3afc75e

Please sign in to comment.