Add taskList throttling to allow users to limit activities executed per second #432

Merged: 28 commits, merged Jan 5, 2018
Changes from 25 commits
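At a high level, the PR gates how fast the matching service hands tasks to pollers on a given task list. A minimal sketch of the idea using golang.org/x/time/rate, the rate limiter the commits below switch to; the types and wiring here are illustrative rather than the actual Cadence code:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

// dispatcher gates task hand-off to pollers at a configurable tasks-per-second rate.
type dispatcher struct {
	limiter *rate.Limiter
}

func newDispatcher(maxDispatchPerSecond float64) *dispatcher {
	// Burst of 1 means tokens are handed out one task at a time.
	return &dispatcher{limiter: rate.NewLimiter(rate.Limit(maxDispatchPerSecond), 1)}
}

// dispatch blocks until a token is available or the poller's long-poll context expires.
func (d *dispatcher) dispatch(ctx context.Context, task string) error {
	if err := d.limiter.Wait(ctx); err != nil {
		return err // context deadline hit while throttled
	}
	fmt.Println("dispatched", task)
	return nil
}

func main() {
	d := newDispatcher(2) // at most ~2 tasks per second
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	for i := 0; i < 5; i++ {
		if err := d.dispatch(ctx, fmt.Sprintf("task-%d", i)); err != nil {
			fmt.Println("throttled past deadline:", err)
			return
		}
	}
}
```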

Commits (28):
8d1f383  Add user-defined tasklist throttling (madhuravi, Nov 21, 2017)
4cf7326  Add tests with 0 throttling limit (madhuravi, Nov 25, 2017)
68be53c  Add concurrent publish/consume with varying dispatch throttling (madhuravi, Nov 27, 2017)
f1e96bf  Move lock before task list usage (madhuravi, Nov 27, 2017)
812e46b  Tighten tests (madhuravi, Nov 27, 2017)
6874f95  Remove redundant time.sleep (madhuravi, Nov 27, 2017)
b932509  Reduce worker count to reduce latency (madhuravi, Nov 27, 2017)
993bf87  Fix race condition in test (madhuravi, Nov 27, 2017)
2a62e42  Add back sleep in sync match tests (madhuravi, Nov 27, 2017)
b0f92fc  Wait for throttling token till long poll expiration (madhuravi, Nov 28, 2017)
9c84525  Switch to use golang rate limiter to accept float (madhuravi, Nov 28, 2017)
92a4e91  Fix lock, take into account context deadline (madhuravi, Nov 28, 2017)
8650e55  Add poll throttle latency metric (madhuravi, Nov 29, 2017)
0fa0e62  Move throttle to add APIs to limit poll correctly (madhuravi, Dec 15, 2017)
43c1a44  Use atomic.Value for rate limiter and fix tests (madhuravi, Dec 20, 2017)
b1ad7a8  Fix race condition (madhuravi, Dec 21, 2017)
40269a4  Fix concurrent activities test (madhuravi, Dec 21, 2017)
7c2cd17  Add RLock (madhuravi, Dec 22, 2017)
ca9b497  Wrap ratelimiter max dispatch update in lock (madhuravi, Dec 22, 2017)
3dba4b0  Move cancel context to task list to remove extra goroutine (madhuravi, Dec 23, 2017)
b63dbd6  Undo Makefile changes (madhuravi, Dec 23, 2017)
4c13d6a  Add separate tests for task list manager (madhuravi, Dec 27, 2017)
a9c88f8  Fixes from comments (madhuravi, Jan 3, 2018)
7494dcb  Avoid write lock in majority of requests (madhuravi, Jan 3, 2018)
f69f892  Handle throttle error and not propagate up to AddTask (madhuravi, Jan 5, 2018)
fe0e8cc  Only set dispatch limit on get task (madhuravi, Jan 5, 2018)
e4debb3  Remove ununsed metric (madhuravi, Jan 5, 2018)
310dbdc  Fix error lib (madhuravi, Jan 5, 2018)
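
Several of the later commits ("Use atomic.Value for rate limiter and fix tests", "Avoid write lock in majority of requests", "Wrap ratelimiter max dispatch update in lock") deal with updating the dispatch rate while pollers are reading it. A sketch of the atomic.Value approach under assumed names; the real change also wraps the update in a lock:

```go
package main

import (
	"fmt"
	"sync/atomic"

	"golang.org/x/time/rate"
)

// rateLimiterHolder lets readers pick up a new dispatch rate without a mutex:
// writers publish a fresh *rate.Limiter, readers load whatever is current.
type rateLimiterHolder struct {
	v atomic.Value // holds *rate.Limiter
}

func newRateLimiterHolder(maxDispatchPerSecond float64) *rateLimiterHolder {
	h := &rateLimiterHolder{}
	h.v.Store(rate.NewLimiter(rate.Limit(maxDispatchPerSecond), 1))
	return h
}

// UpdateMaxDispatch swaps in a new limiter only when the requested rate differs,
// so the common case (same rate on every poll) is a single atomic load.
func (h *rateLimiterHolder) UpdateMaxDispatch(maxDispatchPerSecond *float64) {
	if maxDispatchPerSecond == nil {
		return
	}
	current := h.v.Load().(*rate.Limiter)
	if current.Limit() == rate.Limit(*maxDispatchPerSecond) {
		return
	}
	h.v.Store(rate.NewLimiter(rate.Limit(*maxDispatchPerSecond), 1))
}

func (h *rateLimiterHolder) Allow() bool {
	return h.v.Load().(*rate.Limiter).Allow()
}

func main() {
	h := newRateLimiterHolder(100)
	newRPS := 5.0
	h.UpdateMaxDispatch(&newRPS)
	fmt.Println("allowed:", h.Allow())
}
```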
4 changes: 4 additions & 0 deletions common/metrics/defs.go
@@ -597,6 +597,8 @@ const (
LeaseFailureCounter
ConditionFailedErrorCounter
RespondQueryTaskFailedCounter
SyncThrottleCounter
BufferThrottleCounter
)

// MetricDefs record the metrics for all services
@@ -679,6 +681,8 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
LeaseFailureCounter: {metricName: "lease.failures"},
ConditionFailedErrorCounter: {metricName: "condition-failed-errors"},
RespondQueryTaskFailedCounter: {metricName: "respond-query-failed"},
SyncThrottleCounter: {metricName: "sync.throttle.count"},
BufferThrottleCounter: {metricName: "buffer.throttle.count"},
},
}
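
The two new counters would presumably be incremented wherever a sync match or a buffered task is throttled. A hedged, self-contained sketch of such a call site; the scope value and client wiring are stand-ins, and only the counter names come from this diff:

```go
package main

import "fmt"

// Minimal stand-ins for the metrics client and the counter indices added in this
// diff; the real code would use common/metrics.Client and the generated defs.
const (
	SyncThrottleCounter = iota
	BufferThrottleCounter
)

type metricsClient interface {
	IncCounter(scope int, counter int)
}

type printClient struct{}

func (printClient) IncCounter(scope, counter int) { fmt.Println("inc", scope, counter) }

// emitThrottleMetric shows where the new counters would be bumped: sync throttle
// when a waiting poller cannot be matched because the limiter rejects the dispatch,
// buffer throttle when a buffered task cannot be dispatched yet.
func emitThrottleMetric(c metricsClient, scope int, syncMatch bool) {
	if syncMatch {
		c.IncCounter(scope, SyncThrottleCounter)
		return
	}
	c.IncCounter(scope, BufferThrottleCounter)
}

func main() {
	emitThrottleMetric(printClient{}, 0, true)
}
```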

42 changes: 34 additions & 8 deletions service/matching/matchingEngine.go
@@ -129,7 +129,8 @@ func (e *matchingEngineImpl) Stop() {
}

func (e *matchingEngineImpl) getTaskLists(maxCount int) (lists []taskListManager) {
e.taskListsLock.Lock()
e.taskListsLock.RLock()
defer e.taskListsLock.RUnlock()
lists = make([]taskListManager, 0, len(e.taskLists))
count := 0
for _, tlMgr := range e.taskLists {
@@ -139,7 +140,6 @@ func (e *matchingEngineImpl) getTaskLists(maxCount int) (lists []taskListManager
break
}
}
e.taskListsLock.Unlock()
return
}

@@ -153,20 +153,33 @@ func (e *matchingEngineImpl) String() string {
return r
}

// Returns taskListManager for a task list. If not already cached gets new range from DB and if successful creates one.
func (e *matchingEngineImpl) getTaskListManager(taskList *taskListID) (taskListManager, error) {
return e.getTaskListManagerWithRPS(taskList, nil)
}

// Returns taskListManager for a task list. If not already cached gets new range from DB and
// if successful creates one. The passed in throttling limit determines how many tasks are
// dispatched per second.
func (e *matchingEngineImpl) getTaskListManagerWithRPS(
taskList *taskListID, maxDispatchPerSecond *float64,
) (taskListManager, error) {
// The first check is an optimization: almost all requests will find an existing task list manager
// and return without taking the write lock
e.taskListsLock.RLock()
if result, ok := e.taskLists[*taskList]; ok {
e.taskListsLock.RUnlock()
result.UpdateMaxDispatch(maxDispatchPerSecond)
return result, nil
}
e.taskListsLock.RUnlock()
mgr := newTaskListManager(e, taskList, e.config)
// If it gets here, write lock and check again in case a task list is created between the two locks
e.taskListsLock.Lock()
if result, ok := e.taskLists[*taskList]; ok {
result.UpdateMaxDispatch(maxDispatchPerSecond)
Contributor: If we aren't going to switch to atomics, consider moving this outside the lock so that we don't block addition of new task lists when another task list is blocked on UpdateMaxDispatch for multiple seconds if/when a change in the poll limit is rolled out.

Contributor: I'm a little worried we are switching to a write lock on this hot path. Can we figure out a way that does not require acquiring the write lock on the top-level map for TaskListMgr?

Contributor Author: There is one way:
RLock and check the map;
if present: RUnlock, update max dispatch;
if not: RUnlock, take the write lock, check the map again, and only create and write if it is still missing.

So we will have some duplicate code, since we check whether it is in the map twice. That way, if it is in the map originally, it only takes the read lock. It makes the code uglier but avoids the write lock in the majority of cases.

Contributor: We already have that logic in GetTaskList. I'm not sure this new method is needed. If you move the logic of setting the throttling limit to GetTask, then we can simplify this code a lot.

Contributor Author: Removed the Update method, but FYI most of this code is still needed for task list creation.
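
A minimal sketch of the read-then-write-lock pattern discussed in this thread, assuming a task-list map guarded by sync.RWMutex; the type names are illustrative:

```go
package main

import "sync"

type taskListID struct{ name string }
type taskListManager struct{ maxDispatchPerSecond *float64 }

type engine struct {
	mu        sync.RWMutex
	taskLists map[taskListID]*taskListManager
}

// getOrCreate takes only the read lock in the common case (manager already cached)
// and falls back to the write lock, re-checking the map, when it has to create one.
func (e *engine) getOrCreate(id taskListID, maxDispatch *float64) *taskListManager {
	e.mu.RLock()
	if mgr, ok := e.taskLists[id]; ok {
		e.mu.RUnlock()
		return mgr
	}
	e.mu.RUnlock()

	e.mu.Lock()
	defer e.mu.Unlock()
	// Re-check: another goroutine may have created the manager between the locks.
	if mgr, ok := e.taskLists[id]; ok {
		return mgr
	}
	mgr := &taskListManager{maxDispatchPerSecond: maxDispatch}
	e.taskLists[id] = mgr
	return mgr
}

func main() {
	e := &engine{taskLists: map[taskListID]*taskListManager{}}
	rps := 10.0
	_ = e.getOrCreate(taskListID{name: "orders"}, &rps)
}
```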

e.taskListsLock.Unlock()
return result, nil
}
mgr := newTaskListManager(e, taskList, e.config, maxDispatchPerSecond)
Contributor: Move this after your RUnlock() on line 178.

Contributor Author: Looks like we had some duplicate code here in checking the map. Cleaning it up.

e.taskLists[*taskList] = mgr
e.taskListsLock.Unlock()
logging.LogTaskListLoadingEvent(e.logger, taskList.taskListName, taskList.taskType)
@@ -179,6 +192,13 @@ func (e *matchingEngineImpl) getTaskListManager(taskList *taskListID) (taskListM
return mgr, nil
}

// For use in tests
func (e *matchingEngineImpl) updateTaskList(taskList *taskListID, mgr taskListManager) {
e.taskListsLock.Lock()
defer e.taskListsLock.Unlock()
e.taskLists[*taskList] = mgr
}

func (e *matchingEngineImpl) removeTaskListManager(id *taskListID) {
e.taskListsLock.Lock()
defer e.taskListsLock.Unlock()
@@ -247,7 +267,7 @@ pollLoop:
// long-poll when frontend calls CancelOutstandingPoll API
pollerCtx := context.WithValue(ctx, pollerIDKey, pollerID)
taskList := newTaskListID(domainID, taskListName, persistence.TaskListTypeDecision)
tCtx, err := e.getTask(pollerCtx, taskList)
tCtx, err := e.getTask(pollerCtx, taskList, nil)
if err != nil {
// TODO: Is empty poll the best reply for errPumpClosed?
if err == ErrNoTasks || err == errPumpClosed {
@@ -341,10 +361,14 @@ pollLoop:
}

taskList := newTaskListID(domainID, taskListName, persistence.TaskListTypeActivity)
var maxDispatch *float64
if request.TaskListMetadata != nil {
maxDispatch = request.TaskListMetadata.MaxTasksPerSecond
}
// Add frontend generated pollerID to context so tasklistMgr can support cancellation of
// long-poll when frontend calls CancelOutstandingPoll API
pollerCtx := context.WithValue(ctx, pollerIDKey, pollerID)
tCtx, err := e.getTask(pollerCtx, taskList)
tCtx, err := e.getTask(pollerCtx, taskList, maxDispatch)
if err != nil {
// TODO: Is empty poll the best reply for errPumpClosed?
if err == ErrNoTasks || err == errPumpClosed {
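
On the wire, the per-second limit arrives via the poll request's TaskListMetadata. A cut-down sketch of a request carrying it; only the MaxTasksPerSecond field and the nil checks mirror the diff, while the surrounding struct names are assumptions:

```go
package main

import "fmt"

// Cut-down stand-ins for the Thrift request types; only MaxTasksPerSecond is
// taken from the diff above, everything else here is illustrative.
type TaskListMetadata struct {
	MaxTasksPerSecond *float64
}

type PollForActivityTaskRequest struct {
	TaskListName     string
	TaskListMetadata *TaskListMetadata
}

func main() {
	// Ask matching to hand this task list's workers at most 5 activities per second.
	rps := 5.0
	req := &PollForActivityTaskRequest{
		TaskListName:     "payments-tasklist",
		TaskListMetadata: &TaskListMetadata{MaxTasksPerSecond: &rps},
	}
	if req.TaskListMetadata != nil && req.TaskListMetadata.MaxTasksPerSecond != nil {
		fmt.Println("max dispatch:", *req.TaskListMetadata.MaxTasksPerSecond)
	}
}
```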
@@ -450,8 +474,10 @@ func (e *matchingEngineImpl) CancelOutstandingPoll(ctx context.Context, request
}

// Loads a task from persistence and wraps it in a task context
func (e *matchingEngineImpl) getTask(ctx context.Context, taskList *taskListID) (*taskContext, error) {
tlMgr, err := e.getTaskListManager(taskList)
func (e *matchingEngineImpl) getTask(
ctx context.Context, taskList *taskListID, maxDispatchPerSecond *float64,
) (*taskContext, error) {
tlMgr, err := e.getTaskListManagerWithRPS(taskList, maxDispatchPerSecond)
if err != nil {
return nil, err
}
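
Per the commits "Wait for throttling token till long poll expiration" and "Add poll throttle latency metric", a throttled poller waits for a dispatch token only as long as its long-poll context allows, and the time spent waiting is measured. A sketch of that behaviour with golang.org/x/time/rate; the helper and its wiring are assumptions:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"golang.org/x/time/rate"
)

// waitForDispatchToken blocks until the task list's limiter grants a token or the
// poller's long-poll context expires, and reports how long the poll was throttled
// (the "poll throttle latency" idea from the commit history; metric plumbing omitted).
func waitForDispatchToken(ctx context.Context, lim *rate.Limiter) (time.Duration, error) {
	start := time.Now()
	err := lim.Wait(ctx) // respects ctx's deadline: returns an error instead of oversleeping
	return time.Since(start), err
}

func main() {
	lim := rate.NewLimiter(rate.Limit(1), 1) // 1 task/second
	ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer cancel()

	lim.Allow() // burn the initial token so the next Wait has to throttle
	if latency, err := waitForDispatchToken(ctx, lim); err != nil {
		fmt.Println("gave up after", latency, ":", err)
	} else {
		fmt.Println("throttled for", latency)
	}
}
```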