diff --git a/master/tasks.go b/master/tasks.go index c3ea2d106..73347585c 100644 --- a/master/tasks.go +++ b/master/tasks.go @@ -305,6 +305,7 @@ func (t *FindItemNeighborsTask) run(j *task.JobsAllocator) error { t.taskMonitor.Add(TaskFindItemNeighbors, len(dataset.ItemLabels)) // inverse document frequency of labels for i := range labeledItems { + labeledItems[i] = lo.Uniq(labeledItems[i]) if dataset.ItemCount() == len(labeledItems[i]) { labelIDF[i] = 1 } else { @@ -646,6 +647,7 @@ func (t *FindUserNeighborsTask) run(j *task.JobsAllocator) error { t.taskMonitor.Add(TaskFindUserNeighbors, len(dataset.UserLabels)) // inverse document frequency of labels for i := range labeledUsers { + labeledUsers[i] = lo.Uniq(labeledUsers[i]) if dataset.UserCount() == len(labeledUsers[i]) { labelIDF[i] = 1 } else { diff --git a/master/tasks_test.go b/master/tasks_test.go index 04e8aee88..03f8d2b49 100644 --- a/master/tasks_test.go +++ b/master/tasks_test.go @@ -256,8 +256,8 @@ func (s *MasterTestSuite) TestFindItemNeighborsIVF_ZeroIDF() { // create dataset err := s.DataClient.BatchInsertItems(ctx, []data.Item{ - {"0", false, []string{"*"}, time.Now(), []string{"a"}, ""}, - {"1", false, []string{"*"}, time.Now(), []string{"a"}, ""}, + {"0", false, []string{"*"}, time.Now(), []string{"a", "a"}, ""}, + {"1", false, []string{"*"}, time.Now(), []string{"a", "a"}, ""}, }) s.NoError(err) err = s.DataClient.BatchInsertFeedback(ctx, []data.Feedback{ @@ -467,8 +467,8 @@ func (s *MasterTestSuite) TestFindUserNeighborsIVF_ZeroIDF() { // create dataset err := s.DataClient.BatchInsertUsers(ctx, []data.User{ - {"0", []string{"a"}, nil, ""}, - {"1", []string{"a"}, nil, ""}, + {"0", []string{"a", "a"}, nil, ""}, + {"1", []string{"a", "a"}, nil, ""}, }) s.NoError(err) err = s.DataClient.BatchInsertFeedback(ctx, []data.Feedback{