diff --git a/config/config.toml b/config/config.toml index d7deb9f9d..637865b7b 100644 --- a/config/config.toml +++ b/config/config.toml @@ -149,6 +149,20 @@ score = "count(feedback, .FeedbackType == 'star')" # The filter for items in the leaderboard. filter = "(now() - item.Timestamp).Hours() < 168" +# [[recommend.item-to-item]] + +# # The name of the item-to-item recommender. +# name = "similar_embedding" + +# # The type of the item-to-item recommender. There are three types: +# # embedding: recommend by Euclidean distance of embeddings. +# # tags: recommend by number of common tags. +# # users: recommend by number of common users. +# type = "embedding" + +# # The column of the item embeddings. Leave blank if type is "users". +# column = "item.Labels.embedding" + [recommend.user_neighbors] # The type of neighbors for users. There are three types: @@ -157,7 +171,7 @@ filter = "(now() - item.Timestamp).Hours() < 168" # auto: If a user have labels, neighbors are found by number of common labels. # If this user have no labels, neighbors are found by number of common liked items. # The default value is "auto". -neighbor_type = "similar" +neighbor_type = "related" [recommend.item_neighbors] diff --git a/logics/item_to_item.go b/logics/item_to_item.go index f015eddac..737b4509b 100644 --- a/logics/item_to_item.go +++ b/logics/item_to_item.go @@ -16,6 +16,9 @@ package logics import ( "errors" + "sort" + "time" + "github.com/chewxy/math32" mapset "github.com/deckarep/golang-set/v2" "github.com/expr-lang/expr" @@ -29,8 +32,6 @@ import ( "github.com/zhenghaoz/gorse/storage/cache" "github.com/zhenghaoz/gorse/storage/data" "go.uber.org/zap" - "sort" - "time" ) type ItemToItemOptions struct { @@ -304,7 +305,7 @@ func (idf IDF) distance(a, b []dataset.ID) float32 { if len(a) == len(b) && commonCount == float32(len(a)) { // If two items have the same tags, its distance is zero. return 0 - } else if commonCount > 0 { + } else if commonCount > 0 && len(a) > 0 && len(b) > 0 { // Add shrinkage to avoid division by zero return 1 - commonSum*commonCount/ math32.Sqrt(idf.weightedSum(a))/ @@ -320,9 +321,7 @@ func (idf IDF) weightedSumCommonElements(a, b []dataset.ID) (float32, float32) { i, j, sum, count := 0, 0, float32(0), float32(0) for i < len(a) && j < len(b) { if a[i] == b[j] { - if a[i] >= 0 && int(a[i]) < len(idf) { - sum += idf[a[i]] - } + sum += idf[a[i]] count++ i++ j++ @@ -338,9 +337,7 @@ func (idf IDF) weightedSumCommonElements(a, b []dataset.ID) (float32, float32) { func (idf IDF) weightedSum(a []dataset.ID) float32 { var sum float32 for _, i := range a { - if i >= 0 && int(i) < len(idf) { - sum += idf[i] - } + sum += idf[i] } return sum } diff --git a/master/tasks.go b/master/tasks.go index 0211645a8..fded47ced 100644 --- a/master/tasks.go +++ b/master/tasks.go @@ -990,14 +990,30 @@ func (m *Master) LoadDataFromDatabase( func (m *Master) updateItemToItem(dataset *dataset.Dataset) error { ctx, span := m.tracer.Start(context.Background(), "Generate item-to-item recommendation", - len(dataset.GetItems())*len(m.Config.Recommend.ItemToItem)*2) + len(dataset.GetItems())*(len(m.Config.Recommend.ItemToItem)+1)*2) defer span.End() + // Add built-in item-to-item recommenders + itemToItemConfigs := m.Config.Recommend.ItemToItem + builtInConfig := config.ItemToItemConfig{} + builtInConfig.Name = cache.Neighbors + switch m.Config.Recommend.ItemNeighbors.NeighborType { + case config.NeighborTypeSimilar: + builtInConfig.Type = "tags" + builtInConfig.Column = "item.Labels" + case config.NeighborTypeRelated: + builtInConfig.Type = "users" + case config.NeighborTypeAuto: + builtInConfig.Type = "auto" + } + itemToItemConfigs = append(itemToItemConfigs, builtInConfig) + // Build item-to-item recommenders - itemToItemRecommenders := make([]logics.ItemToItem, 0, len(m.Config.Recommend.ItemToItem)) - for _, cfg := range m.Config.Recommend.ItemToItem { + itemToItemRecommenders := make([]logics.ItemToItem, 0, len(itemToItemConfigs)) + for _, cfg := range itemToItemConfigs { recommender, err := logics.NewItemToItem(cfg, m.Config.Recommend.CacheSize, dataset.GetTimestamp(), &logics.ItemToItemOptions{ - TagsIDF: dataset.GetItemColumnValuesIDF(), + TagsIDF: dataset.GetItemColumnValuesIDF(), + UsersIDF: dataset.GetUserIDF(), }) if err != nil { return errors.Trace(err) @@ -1006,10 +1022,10 @@ func (m *Master) updateItemToItem(dataset *dataset.Dataset) error { } // Push items to item-to-item recommenders - for _, item := range dataset.GetItems() { + for i, item := range dataset.GetItems() { if !item.IsHidden { for _, recommender := range itemToItemRecommenders { - recommender.Push(&item, nil) + recommender.Push(&item, dataset.GetItemFeedback()[i]) span.Add(1) } } @@ -1018,8 +1034,8 @@ func (m *Master) updateItemToItem(dataset *dataset.Dataset) error { // Save item-to-item recommendations to cache for i, recommender := range itemToItemRecommenders { recommender.PopAll(func(itemId string, score []cache.Score) { - itemToItemConfig := m.Config.Recommend.ItemToItem[i] - if m.needUpdateItemToItem(itemId, m.Config.Recommend.ItemToItem[i]) { + itemToItemConfig := itemToItemConfigs[i] + if m.needUpdateItemToItem(itemId, itemToItemConfigs[i]) { log.Logger().Debug("update item-to-item recommendation", zap.String("item_id", itemId), zap.String("name", itemToItemConfig.Name), @@ -1097,27 +1113,30 @@ func (m *Master) updateUserToUser(dataset *dataset.Dataset) error { cfg.Type = "tags" cfg.Column = "user.Labels" case config.NeighborTypeRelated: - cfg.Type = "users" + cfg.Type = "items" case config.NeighborTypeAuto: cfg.Type = "auto" } userToUserRecommender, err := logics.NewUserToUser(cfg, m.Config.Recommend.CacheSize, dataset.GetTimestamp(), &logics.UserToUserOptions{ - TagsIDF: dataset.GetUserColumnValuesIDF(), + TagsIDF: dataset.GetUserColumnValuesIDF(), + ItemsIDF: dataset.GetItemIDF(), }) if err != nil { return errors.Trace(err) } // Push users to user-to-user recommender - for _, user := range dataset.GetUsers() { - userToUserRecommender.Push(&user, nil) + for i, user := range dataset.GetUsers() { + userToUserRecommender.Push(&user, dataset.GetUserFeedback()[i]) span.Add(1) } // Save user-to-user recommendations to cache userToUserRecommender.PopAll(func(userId string, score []cache.Score) { if m.needUpdateUserToUser(userId) { - log.Logger().Debug("update user neighbors", zap.String("user_id", userId)) + log.Logger().Debug("update user neighbors", + zap.String("user_id", userId), + zap.Int("n_recommendations", len(score))) // Save user-to-user recommendations to cache if err := m.CacheClient.AddScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), score); err != nil { log.Logger().Error("failed to save user neighbors to cache", zap.String("user_id", userId), zap.Error(err)) @@ -1141,7 +1160,7 @@ func (m *Master) needUpdateUserToUser(userId string) bool { ctx := context.Background() // check cache - if items, err := m.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), []string{""}, 0, -1); err != nil { + if items, err := m.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), nil, 0, -1); err != nil { log.Logger().Error("failed to load user neighbors", zap.String("user_id", userId), zap.Error(err)) return true } else if len(items) == 0 { diff --git a/master/tasks_test.go b/master/tasks_test.go index 1ea3c49e6..a56724096 100644 --- a/master/tasks_test.go +++ b/master/tasks_test.go @@ -26,7 +26,7 @@ import ( "github.com/zhenghaoz/gorse/storage/data" ) -func (s *MasterTestSuite) TestFindItemNeighborsBruteForce() { +func (s *MasterTestSuite) TestFindItemNeighbors() { ctx := context.Background() // create config s.Config = &config.Config{} @@ -81,36 +81,34 @@ func (s *MasterTestSuite) TestFindItemNeighborsBruteForce() { } // load mock dataset - dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, + dataset, _, dataSet, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, nil, 0, 0, NewOnlineEvaluator(), nil) s.NoError(err) s.rankingTrainSet = dataset // similar items (common users) s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeRelated - neighborTask := NewFindItemNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err := s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) + s.NoError(s.updateItemToItem(dataSet)) + similar, err := s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), nil, 0, 100) s.NoError(err) s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) // similar items in category (common users) similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{"*"}, 0, 100) s.NoError(err) - s.Equal([]string{"7", "5", "1"}, cache.ConvertDocumentsToValues(similar)) + s.Equal([]string{"7", "5"}, cache.ConvertDocumentsToValues(similar)) // similar items (common labels) err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "8"), time.Now())) s.NoError(err) s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeSimilar - neighborTask = NewFindItemNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) + s.NoError(s.updateItemToItem(dataSet)) + similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), nil, 0, 100) s.NoError(err) s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) // similar items in category (common labels) similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{"*"}, 0, 100) s.NoError(err) - s.Equal([]string{"0", "2", "6"}, cache.ConvertDocumentsToValues(similar)) + s.Equal([]string{"0", "2"}, cache.ConvertDocumentsToValues(similar)) // similar items (auto) err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "8"), time.Now())) @@ -118,167 +116,16 @@ func (s *MasterTestSuite) TestFindItemNeighborsBruteForce() { err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "9"), time.Now())) s.NoError(err) s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeAuto - neighborTask = NewFindItemNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) + s.NoError(s.updateItemToItem(dataSet)) + similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), nil, 0, 100) s.NoError(err) s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) - similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) + similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), nil, 0, 100) s.NoError(err) s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) } -// -//func (s *MasterTestSuite) TestFindItemNeighborsIVF() { -// // create mock master -// ctx := context.Background() -// // create config -// s.Config = &config.Config{} -// s.Config.Recommend.CacheSize = 3 -// s.Config.Master.NumJobs = 4 -// s.Config.Recommend.ItemNeighbors.EnableIndex = true -// s.Config.Recommend.ItemNeighbors.IndexRecall = 1 -// s.Config.Recommend.ItemNeighbors.IndexFitEpoch = 10 -// // collect similar -// items := []data.Item{ -// {ItemId: "0", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"a", "b", "c", "d"}, Comment: ""}, -// {ItemId: "1", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{}, Comment: ""}, -// {ItemId: "2", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"b", "c", "d"}, Comment: ""}, -// {ItemId: "3", IsHidden: false, Categories: nil, Timestamp: time.Now(), Labels: []string{}, Comment: ""}, -// {ItemId: "4", IsHidden: false, Categories: nil, Timestamp: time.Now(), Labels: []string{"b", "c"}, Comment: ""}, -// {ItemId: "5", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{}, Comment: ""}, -// {ItemId: "6", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"c"}, Comment: ""}, -// {ItemId: "7", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{}, Comment: ""}, -// {ItemId: "8", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"a", "b", "c", "d", "e"}, Comment: ""}, -// {ItemId: "9", IsHidden: false, Categories: nil, Timestamp: time.Now(), Labels: []string{}, Comment: ""}, -// } -// feedbacks := make([]data.Feedback, 0) -// for i := 0; i < 10; i++ { -// for j := 0; j <= i; j++ { -// if i%2 == 1 { -// feedbacks = append(feedbacks, data.Feedback{ -// FeedbackKey: data.FeedbackKey{ -// ItemId: strconv.Itoa(i), -// UserId: strconv.Itoa(j), -// FeedbackType: "FeedbackType", -// }, -// Timestamp: time.Now(), -// }) -// } -// } -// } -// var err error -// err = s.DataClient.BatchInsertItems(ctx, items) -// s.NoError(err) -// err = s.DataClient.BatchInsertFeedback(ctx, feedbacks, true, true, true) -// s.NoError(err) -// -// // insert hidden item -// err = s.DataClient.BatchInsertItems(ctx, []data.Item{{ -// ItemId: "10", -// Labels: []string{"a", "b", "c", "d", "e"}, -// IsHidden: true, -// }}) -// s.NoError(err) -// for i := 0; i <= 10; i++ { -// err = s.DataClient.BatchInsertFeedback(ctx, []data.Feedback{{ -// FeedbackKey: data.FeedbackKey{UserId: strconv.Itoa(i), ItemId: "10", FeedbackType: "FeedbackType"}, -// }}, true, true, true) -// s.NoError(err) -// } -// -// // load mock dataset -// dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, -// nil, 0, 0, NewOnlineEvaluator(), nil) -// s.NoError(err) -// s.rankingTrainSet = dataset -// -// // similar items (common users) -// s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeRelated -// neighborTask := NewFindItemNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err := s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) -// // similar items in category (common users) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{"*"}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"7", "5", "1"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar items (common labels) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "8"), time.Now())) -// s.NoError(err) -// s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeSimilar -// neighborTask = NewFindItemNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) -// // similar items in category (common labels) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{"*"}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"0", "2", "6"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar items (auto) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "8"), time.Now())) -// s.NoError(err) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "9"), time.Now())) -// s.NoError(err) -// s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeAuto -// neighborTask = NewFindItemNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) -//} - -//func (s *MasterTestSuite) TestFindItemNeighborsIVF_ZeroIDF() { -// ctx := context.Background() -// // create config -// s.Config = &config.Config{} -// s.Config.Recommend.CacheSize = 3 -// s.Config.Master.NumJobs = 4 -// s.Config.Recommend.ItemNeighbors.EnableIndex = true -// s.Config.Recommend.ItemNeighbors.IndexRecall = 1 -// s.Config.Recommend.ItemNeighbors.IndexFitEpoch = 10 -// -// // create dataset -// err := s.DataClient.BatchInsertItems(ctx, []data.Item{ -// {ItemId: "0", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"a", "a"}, Comment: ""}, -// {ItemId: "1", IsHidden: false, Categories: []string{"*"}, Timestamp: time.Now(), Labels: []string{"a", "a"}, Comment: ""}, -// }) -// s.NoError(err) -// err = s.DataClient.BatchInsertFeedback(ctx, []data.Feedback{ -// {FeedbackKey: data.FeedbackKey{FeedbackType: "FeedbackType", UserId: "0", ItemId: "0"}}, -// {FeedbackKey: data.FeedbackKey{FeedbackType: "FeedbackType", UserId: "0", ItemId: "1"}}, -// }, true, true, true) -// s.NoError(err) -// dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, -// nil, 0, 0, NewOnlineEvaluator(), nil) -// s.NoError(err) -// s.rankingTrainSet = dataset -// -// // similar items (common users) -// s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeRelated -// neighborTask := NewFindItemNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err := s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "0"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"1"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar items (common labels) -// s.Config.Recommend.ItemNeighbors.NeighborType = config.NeighborTypeSimilar -// neighborTask = NewFindItemNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "0"), []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"1"}, cache.ConvertDocumentsToValues(similar)) -//} - -func (s *MasterTestSuite) TestFindUserNeighborsBruteForce() { +func (s *MasterTestSuite) TestFindUserNeighbors() { ctx := context.Background() // create config s.Config = &config.Config{} @@ -317,16 +164,15 @@ func (s *MasterTestSuite) TestFindUserNeighborsBruteForce() { s.NoError(err) err = s.DataClient.BatchInsertFeedback(ctx, feedbacks, true, true, true) s.NoError(err) - dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, + dataset, _, dataSet, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, nil, 0, 0, NewOnlineEvaluator(), nil) s.NoError(err) s.rankingTrainSet = dataset // similar items (common users) s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeRelated - neighborTask := NewFindUserNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err := s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) + s.NoError(s.updateUserToUser(dataSet)) + similar, err := s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "9"), nil, 0, 100) s.NoError(err) s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) @@ -334,9 +180,8 @@ func (s *MasterTestSuite) TestFindUserNeighborsBruteForce() { err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "8"), time.Now())) s.NoError(err) s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeSimilar - neighborTask = NewFindUserNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) + s.NoError(s.updateUserToUser(dataSet)) + similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "8"), nil, 0, 100) s.NoError(err) s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) @@ -346,141 +191,15 @@ func (s *MasterTestSuite) TestFindUserNeighborsBruteForce() { err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "9"), time.Now())) s.NoError(err) s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeAuto - neighborTask = NewFindUserNeighborsTask(&s.Master) - s.NoError(neighborTask.run(context.Background(), nil)) - similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "8"), []string{""}, 0, 100) + s.NoError(s.updateUserToUser(dataSet)) + similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "8"), nil, 0, 100) s.NoError(err) s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) - similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "9"), []string{""}, 0, 100) + similar, err = s.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "9"), nil, 0, 100) s.NoError(err) s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) } -// -//func (s *MasterTestSuite) TestFindUserNeighborsIVF() { -// ctx := context.Background() -// // create config -// s.Config = &config.Config{} -// s.Config.Recommend.CacheSize = 3 -// s.Config.Master.NumJobs = 4 -// s.Config.Recommend.UserNeighbors.EnableIndex = true -// s.Config.Recommend.UserNeighbors.IndexRecall = 1 -// s.Config.Recommend.UserNeighbors.IndexFitEpoch = 10 -// // collect similar -// users := []data.User{ -// {UserId: "0", Labels: []string{"a", "b", "c", "d"}, Subscribe: nil, Comment: ""}, -// {UserId: "1", Labels: []string{}, Subscribe: nil, Comment: ""}, -// {UserId: "2", Labels: []string{"b", "c", "d"}, Subscribe: nil, Comment: ""}, -// {UserId: "3", Labels: []string{}, Subscribe: nil, Comment: ""}, -// {UserId: "4", Labels: []string{"b", "c"}, Subscribe: nil, Comment: ""}, -// {UserId: "5", Labels: []string{}, Subscribe: nil, Comment: ""}, -// {UserId: "6", Labels: []string{"c"}, Subscribe: nil, Comment: ""}, -// {UserId: "7", Labels: []string{}, Subscribe: nil, Comment: ""}, -// {UserId: "8", Labels: []string{"a", "b", "c", "d", "e"}, Subscribe: nil, Comment: ""}, -// {UserId: "9", Labels: []string{}, Subscribe: nil, Comment: ""}, -// } -// feedbacks := make([]data.Feedback, 0) -// for i := 0; i < 10; i++ { -// for j := 0; j <= i; j++ { -// if i%2 == 1 { -// feedbacks = append(feedbacks, data.Feedback{ -// FeedbackKey: data.FeedbackKey{ -// ItemId: strconv.Itoa(j), -// UserId: strconv.Itoa(i), -// FeedbackType: "FeedbackType", -// }, -// Timestamp: time.Now(), -// }) -// } -// } -// } -// var err error -// err = s.DataClient.BatchInsertUsers(ctx, users) -// s.NoError(err) -// err = s.DataClient.BatchInsertFeedback(ctx, feedbacks, true, true, true) -// s.NoError(err) -// dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, -// nil, 0, 0, NewOnlineEvaluator(), nil) -// s.NoError(err) -// s.rankingTrainSet = dataset -// -// // similar items (common users) -// s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeRelated -// neighborTask := NewFindUserNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err := s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "9", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar items (common labels) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "8"), time.Now())) -// s.NoError(err) -// s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeSimilar -// neighborTask = NewFindUserNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "8", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar items (auto) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "8"), time.Now())) -// s.NoError(err) -// err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "9"), time.Now())) -// s.NoError(err) -// s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeAuto -// neighborTask = NewFindUserNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "8", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"0", "2", "4"}, cache.ConvertDocumentsToValues(similar)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "9", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"7", "5", "3"}, cache.ConvertDocumentsToValues(similar)) -//} -// -//func (s *MasterTestSuite) TestFindUserNeighborsIVF_ZeroIDF() { -// ctx := context.Background() -// // create config -// s.Config = &config.Config{} -// s.Config.Recommend.CacheSize = 3 -// s.Config.Master.NumJobs = 4 -// s.Config.Recommend.UserNeighbors.EnableIndex = true -// s.Config.Recommend.UserNeighbors.IndexRecall = 1 -// s.Config.Recommend.UserNeighbors.IndexFitEpoch = 10 -// -// // create dataset -// err := s.DataClient.BatchInsertUsers(ctx, []data.User{ -// {UserId: "0", Labels: []string{"a", "a"}, Subscribe: nil, Comment: ""}, -// {UserId: "1", Labels: []string{"a", "a"}, Subscribe: nil, Comment: ""}, -// }) -// s.NoError(err) -// err = s.DataClient.BatchInsertFeedback(ctx, []data.Feedback{ -// {FeedbackKey: data.FeedbackKey{FeedbackType: "FeedbackType", UserId: "0", ItemId: "0"}}, -// {FeedbackKey: data.FeedbackKey{FeedbackType: "FeedbackType", UserId: "1", ItemId: "0"}}, -// }, true, true, true) -// s.NoError(err) -// dataset, _, _, err := s.LoadDataFromDatabase(context.Background(), s.DataClient, []string{"FeedbackType"}, -// nil, 0, 0, NewOnlineEvaluator(), nil) -// s.NoError(err) -// s.rankingTrainSet = dataset -// -// // similar users (common items) -// s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeRelated -// neighborTask := NewFindUserNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err := s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "0", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"1"}, cache.ConvertDocumentsToValues(similar)) -// -// // similar users (common labels) -// s.Config.Recommend.UserNeighbors.NeighborType = config.NeighborTypeSimilar -// neighborTask = NewFindUserNeighborsTask(&s.Master) -// s.NoError(neighborTask.run(context.Background(), nil)) -// similar, err = s.CacheClient.SearchScores(ctx, cache.UserNeighbors, "0", []string{""}, 0, 100) -// s.NoError(err) -// s.Equal([]string{"1"}, cache.ConvertDocumentsToValues(similar)) -//} - func (s *MasterTestSuite) TestLoadDataFromDatabase() { ctx := context.Background() // create config @@ -701,12 +420,13 @@ func (s *MasterTestSuite) TestNonPersonalizedRecommend() { })) } -func (s *MasterTestSuite) TestCheckItemNeighborCacheTimeout() { +func (s *MasterTestSuite) TestNeedUpdateItemToItem() { s.Config = config.GetDefaultConfig() + recommendConfig := config.ItemToItemConfig{Name: cache.Neighbors} ctx := context.Background() // empty cache - s.True(s.checkItemNeighborCacheTimeout("1", nil)) + s.True(s.needUpdateItemToItem("1", recommendConfig)) err := s.CacheClient.AddScores(ctx, cache.ItemToItem, cache.Key(cache.Neighbors, "1"), []cache.Score{ {Id: "2", Score: 1, Categories: []string{""}}, {Id: "3", Score: 2, Categories: []string{""}}, @@ -715,33 +435,30 @@ func (s *MasterTestSuite) TestCheckItemNeighborCacheTimeout() { s.NoError(err) // digest mismatch - err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.ItemToItemDigest, cache.Key(cache.Neighbors, "1")), "digest")) + err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.ItemToItemDigest, cache.Neighbors, "1"), "digest")) s.NoError(err) - s.True(s.checkItemNeighborCacheTimeout("1", nil)) + s.True(s.needUpdateItemToItem("1", recommendConfig)) // staled cache - err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.ItemToItemDigest, cache.Key(cache.Neighbors, "1")), s.Config.ItemNeighborDigest())) + err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.ItemToItemDigest, cache.Neighbors, "1"), recommendConfig.Hash())) s.NoError(err) - s.True(s.checkItemNeighborCacheTimeout("1", nil)) - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyItemTime, "1"), time.Now().Add(-time.Minute))) + s.True(s.needUpdateItemToItem("1", recommendConfig)) + err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.ItemToItemUpdateTime, cache.Neighbors, "1"), time.Now().Add(-s.Config.Recommend.CacheExpire))) s.NoError(err) - s.True(s.checkItemNeighborCacheTimeout("1", nil)) - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.ItemToItemUpdateTime, cache.Key(cache.Neighbors, "1")), time.Now().Add(-time.Hour))) - s.NoError(err) - s.True(s.checkItemNeighborCacheTimeout("1", nil)) + s.True(s.needUpdateItemToItem("1", recommendConfig)) // not staled cache - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.ItemToItemUpdateTime, cache.Key(cache.Neighbors, "1")), time.Now())) + err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.ItemToItemUpdateTime, cache.Neighbors, "1"), time.Now())) s.NoError(err) - s.False(s.checkItemNeighborCacheTimeout("1", nil)) + s.False(s.needUpdateItemToItem("1", recommendConfig)) } -func (s *MasterTestSuite) TestCheckUserNeighborCacheTimeout() { +func (s *MasterTestSuite) TestNeedUpdateUserToUser() { ctx := context.Background() s.Config = config.GetDefaultConfig() // empty cache - s.True(s.checkUserNeighborCacheTimeout("1")) + s.True(s.needUpdateUserToUser("1")) err := s.CacheClient.AddScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, "1"), []cache.Score{ {Id: "1", Score: 1, Categories: []string{""}}, {Id: "2", Score: 2, Categories: []string{""}}, @@ -750,23 +467,20 @@ func (s *MasterTestSuite) TestCheckUserNeighborCacheTimeout() { s.NoError(err) // digest mismatch - err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.UserToUserDigest, cache.Key(cache.Neighbors, "1")), "digest")) + err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.UserToUserDigest, cache.Neighbors, "1"), "digest")) s.NoError(err) - s.True(s.checkUserNeighborCacheTimeout("1")) + s.True(s.needUpdateUserToUser("1")) // staled cache - err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.UserToUserDigest, cache.Key(cache.Neighbors, "1")), s.Config.UserNeighborDigest())) - s.NoError(err) - s.True(s.checkUserNeighborCacheTimeout("1")) - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.LastModifyUserTime, "1"), time.Now().Add(-time.Minute))) + err = s.CacheClient.Set(ctx, cache.String(cache.Key(cache.UserToUserDigest, cache.Neighbors, "1"), s.Config.UserNeighborDigest())) s.NoError(err) - s.True(s.checkUserNeighborCacheTimeout("1")) - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.UserToUserUpdateTime, cache.Key(cache.Neighbors, "1")), time.Now().Add(-time.Hour))) + s.True(s.needUpdateUserToUser("1")) + err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.UserToUserUpdateTime, cache.Neighbors, "1"), time.Now().Add(-s.Config.Recommend.CacheExpire))) s.NoError(err) - s.True(s.checkUserNeighborCacheTimeout("1")) + s.True(s.needUpdateUserToUser("1")) // not staled cache - err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.UserToUserUpdateTime, cache.Key(cache.Neighbors, "1")), time.Now())) + err = s.CacheClient.Set(ctx, cache.Time(cache.Key(cache.UserToUserUpdateTime, cache.Neighbors, "1"), time.Now())) s.NoError(err) - s.False(s.checkUserNeighborCacheTimeout("1")) + s.False(s.needUpdateUserToUser("1")) }