Skip to content

Commit

Permalink
Fix test
Browse files Browse the repository at this point in the history
  • Loading branch information
zhenghaoz committed Jan 25, 2025
1 parent 10e3f35 commit 0ae900d
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 351 deletions.
16 changes: 15 additions & 1 deletion config/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,20 @@ score = "count(feedback, .FeedbackType == 'star')"
# The filter for items in the leaderboard.
filter = "(now() - item.Timestamp).Hours() < 168"

# [[recommend.item-to-item]]

# # The name of the item-to-item recommender.
# name = "similar_embedding"

# # The type of the item-to-item recommender. There are three types:
# # embedding: recommend by Euclidean distance of embeddings.
# # tags: recommend by number of common tags.
# # users: recommend by number of common users.
# type = "embedding"

# # The column of the item embeddings. Leave blank if type is "users".
# column = "item.Labels.embedding"

[recommend.user_neighbors]

# The type of neighbors for users. There are three types:
Expand All @@ -157,7 +171,7 @@ filter = "(now() - item.Timestamp).Hours() < 168"
# auto: If a user has labels, neighbors are found by the number of common labels.
# If the user has no labels, neighbors are found by the number of common liked items.
# The default value is "auto".
neighbor_type = "similar"
neighbor_type = "related"

[recommend.item_neighbors]

Expand Down
15 changes: 6 additions & 9 deletions logics/item_to_item.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ package logics

import (
"errors"
"sort"
"time"

"github.com/chewxy/math32"
mapset "github.com/deckarep/golang-set/v2"
"github.com/expr-lang/expr"
Expand All @@ -29,8 +32,6 @@ import (
"github.com/zhenghaoz/gorse/storage/cache"
"github.com/zhenghaoz/gorse/storage/data"
"go.uber.org/zap"
"sort"
"time"
)

type ItemToItemOptions struct {
Expand Down Expand Up @@ -304,7 +305,7 @@ func (idf IDF) distance(a, b []dataset.ID) float32 {
if len(a) == len(b) && commonCount == float32(len(a)) {
// If two items have the same tags, their distance is zero.
return 0
} else if commonCount > 0 {
} else if commonCount > 0 && len(a) > 0 && len(b) > 0 {
// Add shrinkage to avoid division by zero
return 1 - commonSum*commonCount/
math32.Sqrt(idf.weightedSum(a))/
Expand All @@ -320,9 +321,7 @@ func (idf IDF) weightedSumCommonElements(a, b []dataset.ID) (float32, float32) {
i, j, sum, count := 0, 0, float32(0), float32(0)
for i < len(a) && j < len(b) {
if a[i] == b[j] {
if a[i] >= 0 && int(a[i]) < len(idf) {
sum += idf[a[i]]
}
sum += idf[a[i]]
count++
i++
j++
Expand All @@ -338,9 +337,7 @@ func (idf IDF) weightedSumCommonElements(a, b []dataset.ID) (float32, float32) {
func (idf IDF) weightedSum(a []dataset.ID) float32 {
var sum float32
for _, i := range a {
if i >= 0 && int(i) < len(idf) {
sum += idf[i]
}
sum += idf[i]
}
return sum
}
Expand Down
47 changes: 33 additions & 14 deletions master/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -990,14 +990,30 @@ func (m *Master) LoadDataFromDatabase(

func (m *Master) updateItemToItem(dataset *dataset.Dataset) error {
ctx, span := m.tracer.Start(context.Background(), "Generate item-to-item recommendation",
len(dataset.GetItems())*len(m.Config.Recommend.ItemToItem)*2)
len(dataset.GetItems())*(len(m.Config.Recommend.ItemToItem)+1)*2)
defer span.End()

// Add built-in item-to-item recommenders
itemToItemConfigs := m.Config.Recommend.ItemToItem
builtInConfig := config.ItemToItemConfig{}
builtInConfig.Name = cache.Neighbors
switch m.Config.Recommend.ItemNeighbors.NeighborType {
case config.NeighborTypeSimilar:
builtInConfig.Type = "tags"
builtInConfig.Column = "item.Labels"
case config.NeighborTypeRelated:
builtInConfig.Type = "users"
case config.NeighborTypeAuto:
builtInConfig.Type = "auto"
}
itemToItemConfigs = append(itemToItemConfigs, builtInConfig)

// Build item-to-item recommenders
itemToItemRecommenders := make([]logics.ItemToItem, 0, len(m.Config.Recommend.ItemToItem))
for _, cfg := range m.Config.Recommend.ItemToItem {
itemToItemRecommenders := make([]logics.ItemToItem, 0, len(itemToItemConfigs))
for _, cfg := range itemToItemConfigs {
recommender, err := logics.NewItemToItem(cfg, m.Config.Recommend.CacheSize, dataset.GetTimestamp(), &logics.ItemToItemOptions{
TagsIDF: dataset.GetItemColumnValuesIDF(),
TagsIDF: dataset.GetItemColumnValuesIDF(),
UsersIDF: dataset.GetUserIDF(),
})
if err != nil {
return errors.Trace(err)
Expand All @@ -1006,10 +1022,10 @@ func (m *Master) updateItemToItem(dataset *dataset.Dataset) error {
}

// Push items to item-to-item recommenders
for _, item := range dataset.GetItems() {
for i, item := range dataset.GetItems() {
if !item.IsHidden {
for _, recommender := range itemToItemRecommenders {
recommender.Push(&item, nil)
recommender.Push(&item, dataset.GetItemFeedback()[i])
span.Add(1)
}
}
Expand All @@ -1018,8 +1034,8 @@ func (m *Master) updateItemToItem(dataset *dataset.Dataset) error {
// Save item-to-item recommendations to cache
for i, recommender := range itemToItemRecommenders {
recommender.PopAll(func(itemId string, score []cache.Score) {
itemToItemConfig := m.Config.Recommend.ItemToItem[i]
if m.needUpdateItemToItem(itemId, m.Config.Recommend.ItemToItem[i]) {
itemToItemConfig := itemToItemConfigs[i]
if m.needUpdateItemToItem(itemId, itemToItemConfigs[i]) {
log.Logger().Debug("update item-to-item recommendation",
zap.String("item_id", itemId),
zap.String("name", itemToItemConfig.Name),
Expand Down Expand Up @@ -1097,27 +1113,30 @@ func (m *Master) updateUserToUser(dataset *dataset.Dataset) error {
cfg.Type = "tags"
cfg.Column = "user.Labels"
case config.NeighborTypeRelated:
cfg.Type = "users"
cfg.Type = "items"
case config.NeighborTypeAuto:
cfg.Type = "auto"
}
userToUserRecommender, err := logics.NewUserToUser(cfg, m.Config.Recommend.CacheSize, dataset.GetTimestamp(), &logics.UserToUserOptions{
TagsIDF: dataset.GetUserColumnValuesIDF(),
TagsIDF: dataset.GetUserColumnValuesIDF(),
ItemsIDF: dataset.GetItemIDF(),
})
if err != nil {
return errors.Trace(err)
}

// Push users to user-to-user recommender
for _, user := range dataset.GetUsers() {
userToUserRecommender.Push(&user, nil)
for i, user := range dataset.GetUsers() {
userToUserRecommender.Push(&user, dataset.GetUserFeedback()[i])
span.Add(1)
}

// Save user-to-user recommendations to cache
userToUserRecommender.PopAll(func(userId string, score []cache.Score) {
if m.needUpdateUserToUser(userId) {
log.Logger().Debug("update user neighbors", zap.String("user_id", userId))
log.Logger().Debug("update user neighbors",
zap.String("user_id", userId),
zap.Int("n_recommendations", len(score)))
// Save user-to-user recommendations to cache
if err := m.CacheClient.AddScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), score); err != nil {
log.Logger().Error("failed to save user neighbors to cache", zap.String("user_id", userId), zap.Error(err))
Expand All @@ -1141,7 +1160,7 @@ func (m *Master) needUpdateUserToUser(userId string) bool {
ctx := context.Background()

// check cache
if items, err := m.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), []string{""}, 0, -1); err != nil {
if items, err := m.CacheClient.SearchScores(ctx, cache.UserToUser, cache.Key(cache.Neighbors, userId), nil, 0, -1); err != nil {
log.Logger().Error("failed to load user neighbors", zap.String("user_id", userId), zap.Error(err))
return true
} else if len(items) == 0 {
Expand Down
Loading

0 comments on commit 0ae900d

Please sign in to comment.