Skip to content

Commit

Permalink
[Azure Search] Boost freshness (#521)
Browse files Browse the repository at this point in the history
Add a scoring profile that gives reasonable relevancy by favoring packages with high download counts that have been updated recently. I tested this by indexing PROD and then trying queries from our OneNote by hand.

Addresses #6904
  • Loading branch information
loic-sharma authored May 29, 2019
1 parent 72057e5 commit 2151f87
Show file tree
Hide file tree
Showing 9 changed files with 66 additions and 37 deletions.
4 changes: 2 additions & 2 deletions src/NuGet.Services.AzureSearch/IndexBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ private Index InitializeIndex<TDocument>(string name, bool addScoringProfile)

if (addScoringProfile)
{
index.ScoringProfiles = new List<ScoringProfile> { DownloadCountBoosterProfile.Instance };
index.DefaultScoringProfile = DownloadCountBoosterProfile.Name;
index.ScoringProfiles = new List<ScoringProfile> { DefaultScoringProfile.Instance };
index.DefaultScoringProfile = DefaultScoringProfile.Name;
}

return index;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public abstract class BaseMetadataDocument : CommittedDocument, IBaseMetadataDoc
public string ProjectUrl { get; set; }

[IsSortable]
[IsFilterable]
public DateTimeOffset? Published { get; set; }

public string ReleaseNotes { get; set; }
Expand Down
3 changes: 3 additions & 0 deletions src/NuGet.Services.AzureSearch/Models/SearchDocument.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ public class Full : UpdateLatest
{
[IsFilterable]
public long? TotalDownloadCount { get; set; }

[IsFilterable]
public double? LogBase2DownloadCount { get; set; }
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
<Compile Include="PackageIdToOwnersBuilder.cs" />
<Compile Include="IOwnerDataClient.cs" />
<Compile Include="OwnerDataClient.cs" />
<Compile Include="ScoringProfiles\DownloadCountBoosterProfile.cs" />
<Compile Include="ScoringProfiles\DefaultScoringProfile.cs" />
<Compile Include="Owners2AzureSearch\Owners2AzureSearchCommand.cs" />
<Compile Include="SearchService\AzureSearchQueryBuilder.cs" />
<Compile Include="BlobContainerBuilder.cs" />
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Generic;
using Microsoft.Azure.Search.Models;
using NuGet.Services.AzureSearch.SearchService;

namespace NuGet.Services.AzureSearch.ScoringProfiles
{
    /// <summary>
    /// Defines the scoring profile named <see cref="Name"/>. It combines text weights on the
    /// package id fields with a download count boost and a freshness boost.
    /// </summary>
    public static class DefaultScoringProfile
    {
        /// <summary>
        /// The name under which the scoring profile is registered.
        /// </summary>
        public const string Name = "nuget_scoring_profile";

        /// <summary>
        /// The scoring profile definition. The results of the individual scoring functions are
        /// summed together before they multiply the base relevance score.
        /// See: https://stackoverflow.com/questions/41427940/how-do-scoring-profiles-generate-scores-in-azure-search
        /// </summary>
        public static readonly ScoringProfile Instance = new ScoringProfile(
            Name,
            textWeights: BuildTextWeights(),
            functions: BuildScoringFunctions(),
            functionAggregation: ScoringFunctionAggregation.Sum);

        /// <summary>
        /// Builds the per-field text weights used by the profile.
        /// </summary>
        private static TextWeights BuildTextWeights()
        {
            var fieldWeights = new Dictionary<string, double>();

            // An exact match on the package id gets the highest weight...
            fieldWeights.Add(IndexFields.PackageId, 10);

            // ...and a match on the tokenized package id comes second.
            fieldWeights.Add(IndexFields.TokenizedPackageId, 5);

            return new TextWeights { Weights = fieldWeights };
        }

        /// <summary>
        /// Builds the scoring functions used by the profile: the download count boost followed
        /// by the freshness boost.
        /// </summary>
        private static List<ScoringFunction> BuildScoringFunctions()
        {
            // Results with high download counts are boosted heavily. The function reads the
            // base-2 logarithm of the download count and interpolates linearly, so the boost
            // grows more slowly as download counts get larger. Applying a logarithmic
            // interpolation to the raw download count instead would produce a very wide
            // boosting range, which could only be compensated with an unmanageably large
            // boosting factor.
            var downloadRange = new MagnitudeScoringParameters(
                boostingRangeStart: 0,
                boostingRangeEnd: Math.Log(999_999_999_999, 2),
                shouldBoostBeyondRangeByConstant: true);

            var downloadBoost = new MagnitudeScoringFunction(
                fieldName: IndexFields.Search.LogBase2DownloadCount,
                boost: 100.0,
                parameters: downloadRange,
                interpolation: ScoringFunctionInterpolation.Linear);

            // Recently published results are boosted as well. The quadratic interpolation makes
            // the boost fall off faster as the published date approaches the end of the
            // boosting duration.
            var freshnessBoost = new FreshnessScoringFunction(
                fieldName: IndexFields.Published,
                boost: 5.0,
                boostingDuration: TimeSpan.FromDays(365),
                interpolation: ScoringFunctionInterpolation.Quadratic);

            return new List<ScoringFunction> { downloadBoost, freshnessBoost };
        }
    }
}

This file was deleted.

1 change: 1 addition & 0 deletions src/NuGet.Services.AzureSearch/SearchDocumentBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ public SearchDocument.Full FullFromDb(
owners: owners);
DocumentUtilities.PopulateMetadata(document, packageId, package);
document.TotalDownloadCount = totalDownloadCount;
document.LogBase2DownloadCount = Math.Log(totalDownloadCount, 2.0);

return document;
}
Expand Down
2 changes: 2 additions & 0 deletions src/NuGet.Services.AzureSearch/SearchService/IndexFields.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ private static string Name(string input)
}

public static readonly string Authors = Name(nameof(BaseMetadataDocument.Authors));
public static readonly string Created = Name(nameof(BaseMetadataDocument.Created));
public static readonly string Description = Name(nameof(BaseMetadataDocument.Description));
public static readonly string LastEdited = Name(nameof(BaseMetadataDocument.LastEdited));
public static readonly string NormalizedVersion = Name(nameof(BaseMetadataDocument.NormalizedVersion));
Expand All @@ -32,6 +33,7 @@ public static class Search
public static readonly string Owners = Name(nameof(SearchDocument.Full.Owners));
public static readonly string SearchFilters = Name(nameof(SearchDocument.UpdateLatest.SearchFilters));
public static readonly string TotalDownloadCount = Name(nameof(SearchDocument.Full.TotalDownloadCount));
public static readonly string LogBase2DownloadCount = Name(nameof(SearchDocument.Full.LogBase2DownloadCount));
public static readonly string Versions = Name(nameof(SearchDocument.UpdateLatest.Versions));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,7 @@ public async Task SetsExpectedProperties(SearchFilters searchFilters, string exp
{
""@search.action"": ""upload"",
""totalDownloadCount"": 1001,
""logBase2DownloadCount"": 9.9672262588359928,
""owners"": [
""Microsoft"",
""azure-sdk""
Expand Down

0 comments on commit 2151f87

Please sign in to comment.