Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: config options for fuzzy search #898

Merged
merged 21 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion db-service/lib/cqn4sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -2203,7 +2203,7 @@ function cqn4sql(originalQuery, model) {
// The searchable columns are always passed as a list, even when there is only
// a single column, so that `search` receives exactly two arguments:
// the column list and the search term.
const searchFunc = {
  func: 'search',
  args: [
    { list: searchIn },
    // a single plain value is passed through as is, anything else as an expression
    xpr.length === 1 && 'val' in xpr[0] ? xpr[0] : { xpr },
  ],
}
Expand Down
57 changes: 55 additions & 2 deletions hana/lib/cql-functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,63 @@ const StandardFunctions = {
contains: (...args) => args.length > 2 ? `CONTAINS(${args})` : `(CASE WHEN coalesce(locate(${args}),0)>0 THEN TRUE ELSE FALSE END)`,
concat: (...args) => `(${args.map(a => (a.xpr ? `(${a})` : a)).join(' || ')})`,
search: function (ref, arg) {
if (cds.env.hana.fuzzy === false) {
// REVISIT: remove once the protocol adapter only creates vals
arg = arg.xpr ? arg.xpr : arg
if (Array.isArray(arg)) arg = [{ val: arg.filter(a => a.val).map(a => a.val).join(' ') }]
else arg = [arg]
const searchTerms = arg[0].val
.match(/("")|("(?:[^"]|\\")*(?:[^\\]|\\\\)")|(\S*)/g)
.filter(el => el.length).map(el => `%${el.replace(/^\"|\"$/g, '').toLowerCase()}%`)

const columns = ref.list
const xpr = []
for (const s of searchTerms) {
const nestedXpr = []
for (const c of columns) {
if (nestedXpr.length) nestedXpr.push('or')
nestedXpr.push({ func: 'lower', args: [c]}, 'like', {val: s})
}
if (xpr.length) xpr.push('and')
xpr.push({xpr: nestedXpr})
}

const { toString } = ref
return `(CASE WHEN (${toString({ xpr })}) THEN TRUE ELSE FALSE END)`
}

// fuzziness config
const fuzzyIndex = cds.env.hana?.fuzzy || 0.7

const csnElements = ref.list
// if column specific value is provided, the configuration has to be defined on column level
if (csnElements.some(e => e.element?.['@Search.ranking'] || e.element?.['@Search.fuzzinessThreshold'])) {
csnElements.forEach(e => {
let fuzzy = `FUZZY`

// weighted search
const rank = e.element?.['@Search.ranking']?.['=']
if(rank === 'HIGH') fuzzy += ' WEIGHT 0.8'
else if(rank === 'LOW') fuzzy += ' WEIGHT 0.3'
else fuzzy += ' WEIGHT 0.5' // MEDIUM

// fuzziness
fuzzy+= ` MINIMAL TOKEN SCORE ${e.element?.['@Search.fuzzinessThreshold'] || fuzzyIndex}`
fuzzy+= " SIMILARITY CALCULATION MODE 'search'"

// rewrite ref to xpr to mix in search config
// ensure in place modification to reuse .toString method that ensures quoting
e.xpr = [{ ref: e.ref }, fuzzy]
delete e.ref
})
} else {
ref = `${ref} FUZZY MINIMAL TOKEN SCORE ${fuzzyIndex} SIMILARITY CALCULATION MODE 'search'`
}

// REVISIT: remove once the protocol adapter only creates vals
if (Array.isArray(arg.xpr)) arg = { val: arg.xpr.filter(a => a.val).map(a => a.val).join(' ') }
// REVISIT: make this more configurable
return (`(CASE WHEN SCORE(${arg} IN ${ref} FUZZY MINIMAL TOKEN SCORE 0.7 SIMILARITY CALCULATION MODE 'search') > 0 THEN TRUE ELSE FALSE END)`)

return (`(CASE WHEN SCORE(${arg} IN ${ref}) > 0 THEN TRUE ELSE FALSE END)`)
},

// Date and Time Functions
Expand Down
6 changes: 5 additions & 1 deletion hana/test/fuzzy.cds
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
// Entities from the shared bookshop test model
using {sap.capire.bookshop.Books as Books} from '../../test/bookshop/db/schema.cds';
using {sap.capire.bookshop.BooksAnnotated as BooksAnnotated} from '../../test/bookshop/db/schema.cds';

// Declares title, descr and currency.code as the @cds.search elements of BooksAnnotated
annotate BooksAnnotated with @cds.search: {title, descr, currency.code};
// Element-level search configuration: ranking and fuzziness threshold per element
annotate BooksAnnotated:title with @(Search.ranking: HIGH, Search.fuzzinessThreshold: 0.9);
annotate BooksAnnotated:descr with @(Search.ranking: LOW, Search.fuzzinessThreshold: 0.6);
81 changes: 70 additions & 11 deletions hana/test/fuzzy.test.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,78 @@
const cds = require('../../test/cds')

// Covers both renderings of `search` on HANA: fuzzy search (default) and the
// LIKE based fallback that is active when `cds.env.hana.fuzzy` is `false`.
describe('search', () => {
  const { expect } = cds.test(__dirname, 'fuzzy.cds')

  // every test starts without a global fuzzy configuration
  beforeEach(() => {
    delete cds.env.hana.fuzzy
  })

  describe('fuzzy', () => {
    test('default', async () => {
      const { Books } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(Books).search('"autobio"').columns('1')
      const { sql } = cqn.toSQL()
      expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 0.7')
      const res = await cqn
      expect(res.length).to.eq(2) // Eleonora and Jane Eyre
    })

    //HCE returns different result than HXE
    test.skip('multiple search terms', async () => {
      const { Books } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(Books).search('"autobio" "jane"').columns('1')
      const { sql, values } = cqn.toSQL()
      expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 0.7')
      expect(values[0]).to.eq('"autobio" "jane"') // taken as is
      const res = await cqn
      expect(res.length).to.eq(2) // Eleonora and Jane Eyre
    })

    test('global config', async () => {
      cds.env.hana.fuzzy = 1
      const { Books } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(Books).search('"autobio"').columns('1')
      const { sql } = cqn.toSQL()
      expect(sql).to.include('FUZZY MINIMAL TOKEN SCORE 1')
      const res = await cqn
      expect(res.length).to.eq(2) // Eleonora and Jane Eyre
    })

    test('annotations', async () => {
      const { BooksAnnotated } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(BooksAnnotated).search('"heights"').columns('1')
      const { sql } = cqn.toSQL()
      expect(sql).to.include('title FUZZY WEIGHT 0.8 MINIMAL TOKEN SCORE 0.9')
      expect(sql).to.include('code FUZZY WEIGHT 0.5 MINIMAL TOKEN SCORE 0.7')
      expect(sql).to.include('descr FUZZY WEIGHT 0.3 MINIMAL TOKEN SCORE 0.6')

      const res = await SELECT.from(BooksAnnotated).search('"heights"')
      expect(res[0].title).to.eq('Wuthering Heights')
    })
  })

  describe('like', () => {
    // switch to the LIKE based fallback for this group
    beforeEach(() => {
      cds.env.hana.fuzzy = false
    })

    test('fallback - 1 search term', async () => {
      const { Books } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(Books).search('"autobio"').columns('1')
      const { sql } = cqn.toSQL()
      // 5 columns to be searched createdBy, modifiedBy, title, descr, currency_code
      expect(sql.match(/(like)/g).length).to.eq(5)
      const res = await cqn
      expect(res.length).to.eq(2) // Eleonora and Jane Eyre
    })

    test('fallback - 2 search terms', async () => {
      const { Books } = cds.entities('sap.capire.bookshop')
      const cqn = SELECT.from(Books).search('"autobio"', '"Jane"').columns('1')
      const { sql, values } = cqn.toSQL()
      // 5 columns to be searched createdBy, modifiedBy, title, descr, currency_code
      expect(sql.match(/(like)/g).length).to.eq(10)
      expect(values).to.include('%autobio%')
      expect(values).to.include('%jane%')
      const res = await cqn
      expect(res.length).to.eq(1) // Jane Eyre
    })
  })
})
3 changes: 2 additions & 1 deletion test/bookshop/db/schema.cds
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,5 @@ entity C : managed {
B : Integer;
toB : Composition of many B
on toB.ID = $self.B;
}
};
// Plain projection on Books; hana/test/fuzzy.cds attaches search annotations to it
entity BooksAnnotated as projection on Books;
Loading