Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: Add support of multilingual search #40

Merged
merged 5 commits into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ GIT

GIT
remote: https://github.com/ontoportal-lirmm/ontologies_linked_data.git
revision: e4b3a6d9bf575c1420924d4dbe1490248040aff7
branch: feature/add-multi-provider-authentification
revision: 4c89c8346766d23e09b24c8e29750bf3a91e6b53
branch: development
specs:
ontologies_linked_data (0.0.1)
activesupport
Expand Down Expand Up @@ -105,7 +105,7 @@ GEM
multi_json (~> 1.0)
addressable (2.8.5)
public_suffix (>= 2.0.2, < 6.0)
airbrussh (1.4.1)
airbrussh (1.4.2)
sshkit (>= 1.6.1, != 1.7.0)
backports (3.24.1)
bcrypt (3.1.19)
Expand Down Expand Up @@ -207,10 +207,10 @@ GEM
net-smtp
memoist (0.16.2)
method_source (1.0.0)
mime-types (3.4.1)
mime-types (3.5.1)
mime-types-data (~> 3.2015)
mime-types-data (3.2023.0218.1)
mini_mime (1.1.2)
mime-types-data (3.2023.0808)
mini_mime (1.1.5)
minitest (4.7.5)
minitest-stub_any_instance (1.0.3)
mlanett-redis-lock (0.2.7)
Expand All @@ -231,7 +231,7 @@ GEM
net-protocol
net-ssh (7.2.0)
netrc (0.11.0)
newrelic_rpm (9.3.1)
newrelic_rpm (9.4.2)
oj (2.18.5)
omni_logger (0.1.4)
logger
Expand All @@ -248,7 +248,7 @@ GEM
rack (>= 0.4)
rack-attack (6.6.1)
rack (>= 1.0, < 3)
rack-cache (1.13.0)
rack-cache (1.14.0)
rack (>= 0.4)
rack-cors (1.0.6)
rack (>= 1.6.0)
Expand Down
15 changes: 9 additions & 6 deletions helpers/search_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ def get_term_search_query(text, params={})
end
end

lang = params["lang"] || params["language"]
lang_suffix = lang && !lang.eql?("all") ? "_#{lang}" : ""

query = ""
params["defType"] = "edismax"
params["stopwords"] = "true"
Expand All @@ -98,25 +101,25 @@ def get_term_search_query(text, params={})

if params[EXACT_MATCH_PARAM] == "true"
query = "\"#{solr_escape(text)}\""
params["qf"] = "resource_id^20 prefLabelExact^10 synonymExact #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact synonymExact #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^20 prefLabelExact#{lang_suffix }^10 synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } synonymExact#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
elsif params[SUGGEST_PARAM] == "true" || text[-1] == '*'
text.gsub!(/\*+$/, '')
query = "\"#{solr_escape(text)}\""
params["qt"] = "/suggest_ncbo"
params["qf"] = "prefLabelExact^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["qf"] = "prefLabelExact#{lang_suffix }^100 prefLabelSuggestEdge^50 synonymSuggestEdge^10 prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["pf"] = "prefLabelSuggest^50"
params["hl.fl"] = "prefLabelExact prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "prefLabelExact#{lang_suffix } prefLabelSuggestEdge synonymSuggestEdge prefLabelSuggestNgram synonymSuggestNgram resource_id #{QUERYLESS_FIELDS_STR}"
else
if text.strip.empty?
query = '*'
else
query = solr_escape(text)
end

params["qf"] = "resource_id^100 prefLabelExact^90 prefLabel^70 synonymExact^50 synonym^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] = "resource_id^100 prefLabelExact#{lang_suffix }^90 prefLabel#{lang_suffix }^70 synonymExact#{lang_suffix }^50 synonym#{lang_suffix }^10 #{QUERYLESS_FIELDS_STR}"
params["qf"] << " property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
params["hl.fl"] = "resource_id prefLabelExact prefLabel synonymExact synonym #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "resource_id prefLabelExact#{lang_suffix } prefLabel#{lang_suffix } synonymExact#{lang_suffix } synonym#{lang_suffix } #{QUERYLESS_FIELDS_STR}"
params["hl.fl"] = "#{params["hl.fl"]} property" if params[INCLUDE_PROPERTIES_PARAM] == "true"
end

Expand Down
44 changes: 44 additions & 0 deletions test/controllers/test_search_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -213,4 +213,48 @@ def test_search_provisional_class
assert_equal @@test_pc_child.label, provisional[0]["prefLabel"].first
end

# Checks that the /search endpoint honours the `lang` query parameter.
#
# Every request targets the BROSEARCHTEST-0 ontology and looks for the
# Activity class in the returned collection. The label values used below
# ("Activity", "ActivityEnglish", "Activit%C3%A9") presumably correspond to
# the untagged, English and French prefLabels of that class in the test
# ontology -- confirm against the fixture data.
#
# Expectations:
# * without `lang`, and with a matching `lang`, the class is found;
# * with `require_exact_match=true`, a label is only matched in its own
#   language (an English label is not found with lang=fr, and the plain
#   "Activity" label is not found with lang=en).
def test_multilingual_search
  activity_id = 'http://bioontology.org/ontologies/Activity.owl#Activity'

  # Performs the GET request and returns the parsed JSON body.
  fetch = lambda do |path|
    get path
    MultiJson.load(last_response.body)
  end

  # Returns the Activity entry of a parsed response, or nil when absent.
  # `find` stops at the first match instead of building an intermediate array.
  activity_in = lambda do |parsed|
    parsed["collection"].find { |hit| hit["@id"].to_s.eql?(activity_id) }
  end

  # No language requested.
  res = fetch.call("/search?q=Activity&ontologies=BROSEARCHTEST-0")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  #res = LinkedData::Models::Class.search("prefLabel_none:Activity", {:fq => "submissionAcronym:BROSEARCHTEST-0", :start => 0, :rows => 80}, :main)
  #refute_equal 0, res["response"]["numFound"]
  #refute_nil res["response"]["docs"].select{|doc| doc["resource_id"].eql?('http://bioontology.org/ontologies/Activity.owl#Activity')}.first

  # French label searched in French.
  res = fetch.call("/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  # English label searched in English.
  res = fetch.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en")
  refute_equal 0, res["totalCount"]
  refute_nil activity_in.call(res)

  # Exact match: the English label must not be found when searching in French.
  res = fetch.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true")
  assert_nil activity_in.call(res)

  # Exact match: the English label is found when searching in English.
  res = fetch.call("/search?q=ActivityEnglish&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true")
  refute_nil activity_in.call(res)

  # Exact match: the plain "Activity" label must not be found in English.
  res = fetch.call("/search?q=Activity&ontologies=BROSEARCHTEST-0&lang=en&require_exact_match=true")
  assert_nil activity_in.call(res)

  # Exact match: the French label is found when searching in French.
  res = fetch.call("/search?q=Activit%C3%A9&ontologies=BROSEARCHTEST-0&lang=fr&require_exact_match=true")
  refute_nil activity_in.call(res)
end

end
3 changes: 3 additions & 0 deletions test/data/ontology_files/BRO_v3.2.owl
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,9 @@

<owl:Class rdf:about="&activity;Activity">
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel rdf:datatype="&xsd;string">Activity</core:prefLabel>
<core:prefLabel xml:lang="en">ActivityEnglish</core:prefLabel>
<core:prefLabel xml:lang="fr">Activité</core:prefLabel>
<desc:definition rdf:datatype="&xsd;string">Activity of interest that may be related to a BRO:Resource.</desc:definition>
<core:altLabel>activities</core:altLabel>
</owl:Class>
Expand Down
13 changes: 13 additions & 0 deletions test/solr/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Docker Compose definition for a local Solr instance.
# NOTE(review): nesting indentation is not visible here; the keys below are
# presumably nested under services -> op_solr -- confirm in the real file.
version: '3.8'

services:
op_solr:
# Solr 8.8 image.
image: solr:8.8
volumes:
# Mount the local configsets directory read-only at /configsets.
- ./solr_configsets:/configsets:ro
ports:
# Publish container port 8983 on host port 8983.
- "8983:8983"
# Pre-create the two cores (term_search_core1, prop_search_core1) from the
# mounted configsets, then run Solr in the foreground.
command: >
bash -c "precreate-core term_search_core1 /configsets/term_search
&& precreate-core prop_search_core1 /configsets/property_search
&& solr-foreground"
35 changes: 20 additions & 15 deletions test/solr/generate_ncbo_configsets.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,23 @@
# generates solr configsets by merging _default configset with config files in config/solr
# _default is copied from the Solr distribution: solr-8.10.1/server/solr/configsets/_default/

pushd solr/configsets
ld_config='../../../../ontologies_linked_data/config/solr/'
#ld_config='../../../../config/solr/'
ls -l $ld_config
pwd
[ -d property_search ] && rm -Rf property_search
[ -d term_search ] && rm -Rf property_search
[ -d $ld_config/property_search ] || echo "cant find ontologies_linked_data project"
mkdir -p property_search/conf
mkdir -p term_search/conf
cp -a _default/conf/* property_search/conf/
cp -a _default/conf/* term_search/conf/
cp -a $ld_config/property_search/* property_search/conf
cp -a $ld_config/term_search/* term_search/conf
popd
#cd solr/configsets
# Directory holding the project-specific Solr config files to merge in.
# NOTE(review): both paths are relative, so the script presumably has to be
# run from the repository root -- confirm.
ld_config='config/solr'
# Directory where the merged configsets are generated; it must already
# contain the stock _default configset.
configsets='test/solr/configsets'
# Drop any configsets generated by a previous run.
[ -d ${configsets}/property_search ] && rm -Rf ${configsets}/property_search
[ -d ${configsets}/term_search ] && rm -Rf ${configsets}/term_search
# Abort when the project config files are missing (only property_search is probed).
if [[ ! -d ${ld_config}/property_search ]]; then
echo 'cant find ld solr config sets'
exit 1
fi
# Abort when the stock _default configset is missing.
if [[ ! -d ${configsets}/_default/conf ]]; then
echo 'cant find default solr configset'
exit 1
fi
# Recreate empty conf directories for both configsets.
mkdir -p ${configsets}/property_search/conf
mkdir -p ${configsets}/term_search/conf
# Seed each configset with the _default configuration ...
cp -a ${configsets}/_default/conf/* ${configsets}/property_search/conf/
cp -a ${configsets}/_default/conf/* ${configsets}/term_search/conf/
# ... then copy the project files over it, so same-named files replace the defaults.
cp -a $ld_config/property_search/* ${configsets}/property_search/conf
cp -a $ld_config/term_search/* ${configsets}/term_search/conf