From 7515e79710a4f518969d7b8b3f771f2592643138 Mon Sep 17 00:00:00 2001 From: "Kenneth Enevoldsen (UCloud)" Date: Thu, 15 Feb 2024 16:47:56 +0100 Subject: [PATCH] fix: Added e5 mistral scores --- .gitignore | 1 + makefile | 1 + .../Angry_Tweets.json | 2 +- .../Bornholm_Parallel.json | 2 +- .../DKHate.json | 2 +- .../intfloat__e5-mistral-7b-instruct/DaLAJ.json | 2 +- .../Da_Political_Comments.json | 2 +- .../intfloat__e5-mistral-7b-instruct/LCC.json | 2 +- .../Language_Identification.json | 2 +- .../Massive_Intent.json | 2 +- .../Massive_Scenario.json | 2 +- .../intfloat__e5-mistral-7b-instruct/NoReC.json | 2 +- .../NorQuad.json | 1 + .../Norwegian_courts.json | 1 + .../Norwegian_parliament.json | 2 +- .../SNL_Clustering.json | 1 + .../SNL_Retrieval.json | 1 + .../intfloat__e5-mistral-7b-instruct/ScaLA.json | 2 +- .../SweFAQ.json | 2 +- .../SweReC.json | 2 +- .../SwednClustering.json | 1 + .../SwednRetrieval.json | 1 + .../TV2Nord_Retrieval.json | 1 + .../Twitterhjerne.json | 1 + .../VG_Clustering.json | 1 + .../translate-e5-large/SwednClustering.json | 1 + src/seb/registered_models/e5_mistral.py | 17 ++++++++++------- 27 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/NorQuad.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_courts.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Clustering.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Retrieval.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednClustering.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednRetrieval.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/TV2Nord_Retrieval.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/Twitterhjerne.json create mode 100644 src/seb/cache/intfloat__e5-mistral-7b-instruct/VG_Clustering.json create mode 100644 src/seb/cache/translate-e5-large/SwednClustering.json diff --git a/.gitignore b/.gitignore index f9836212..f94e8aa0 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,4 @@ speed_test.py # ucloud run_sonar.py setup_ucloud.sh +run_e5.py diff --git a/makefile b/makefile index b53236d9..18bd4267 100644 --- a/makefile +++ b/makefile @@ -1,5 +1,6 @@ install: @echo "--- 🚀 Installing project ---" + pip install pip --upgrade pip install -e ".[dev, docs, openai, cohere, tests, mistral, fasttext]" static-type-check: diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Angry_Tweets.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Angry_Tweets.json index fcde1dda..61cd09e5 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Angry_Tweets.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Angry_Tweets.json @@ -1 +1 @@ -{"task_name":"Angry Tweets","task_description":"A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets","task_version":"1.1.1","time_of_run":"2024-01-12T16:11:31.466348","scores":{"da":{"accuracy":0.5653295128939828,"f1":0.5537309606310544,"accuracy_stderr":0.025429788216722243,"f1_stderr":0.025584076731803976,"main_score":0.5653295128939828}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Angry Tweets","task_description":"A sentiment dataset with 3 classes (positiv, negativ, neutral) for Danish tweets","task_version":"1.1.1","time_of_run":"2024-02-15T14:57:28.346842","scores":{"da":{"accuracy":0.5837631327602675,"f1":0.5755645471553164,"accuracy_stderr":0.019082128904879224,"f1_stderr":0.016545668708125016,"main_score":0.5837631327602675}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Bornholm_Parallel.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Bornholm_Parallel.json index 0662a5ce..3b71888e 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Bornholm_Parallel.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Bornholm_Parallel.json @@ -1 +1 @@ -{"task_name":"Bornholm Parallel","task_description":"Danish Bornholmsk Parallel Corpus. Bornholmsk is a Danish dialect spoken on the island of Bornholm, Denmark. Historically it is a part of east Danish which was also spoken in Scania and Halland, Sweden.","task_version":"1.1.1","time_of_run":"2024-01-12T16:31:41.615348","scores":{"da":{"precision":0.44724749694749694,"recall":0.546,"f1":0.47312554112554106,"accuracy":0.546,"main_score":0.47312554112554106},"da-bornholm":{"precision":0.44724749694749694,"recall":0.546,"f1":0.47312554112554106,"accuracy":0.546,"main_score":0.47312554112554106}},"main_score":"f1"} \ No newline at end of file +{"task_name":"Bornholm Parallel","task_description":"Danish Bornholmsk Parallel Corpus. Bornholmsk is a Danish dialect spoken on the island of Bornholm, Denmark. Historically it is a part of east Danish which was also spoken in Scania and Halland, Sweden.","task_version":"1.1.1","time_of_run":"2024-02-15T14:57:51.724795","scores":{"da":{"precision":0.4778175324675325,"recall":0.582,"f1":0.5046997557997558,"accuracy":0.582,"main_score":0.5046997557997558},"da-bornholm":{"precision":0.4778175324675325,"recall":0.582,"f1":0.5046997557997558,"accuracy":0.582,"main_score":0.5046997557997558}},"main_score":"f1"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/DKHate.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/DKHate.json index f7fbb081..9da78680 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/DKHate.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/DKHate.json @@ -1 +1 @@ -{"task_name":"DKHate","task_description":"Danish Tweets annotated for Hate Speech either being Offensive or not","task_version":"1.1.1","time_of_run":"2024-01-12T16:51:56.137053","scores":{"da":{"accuracy":0.6231003039513678,"f1":0.5147324862025631,"ap":0.9019485020655956,"accuracy_stderr":0.06707622033651643,"f1_stderr":0.04801957253516925,"ap_stderr":0.011704051137462472,"main_score":0.6231003039513678}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"DKHate","task_description":"Danish Tweets annotated for Hate Speech either being Offensive or not","task_version":"1.1.1","time_of_run":"2024-02-15T15:11:33.730063","scores":{"da":{"accuracy":0.6452887537993921,"f1":0.5334840010219304,"ap":0.9071823148838007,"accuracy_stderr":0.06940548288099752,"f1_stderr":0.04601363559496756,"ap_stderr":0.009999081908632078,"main_score":0.6452887537993921}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/DaLAJ.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/DaLAJ.json index c3a30441..6b1e201e 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/DaLAJ.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/DaLAJ.json @@ -1 +1 @@ -{"task_name":"DaLAJ","task_description":"A Swedish dataset for linguistic acceptability. Available as a part of Superlim.","task_version":"1.1.1","time_of_run":"2024-01-14T16:32:38.725500","scores":{"sv":{"accuracy":0.4999999999999999,"f1":0.49816828359738186,"ap":0.5000396174219223,"accuracy_stderr":0.006001311979492911,"f1_stderr":0.0058224094857192285,"ap_stderr":0.003019437971892235,"main_score":0.4999999999999999}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"DaLAJ","task_description":"A Swedish dataset for linguistic acceptability. Available as a part of Superlim.","task_version":"1.1.1","time_of_run":"2024-02-15T15:36:47.152600","scores":{"sv":{"accuracy":0.50259009009009,"f1":0.4999577132881273,"ap":0.5013403816610403,"accuracy_stderr":0.006002368455511896,"f1_stderr":0.00717540300380278,"ap_stderr":0.00304189174846488,"main_score":0.50259009009009}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Da_Political_Comments.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Da_Political_Comments.json index ee91bfb4..c45af1a5 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Da_Political_Comments.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Da_Political_Comments.json @@ -1 +1 @@ -{"task_name":"Da Political Comments","task_description":"A dataset of Danish political comments rated for sentiment","task_version":"1.1.1","time_of_run":"2024-01-12T17:25:31.589536","scores":{"da":{"accuracy":0.37086570477247505,"f1":0.33451118026552445,"accuracy_stderr":0.028042872572902663,"f1_stderr":0.019791432707526823,"main_score":0.37086570477247505}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Da Political Comments","task_description":"A dataset of Danish political comments rated for sentiment","task_version":"1.1.1","time_of_run":"2024-02-15T14:58:53.699184","scores":{"da":{"accuracy":0.3972807991120977,"f1":0.36759785298684816,"accuracy_stderr":0.02312098404486449,"f1_stderr":0.014653088429746523,"main_score":0.3972807991120977}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/LCC.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/LCC.json index 86cbfb92..fe58ab6f 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/LCC.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/LCC.json @@ -1 +1 @@ -{"task_name":"LCC","task_description":"The leipzig corpora collection, annotated for sentiment","task_version":"1.1.1","time_of_run":"2024-01-12T16:20:26.846658","scores":{"da":{"accuracy":0.608,"f1":0.5984443551003225,"accuracy_stderr":0.0310984101058416,"f1_stderr":0.028992849338073282,"main_score":0.608}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"LCC","task_description":"The leipzig corpora collection, annotated for sentiment","task_version":"1.1.1","time_of_run":"2024-02-15T15:09:45.985379","scores":{"da":{"accuracy":0.6393333333333333,"f1":0.6327560931038911,"accuracy_stderr":0.050195174624216174,"f1_stderr":0.047439752010702683,"main_score":0.6393333333333333}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Language_Identification.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Language_Identification.json index 4e2b591f..8bcd9a37 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Language_Identification.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Language_Identification.json @@ -1 +1 @@ -{"task_name":"Language Identification","task_description":"A dataset for Nordic language identification.","task_version":"1.1.1","time_of_run":"2024-01-13T00:06:38.159285","scores":{"da":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667},"sv":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667},"nb":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667},"nn":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667},"is":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667},"fo":{"accuracy":0.6319666666666667,"f1":0.6179089390100627,"accuracy_stderr":0.010069149804118408,"f1_stderr":0.011243281125761617,"main_score":0.6319666666666667}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Language Identification","task_description":"A dataset for Nordic language identification.","task_version":"1.1.1","time_of_run":"2024-02-15T15:29:14.879883","scores":{"da":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668},"sv":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668},"nb":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668},"nn":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668},"is":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668},"fo":{"accuracy":0.6521666666666668,"f1":0.6463880767093156,"accuracy_stderr":0.008136679639481666,"f1_stderr":0.009421448316751334,"main_score":0.6521666666666668}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Intent.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Intent.json index f90432cd..d944ce02 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Intent.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Intent.json @@ -1 +1 @@ -{"task_name":"Massive Intent","task_description":"MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages","task_version":"1.1.1","time_of_run":"2024-01-12T19:44:31.866728","scores":{"da":{"accuracy":0.6443846671149966,"f1":0.6144527899712677,"accuracy_stderr":0.013655291187753937,"f1_stderr":0.008044562677972891,"main_score":0.6443846671149966},"nb":{"accuracy":0.6147276395427034,"f1":0.5770433304356521,"accuracy_stderr":0.013614327792467596,"f1_stderr":0.009829023567742397,"main_score":0.6147276395427034},"sv":{"accuracy":0.660053799596503,"f1":0.6189633593223225,"accuracy_stderr":0.014624252889682296,"f1_stderr":0.01056297816892318,"main_score":0.660053799596503}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Massive Intent","task_description":"MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages","task_version":"1.1.1","time_of_run":"2024-02-15T15:19:15.379575","scores":{"da":{"accuracy":0.7128782784129118,"f1":0.6801928888774748,"accuracy_stderr":0.009453817754777464,"f1_stderr":0.01028423684836327,"main_score":0.7128782784129118},"nb":{"accuracy":0.6930733019502353,"f1":0.6582621843095777,"accuracy_stderr":0.011772311855889124,"f1_stderr":0.008012157979029806,"main_score":0.6930733019502353},"sv":{"accuracy":0.724915938130464,"f1":0.692935064035517,"accuracy_stderr":0.011559726658161192,"f1_stderr":0.009758150146092522,"main_score":0.724915938130464}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Scenario.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Scenario.json index 1f8694f0..5f9c8fa1 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Scenario.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Massive_Scenario.json @@ -1 +1 @@ -{"task_name":"Massive Scenario","task_description":"MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages","task_version":"1.1.1","time_of_run":"2024-01-12T21:04:04.073134","scores":{"da":{"accuracy":0.6948554135843982,"f1":0.6848225761082032,"accuracy_stderr":0.012949020917584937,"f1_stderr":0.013744445377989327,"main_score":0.6948554135843982},"nb":{"accuracy":0.6773369199731001,"f1":0.6677134070954015,"accuracy_stderr":0.01651533153102061,"f1_stderr":0.014947986322389185,"main_score":0.6773369199731001},"sv":{"accuracy":0.7202084734364491,"f1":0.7067606773625197,"accuracy_stderr":0.01626231997571331,"f1_stderr":0.014451017435312007,"main_score":0.7202084734364491}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Massive Scenario","task_description":"MASSIVE: A 1M-Example Multilingual Natural Language Understanding Dataset with 51 Typologically-Diverse Languages","task_version":"1.1.1","time_of_run":"2024-02-15T15:23:40.911554","scores":{"da":{"accuracy":0.7559515803631472,"f1":0.7502643214262198,"accuracy_stderr":0.01612291756152768,"f1_stderr":0.015876447974206353,"main_score":0.7559515803631472},"nb":{"accuracy":0.746301277740417,"f1":0.7387755515981577,"accuracy_stderr":0.01164722630134689,"f1_stderr":0.01148189194727614,"main_score":0.746301277740417},"sv":{"accuracy":0.7771015467383995,"f1":0.7678172138788529,"accuracy_stderr":0.013947138453446924,"f1_stderr":0.012186676832063086,"main_score":0.7771015467383995}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/NoReC.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/NoReC.json index 87b951ea..d3fe388e 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/NoReC.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/NoReC.json @@ -1 +1 @@ -{"task_name":"NoReC","task_description":"A Norwegian dataset for sentiment classification on review","task_version":"1.1.1","time_of_run":"2024-01-13T00:39:07.204084","scores":{"nb":{"accuracy":0.57685546875,"f1":0.5594704179983383,"accuracy_stderr":0.013212858145054313,"f1_stderr":0.015893138645226176,"main_score":0.57685546875}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"NoReC","task_description":"A Norwegian dataset for sentiment classification on review","task_version":"1.1.1","time_of_run":"2024-02-15T15:30:20.590376","scores":{"nb":{"accuracy":0.60224609375,"f1":0.5842523486277139,"accuracy_stderr":0.019453213243399253,"f1_stderr":0.022949451646144644,"main_score":0.60224609375}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/NorQuad.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/NorQuad.json new file mode 100644 index 00000000..023aa0f4 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/NorQuad.json @@ -0,0 +1 @@ +{"task_name":"NorQuad","task_description":"Human-created question for Norwegian wikipedia passages.","task_version":"0.0.1","time_of_run":"2024-02-15T14:16:44.942321","scores":{"nb":{"ndcg_at_1":0.29102,"ndcg_at_3":0.24332,"ndcg_at_5":0.26064,"ndcg_at_10":0.27494,"ndcg_at_100":0.31125,"ndcg_at_1000":0.35391,"map_at_1":0.14551,"map_at_3":0.19572,"map_at_5":0.20473,"map_at_10":0.21058,"map_at_100":0.21667,"map_at_1000":0.2182,"recall_at_1":0.14551,"recall_at_3":0.23535,"recall_at_5":0.26953,"recall_at_10":0.30566,"recall_at_100":0.45654,"recall_at_1000":0.74951,"precision_at_1":0.29102,"precision_at_3":0.1569,"precision_at_5":0.10781,"precision_at_10":0.06113,"precision_at_100":0.00913,"precision_at_1000":0.0015,"mrr_at_1":0.29102,"mrr_at_3":0.35107,"mrr_at_5":0.3645,"mrr_at_10":0.37202,"mrr_at_100":0.37992,"mrr_at_1000":0.38085}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_courts.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_courts.json new file mode 100644 index 00000000..5fca4031 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_courts.json @@ -0,0 +1 @@ +{"task_name":"Norwegian courts","task_description":"Nynorsk and Bokmål parallel corpus from Norwegian courts. Norway has two standardised written languages. Bokmål is a variant closer to Danish, while Nynorsk was created to resemble regional dialects of Norwegian.","task_version":"1.1.1","time_of_run":"2024-02-15T14:05:08.082051","scores":{"nb":{"precision":0.9035087719298246,"recall":0.9298245614035088,"f1":0.9122807017543859,"accuracy":0.9298245614035088,"main_score":0.9122807017543859},"nn":{"precision":0.9035087719298246,"recall":0.9298245614035088,"f1":0.9122807017543859,"accuracy":0.9298245614035088,"main_score":0.9122807017543859}},"main_score":"f1"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_parliament.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_parliament.json index 91dbe96a..5e88e025 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_parliament.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Norwegian_parliament.json @@ -1 +1 @@ -{"task_name":"Norwegian parliament","task_description":"Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)","task_version":"1.1.1","time_of_run":"2024-01-13T09:25:14.721140","scores":{"nb":{"accuracy":0.5960833333333333,"f1":0.5927369481971759,"ap":0.5580471249230353,"accuracy_stderr":0.02310618604039476,"f1_stderr":0.02254823659759275,"ap_stderr":0.015919209043183613,"main_score":0.5960833333333333}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"Norwegian parliament","task_description":"Norwegian parliament speeches annotated with the party of the speaker (`Sosialistisk Venstreparti` vs `Fremskrittspartiet`)","task_version":"1.1.1","time_of_run":"2024-02-15T15:34:20.034788","scores":{"nb":{"accuracy":0.6073333333333333,"f1":0.6038486310734704,"ap":0.5664906173152808,"accuracy_stderr":0.0222604532248959,"f1_stderr":0.022953370688813035,"ap_stderr":0.017692204096777492,"main_score":0.6073333333333333}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Clustering.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Clustering.json new file mode 100644 index 00000000..7b5e5eb0 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Clustering.json @@ -0,0 +1 @@ +{"task_name":"SNL Clustering","task_description":"Webscrabed articles from the Norwegian lexicon 'Det Store Norske Leksikon'. Uses articles categories as clusters.","task_version":"0.0.1","time_of_run":"2024-02-15T14:13:27.893954","scores":{"nb":{"v_measure":0.6630119895582604,"v_measure_std":0.011742769318871622}},"main_score":"v_measure"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Retrieval.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Retrieval.json new file mode 100644 index 00000000..4b5d368b --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SNL_Retrieval.json @@ -0,0 +1 @@ +{"task_name":"SNL Retrieval","task_description":"Webscrabed articles and ingresses from the Norwegian lexicon 'Det Store Norske Leksikon'.","task_version":"0.0.1","time_of_run":"2024-02-15T14:15:51.240238","scores":{"nb":{"ndcg_at_1":0.89769,"ndcg_at_3":0.93635,"ndcg_at_5":0.93985,"ndcg_at_10":0.94283,"ndcg_at_100":0.94611,"ndcg_at_1000":0.94673,"map_at_1":0.89769,"map_at_3":0.92731,"map_at_5":0.92927,"map_at_10":0.93049,"map_at_100":0.93116,"map_at_1000":0.93119,"recall_at_1":0.89769,"recall_at_3":0.96231,"recall_at_5":0.97077,"recall_at_10":0.98,"recall_at_100":0.99538,"recall_at_1000":1.0,"precision_at_1":0.89769,"precision_at_3":0.32077,"precision_at_5":0.19415,"precision_at_10":0.098,"precision_at_100":0.00995,"precision_at_1000":0.001,"mrr_at_1":0.89769,"mrr_at_3":0.92731,"mrr_at_5":0.92927,"mrr_at_10":0.93049,"mrr_at_100":0.93116,"mrr_at_1000":0.93119}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/ScaLA.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/ScaLA.json index 6c4e87cf..7a6fcc8a 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/ScaLA.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/ScaLA.json @@ -1 +1 @@ -{"task_name":"ScaLA","task_description":"A linguistic acceptability task for Danish, Norwegian Bokmål Norwegian Nynorsk and Swedish.","task_version":"1.1.1","time_of_run":"2024-01-12T23:06:35.527904","scores":{"da":{"accuracy":0.5046875,"f1":0.5012936392159489,"ap":0.5024235203858394,"accuracy_stderr":0.006763003786398744,"f1_stderr":0.006751041530193849,"ap_stderr":0.0034780532032580407,"main_score":0.5046875},"nb":{"accuracy":0.504345703125,"f1":0.5028259464108461,"ap":0.502300558459674,"accuracy_stderr":0.010656283828815899,"f1_stderr":0.01224526624454141,"ap_stderr":0.005477723754765124,"main_score":0.504345703125},"sv":{"accuracy":0.50810546875,"f1":0.5041345942032682,"ap":0.5041854590962495,"accuracy_stderr":0.009226319389790377,"f1_stderr":0.0113272958618099,"ap_stderr":0.004806314587320303,"main_score":0.50810546875},"nn":{"accuracy":0.5015625,"f1":0.49956320214680394,"ap":0.5008083696621946,"accuracy_stderr":0.004847528767965178,"f1_stderr":0.007018678421752453,"ap_stderr":0.0024316493025775387,"main_score":0.5015625}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"ScaLA","task_description":"A linguistic acceptability task for Danish, Norwegian Bokmål Norwegian Nynorsk and Swedish.","task_version":"1.1.1","time_of_run":"2024-02-15T15:27:24.831813","scores":{"da":{"accuracy":0.504443359375,"f1":0.5016672540819356,"ap":0.5022694001324884,"accuracy_stderr":0.004889887452497829,"f1_stderr":0.006550234270641194,"ap_stderr":0.0024767744445673276,"main_score":0.504443359375},"nb":{"accuracy":0.50341796875,"f1":0.501234466142355,"ap":0.5017592672593842,"accuracy_stderr":0.006351411149026992,"f1_stderr":0.006439864847875912,"ap_stderr":0.003206402978469232,"main_score":0.50341796875},"sv":{"accuracy":0.503515625,"f1":0.5001611345657621,"ap":0.5017961710745878,"accuracy_stderr":0.005285179346771409,"f1_stderr":0.008263554745573335,"ap_stderr":0.002686125889304091,"main_score":0.503515625},"nn":{"accuracy":0.49853515625,"f1":0.49672303309946314,"ap":0.49928727245893717,"accuracy_stderr":0.004211691143134341,"f1_stderr":0.004960073685900642,"ap_stderr":0.0020993277141615795,"main_score":0.49853515625}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweFAQ.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweFAQ.json index 36c91a8c..e435a367 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweFAQ.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweFAQ.json @@ -1 +1 @@ -{"task_name":"SweFAQ","task_description":"A Swedish QA dataset derived from FAQ","task_version":"0.0.1","time_of_run":"2024-01-14T16:49:25.697013","scores":{"sv":{"ndcg_at_1":0.54191,"ndcg_at_3":0.65689,"ndcg_at_5":0.68483,"ndcg_at_10":0.70948,"ndcg_at_100":0.73295,"ndcg_at_1000":0.7345,"map_at_1":0.54191,"map_at_3":0.62801,"map_at_5":0.6434,"map_at_10":0.65398,"map_at_100":0.65926,"map_at_1000":0.65933,"recall_at_1":0.54191,"recall_at_3":0.74074,"recall_at_5":0.80897,"recall_at_10":0.88304,"recall_at_100":0.9883,"recall_at_1000":1.0,"precision_at_1":0.54191,"precision_at_3":0.24691,"precision_at_5":0.16179,"precision_at_10":0.0883,"precision_at_100":0.00988,"precision_at_1000":0.001,"mrr_at_1":0.54191,"mrr_at_3":0.62801,"mrr_at_5":0.6434,"mrr_at_10":0.65398,"mrr_at_100":0.65926,"mrr_at_1000":0.65933}},"main_score":"ndcg_at_10"} \ No newline at end of file +{"task_name":"SweFAQ","task_description":"A Swedish QA dataset derived from FAQ","task_version":"0.0.1","time_of_run":"2024-02-15T15:37:12.180225","scores":{"sv":{"ndcg_at_1":0.54971,"ndcg_at_3":0.67899,"ndcg_at_5":0.70138,"ndcg_at_10":0.71981,"ndcg_at_100":0.74285,"ndcg_at_1000":0.74468,"map_at_1":0.54971,"map_at_3":0.6475,"map_at_5":0.65988,"map_at_10":0.66756,"map_at_100":0.67254,"map_at_1000":0.67262,"recall_at_1":0.54971,"recall_at_3":0.76998,"recall_at_5":0.82456,"recall_at_10":0.88109,"recall_at_100":0.98635,"recall_at_1000":1.0,"precision_at_1":0.54971,"precision_at_3":0.25666,"precision_at_5":0.16491,"precision_at_10":0.08811,"precision_at_100":0.00986,"precision_at_1000":0.001,"mrr_at_1":0.54971,"mrr_at_3":0.6475,"mrr_at_5":0.65988,"mrr_at_10":0.66756,"mrr_at_100":0.67254,"mrr_at_1000":0.67262}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweReC.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweReC.json index f86cc178..5ff0973a 100644 --- a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweReC.json +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SweReC.json @@ -1 +1 @@ -{"task_name":"SweReC","task_description":"A Swedish dataset for sentiment classification on review","task_version":"1.1.1","time_of_run":"2024-01-14T16:18:08.327548","scores":{"sv":{"accuracy":0.80185546875,"f1":0.7171129811453116,"accuracy_stderr":0.016684363824126476,"f1_stderr":0.014911710029520691,"main_score":0.80185546875}},"main_score":"accuracy"} \ No newline at end of file +{"task_name":"SweReC","task_description":"A Swedish dataset for sentiment classification on review","task_version":"1.1.1","time_of_run":"2024-02-15T15:36:16.576626","scores":{"sv":{"accuracy":0.79912109375,"f1":0.7243729504029643,"accuracy_stderr":0.01612984470659247,"f1_stderr":0.023740204176982662,"main_score":0.79912109375}},"main_score":"accuracy"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednClustering.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednClustering.json new file mode 100644 index 00000000..4fdc63a9 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednClustering.json @@ -0,0 +1 @@ +{"task_name":"SwednClustering","task_description":"The SWE-DN corpus is based on 1,963,576 news articles from the Swedish newspaper Dagens Nyheter (DN) during the years 2000--2020. The articles are filtered to resemble the CNN/DailyMail dataset both regarding textual structure. This dataset uses the category labels as clusters.","task_version":"0.0.1","time_of_run":"2024-02-15T14:25:46.167270","scores":{"sv":{"v_measure":0.1123071477019931,"v_measure_std":0.11137086837705644}},"main_score":"v_measure"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednRetrieval.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednRetrieval.json new file mode 100644 index 00000000..3320bedd --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/SwednRetrieval.json @@ -0,0 +1 @@ +{"task_name":"SwednRetrieval","task_description":"News Article Summary Semantic Similarity Estimation.","task_version":"0.0.1","time_of_run":"2024-02-15T14:21:46.819228","scores":{"sv":{"ndcg_at_1":0.64844,"ndcg_at_3":0.60315,"ndcg_at_5":0.64279,"ndcg_at_10":0.67631,"ndcg_at_100":0.71442,"ndcg_at_1000":0.72556,"map_at_1":0.32422,"map_at_3":0.54875,"map_at_5":0.58053,"map_at_10":0.60037,"map_at_100":0.61113,"map_at_1000":0.61169,"recall_at_1":0.32422,"recall_at_3":0.60449,"recall_at_5":0.68213,"recall_at_10":0.76562,"recall_at_100":0.91309,"recall_at_1000":0.98584,"precision_at_1":0.64844,"precision_at_3":0.40299,"precision_at_5":0.27285,"precision_at_10":0.15312,"precision_at_100":0.01826,"precision_at_1000":0.00197,"mrr_at_1":0.64844,"mrr_at_3":0.71305,"mrr_at_5":0.72204,"mrr_at_10":0.72832,"mrr_at_100":0.73151,"mrr_at_1000":0.73168}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/TV2Nord_Retrieval.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/TV2Nord_Retrieval.json new file mode 100644 index 00000000..fdb34900 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/TV2Nord_Retrieval.json @@ -0,0 +1 @@ +{"task_name":"TV2Nord Retrieval","task_description":"News Article and corresponding summaries extracted from the Danish newspaper TV2 Nord.","task_version":"1.1.1","time_of_run":"2024-02-15T13:58:03.889586","scores":{"da":{"ndcg_at_1":0.85498,"ndcg_at_3":0.9012,"ndcg_at_5":0.90648,"ndcg_at_10":0.91188,"ndcg_at_100":0.91849,"ndcg_at_1000":0.91947,"map_at_1":0.85498,"map_at_3":0.89014,"map_at_5":0.89309,"map_at_10":0.89534,"map_at_100":0.89681,"map_at_1000":0.89685,"recall_at_1":0.85498,"recall_at_3":0.93311,"recall_at_5":0.9458,"recall_at_10":0.9624,"recall_at_100":0.99219,"recall_at_1000":0.99951,"precision_at_1":0.85498,"precision_at_3":0.31104,"precision_at_5":0.18916,"precision_at_10":0.09624,"precision_at_100":0.00992,"precision_at_1000":0.001,"mrr_at_1":0.85303,"mrr_at_3":0.88949,"mrr_at_5":0.89259,"mrr_at_10":0.89477,"mrr_at_100":0.8961,"mrr_at_1000":0.89614}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/Twitterhjerne.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Twitterhjerne.json new file mode 100644 index 00000000..bc2e5da8 --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/Twitterhjerne.json @@ -0,0 +1 @@ +{"task_name":"Twitterhjerne","task_description":"Danish question asked on Twitter with the Hashtag #Twitterhjerne ('Twitter brain') and their corresponding answer.","task_version":"1.1.1","time_of_run":"2024-02-15T13:58:16.127917","scores":{"da":{"ndcg_at_1":0.82051,"ndcg_at_3":0.69927,"ndcg_at_5":0.66955,"ndcg_at_10":0.71133,"ndcg_at_100":0.77396,"ndcg_at_1000":0.78685,"map_at_1":0.25278,"map_at_3":0.52632,"map_at_5":0.57716,"map_at_10":0.61643,"map_at_100":0.64326,"map_at_1000":0.64445,"recall_at_1":0.25278,"recall_at_3":0.55641,"recall_at_5":0.63056,"recall_at_10":0.72179,"recall_at_100":0.91645,"recall_at_1000":0.98718,"precision_at_1":0.82051,"precision_at_3":0.61966,"precision_at_5":0.43077,"precision_at_10":0.25,"precision_at_100":0.03167,"precision_at_1000":0.00336,"mrr_at_1":0.80769,"mrr_at_3":0.85256,"mrr_at_5":0.85513,"mrr_at_10":0.85673,"mrr_at_100":0.85916,"mrr_at_1000":0.85936}},"main_score":"ndcg_at_10"} \ No newline at end of file diff --git a/src/seb/cache/intfloat__e5-mistral-7b-instruct/VG_Clustering.json b/src/seb/cache/intfloat__e5-mistral-7b-instruct/VG_Clustering.json new file mode 100644 index 00000000..60948f5f --- /dev/null +++ b/src/seb/cache/intfloat__e5-mistral-7b-instruct/VG_Clustering.json @@ -0,0 +1 @@ +{"task_name":"VG Clustering","task_description":"Articles and their classes (e.g. sports) from VG news articles extracted from Norsk Aviskorpus.","task_version":"0.0.1","time_of_run":"2024-02-15T14:09:15.944153","scores":{"nb":{"v_measure":0.2945685732520364,"v_measure_std":0.0050451838636235155}},"main_score":"v_measure"} \ No newline at end of file diff --git a/src/seb/cache/translate-e5-large/SwednClustering.json b/src/seb/cache/translate-e5-large/SwednClustering.json new file mode 100644 index 00000000..a45cb85c --- /dev/null +++ b/src/seb/cache/translate-e5-large/SwednClustering.json @@ -0,0 +1 @@ +{"task_name":"SwednClustering","task_description":"The SWE-DN corpus is based on 1,963,576 news articles from the Swedish newspaper Dagens Nyheter (DN) during the years 2000--2020. The articles are filtered to resemble the CNN/DailyMail dataset both regarding textual structure. This dataset uses the category labels as clusters.","task_version":"0.0.1","time_of_run":"2024-02-15T14:52:49.862534","scores":{"sv":{"v_measure":0.05927437912877753,"v_measure_std":0.09584714451682672}},"main_score":"v_measure"} \ No newline at end of file diff --git a/src/seb/registered_models/e5_mistral.py b/src/seb/registered_models/e5_mistral.py index 48408388..27627072 100644 --- a/src/seb/registered_models/e5_mistral.py +++ b/src/seb/registered_models/e5_mistral.py @@ -12,6 +12,8 @@ from seb.interfaces.task import Task from seb.registries import models +from tqdm import tqdm + import logging logger = logging.getLogger(__name__) @@ -88,7 +90,7 @@ def task_to_instruction(task: Task) -> str: class E5Mistral(Encoder): max_length = 4096 - max_batch_size = 32 + max_batch_size = 4 def __init__(self): logger.info("Started loading e5 Mistral") @@ -98,7 +100,7 @@ def __init__(self): def load_model(self): self.tokenizer = AutoTokenizer.from_pretrained("intfloat/e5-mistral-7b-instruct") - self.model = AutoModel.from_pretrained("intfloat/e5-mistral-7b-instruct") + self.model = AutoModel.from_pretrained("intfloat/e5-mistral-7b-instruct", torch_dtype=torch.float16) def preprocess(self, sentences: Sequence[str], instruction: str, encode_type: EncodeTypes) -> BatchEncoding: if encode_type == "query": @@ -117,7 +119,9 @@ def preprocess(self, sentences: Sequence[str], instruction: str, encode_type: En ] batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors="pt") - return batch_dict + + + return batch_dict.to(self.model.device) # but it does not work slightly better than this: # return sentences # noqa @@ -151,17 +155,16 @@ def encode( instruction = task_to_instruction(task) else: instruction = "" - for batch in batched(sentences, batch_size): - batch_dict = self.preprocess(batch, instruction=instruction, encode_type=encode_type) + for batch in tqdm(batched(sentences, batch_size)): - #with torch.no_grad(): with torch.inference_mode(): + batch_dict = self.preprocess(batch, instruction=instruction, encode_type=encode_type) outputs = self.model(**batch_dict) embeddings = self.last_token_pool( outputs.last_hidden_state, batch_dict["attention_mask"], # type: ignore ) - batched_embeddings.append(embeddings) + batched_embeddings.append(embeddings.detach().cpu()) return torch.cat(batched_embeddings)