Skip to content

Commit

Permalink
CMR-9582 Making the values a def config so that they can be set in AW… (
Browse files Browse the repository at this point in the history
#2020)

* CMR-9582 Making the values a def config so that they can be set in AWS, setting the maximum search result value to 200K.

* CMR-9582 Fixing typo.

* CMR-9582 Fixing the page size to what it was.

* CMR-9582 Fixing getting the configuration value
  • Loading branch information
eereiter authored and jwteague committed Dec 12, 2023
1 parent 633dd1a commit b4c3aab
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 14 deletions.
13 changes: 13 additions & 0 deletions common-app-lib/src/cmr/common_app/config.clj
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,16 @@
"CMR support email address"
{:default "[email protected]"})

;; Per-request page size used when a query asks for an :unlimited page size.
;; Declared as a Long with a default of 10000. Callers read the current value by
;; invoking it as a function, e.g. (config/es-unlimited-page-size).
;; NOTE(review): Elasticsearch rejects requests whose offset + page size exceeds
;; the index's max_result_window setting (10000 unless overridden) -- confirm the
;; target indexes allow paging as deep as es-max-unlimited-hits before raising
;; either value.
(defconfig es-unlimited-page-size
"This is the number of items we will request from elastic search at a time when
the page size is set to unlimited."
{:default 10000
:type Long})

;; Ceiling on the total number of hits an :unlimited page size query may match.
;; Declared as a Long with a default of 200000. Callers read the current value by
;; invoking it as a function, e.g. (config/es-max-unlimited-hits).
;; The elastic search index code compares total hits against this value with a
;; strict `>`, so a query matching exactly this many items is still allowed; a
;; larger match count is reported through errors/internal-error!.
;; NOTE(review): presumably overridable per environment (the change description
;; mentions setting it in AWS) -- confirm how defconfig resolves overrides.
(defconfig es-max-unlimited-hits
"Sets an upper limit in order to get all results from elastic search
without paging. This is used by CMR applications to load data into their
caches."
{:default 200000
:type Long})

Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
[clojurewerkz.elastisch.rest.document :as esd]
[clojurewerkz.elastisch.rest.index :as esri]
[clojurewerkz.elastisch.rest.response :as esrsp]
[cmr.common-app.config :as config]
[cmr.common-app.services.search.query-model :as qm]
[cmr.common-app.services.search.query-to-elastic :as q2e]
[cmr.common-app.services.search.results-model :as results]
Expand Down Expand Up @@ -217,16 +218,6 @@
concept-type)))))
all-concepts))))))

(def unlimited-page-size
"This is the number of items we will request at a time when the page size is set to unlimited"
10000)

(def max-unlimited-hits
"This is the maximum number of hits we can fetch if the page size is set to unlimited. We need to
support fetching fields on every single collection in the CMR. This is set to a number safely above
what we'll need to support with GCMD (~35K) and ECHO (~5k) collections."
100000)

;; Implements querying against elasticsearch when the page size is set to :unlimited. It works by
;; calling the default implementation multiple times until all results have been found. It uses the
;; cmr.common-app.config values es-unlimited-page-size and es-max-unlimited-hits to control how many
;; are requested at a time and the maximum number
Expand All @@ -238,14 +229,14 @@

(loop [offset 0 prev-items [] took-total 0 timed-out false]
(let [results (send-query-to-elastic
context (assoc query :offset offset :page-size unlimited-page-size))
context (assoc query :offset offset :page-size (config/es-unlimited-page-size)))
total-hits (get-in results [:hits :total :value])
current-items (get-in results [:hits :hits])]

(when (> total-hits max-unlimited-hits)
(when (> total-hits (config/es-max-unlimited-hits))
(errors/internal-error!
(format "Query with unlimited page size matched %s items which exceeds maximum of %s. Query: %s"
total-hits max-unlimited-hits (pr-str query))))
total-hits (config/es-max-unlimited-hits) (pr-str query))))

(if (>= (+ (count prev-items) (count current-items)) total-hits)
;; We've got enough results now. We'll return the query like we got all of them back in one request
Expand All @@ -254,7 +245,7 @@
(update-in [:hits :hits] concat prev-items)
(assoc :timed_out timed-out))
;; We need to keep searching subsequent pages
(recur (long (+ offset unlimited-page-size))
(recur (long (+ offset (config/es-unlimited-page-size)))
(concat prev-items current-items)
(long (+ took-total (:took results)))
(or timed-out (:timed_out results)))))))
Expand Down

0 comments on commit b4c3aab

Please sign in to comment.