diff --git a/pyserini/prebuilt_index_info.py b/pyserini/prebuilt_index_info.py
index 039067ad97..e7474c9ad3 100644
--- a/pyserini/prebuilt_index_info.py
+++ b/pyserini/prebuilt_index_info.py
@@ -30,6 +30,7 @@
     "robust04": {
         "description": "TREC Disks 4 & 5 (minus Congressional Records), used in the TREC 2004 Robust Track",
         "filename": "index-robust04-20191213.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-robust04-20191213.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/eqFacNeSGc4pLLH/download"
@@ -43,6 +44,7 @@
     "msmarco-passage": {
         "description": "MS MARCO passage corpus",
         "filename": "index-msmarco-passage-20201117-f87c94.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-passage-20201117-f87c94.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/QQsZMFG8MpF4P8M/download"
@@ -57,6 +59,7 @@
     "msmarco-passage-slim": {
         "description": "MS MARCO passage corpus (slim version, no documents)",
         "filename": "index-msmarco-passage-slim-20201202-ab6e28.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-passage-slim-20201202-ab6e28.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/Kx6K9NJFmwnaAP8/download"
@@ -71,6 +74,7 @@
     "msmarco-passage-expanded": {
         "description": "MS MARCO passage corpus (+ docTTTTTquery expansion)",
         "filename": "index-msmarco-passage-expanded-20201121-e127fb.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-passage-expanded-20201121-e127fb.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/pm7cisJtRxiAMHd/download"
@@ -85,6 +89,7 @@
     "msmarco-passage-ltr": {
         "description": "MS MARCO passage corpus (4 extra preprocessed fields) used for LTR pipeline",
         "filename": "index-msmarco-passage-ltr-20210519-e25e33f.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt",
         "urls": [
             "https://vault.cs.uwaterloo.ca/s/8qFCaCtwabRfYQD/download" # too big for UWaterloo GitLab
         ],
@@ -98,6 +103,7 @@
     "msmarco-doc": {
         "description": "MS MARCO document corpus",
         "filename": "index-msmarco-doc-20201117-f87c94.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-20201117-f87c94.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/5NC7A2wAL7opJKH/download"
@@ -112,6 +118,7 @@
     "msmarco-doc-slim": {
         "description": "MS MARCO document corpus (slim version, no documents)",
         "filename": "index-msmarco-doc-slim-20201202-ab6e28.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-slim-20201202-ab6e28.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/BMZ6oYBoEPgTFqs/download"
@@ -126,6 +133,7 @@
     "msmarco-doc-per-passage": {
         "description": "MS MARCO document corpus, segmented into passages",
         "filename": "index-msmarco-doc-per-passage-20201204-f50dcc.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-per-passage-20201204-f50dcc.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/q6sAxE6q57q2TBo/download"
@@ -140,6 +148,7 @@
     "msmarco-doc-per-passage-slim": {
         "description": "MS MARCO document corpus, segmented into passages (slim version, no documents)",
         "filename": "index-msmarco-doc-per-passage-slim-20201204-f50dcc.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-per-passage-slim-20201204-f50dcc.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/mKTjbTKMwWF9kY3/download"
@@ -154,6 +163,7 @@
     "msmarco-doc-expanded-per-doc": {
         "description": "MS MARCO document corpus, with per-doc docTTTTTquery expansion",
         "filename": "index-msmarco-doc-expanded-per-doc-20201126-1b4d0a.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/3BQz6ZAXAxtfne8/download"
@@ -168,6 +178,7 @@
     "msmarco-doc-expanded-per-passage": {
         "description": "MS MARCO document corpus, with per-passage docTTTTTquery expansion",
         "filename": "index-msmarco-doc-expanded-per-passage-20201126-1b4d0a.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/eZLbPWcnB7LzKnQ/download"
@@ -434,6 +445,7 @@
     "wikipedia-dpr": {
         "description": "Wikipedia (DPR 100 word splits) Anserini index",
         "filename": "index-wikipedia-dpr-20210120-d1b9e6.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-wikipedia-dpr-20210120-d1b9e6.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/t6tDJmpoxPw9tH8/download"
@@ -448,6 +460,7 @@
     "wikipedia-dpr-slim": {
         "description": "Wikipedia (DPR 100 word splits) Anserini index, without raw texts stored",
         "filename": "index-wikipedia-dpr-slim-20210120-d1b9e6.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-wikipedia-dpr-slim-20210120-d1b9e6.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/Gk2sfTyJCyaTrYH/download"
@@ -462,6 +475,7 @@
     "wikipedia-kilt-doc": {
         "description": "Wikipedia snapshot used as KILT's knowledge source. Indexed by documents.",
         "filename": "index-wikipedia-kilt-doc-20210421-f29307.tar.gz",
+        "readme": "https://github.com/castorini/pyserini/blob/master/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt",
         "urls": [
             "https://git.uwaterloo.ca/jimmylin/anserini-indexes/raw/master/index-wikipedia-kilt-doc-20210421-f29307.tar.gz",
             "https://vault.cs.uwaterloo.ca/s/RqtLg3CZT38k32c/download"
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt
new file mode 100644
index 0000000000..cd7fe03745
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-20201117-f87c94-readme.txt
@@ -0,0 +1,15 @@
+This index was generated on 2020/11/17 at commit f87c945fd1c1e4174468194c72e3c05688dc45dd Mon Nov 16 16:17:20 2020 -0500
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-doc \
+ -index index-msmarco-doc-20201117-f87c94 -threads 1 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-msmarco-doc-20201117-f87c94.tar.gz MD5 checksum = ac747860e7a37aed37cc30ed3990f273
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt
new file mode 100644
index 0000000000..db57732f8a
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-doc-20201126-1b4d0a-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/26 at
+
++ docTTTTTquery commit d2704c025c2bf6db652b4b27f49c4e59714ba898 (2020/11/24).
++ anserini commit 1b4d0a29879a867ca5d1f003f924acc3279455ba (2020/11/25).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 1 \
+  -input msmarco-doc-expanded -index index-msmarco-doc-expanded-per-doc-20201126-1b4d0a -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-expanded-per-doc-20201126-1b4d0a.tar.gz MD5 checksum = f7056191842ab77a01829cff68004782
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt
new file mode 100644
index 0000000000..29362ba570
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-expanded-per-passage-20201126-1b4d0a-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/26 at
+
++ docTTTTTquery commit d2704c025c2bf6db652b4b27f49c4e59714ba898 (2020/11/24).
++ anserini commit 1b4d0a29879a867ca5d1f003f924acc3279455ba (2020/11/25).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 1 \
+  -input msmarco-doc-expanded-passage -index index-msmarco-doc-expanded-per-passage-20201126-1b4d0a -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-expanded-per-passage-20201126-1b4d0a.tar.gz MD5 checksum = 54ea30c64515edf3c3741291b785be53
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt
new file mode 100644
index 0000000000..6f250a5de3
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-20201204-f50dcc-readme.txt
@@ -0,0 +1,19 @@
+This index was generated on 2020/12/04 at
+
++ docTTTTTquery commit 5be1af130b4657ea117781f761c4e5d15c77cb42 (2020/12/01).
++ anserini commit f50dcceb6cd0ec3403c1e77066aa51bb3275d24e (2020/12/04).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 1 \
+  -input msmarco-doc-passage -index index-msmarco-doc-per-passage-20201204-f50dcc -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-msmarco-doc-per-passage-20201204-f50dcc.tar.gz MD5 checksum = 797367406a7542b649cefa6b41cf4c33
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt
new file mode 100644
index 0000000000..565915c8b7
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-per-passage-slim-20201204-f50dcc-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/12/04 at
+
++ docTTTTTquery commit 5be1af130b4657ea117781f761c4e5d15c77cb42 (2020/12/01).
++ anserini commit f50dcceb6cd0ec3403c1e77066aa51bb3275d24e (2020/12/04).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 1 \
+  -input msmarco-doc-passage -index index-msmarco-doc-per-passage-slim-20201204-f50dcc -optimize
+
+This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-per-passage-slim-20201204-f50dcc.tar.gz MD5 checksum = 77c2409943a8c9faffabf57cb6adca69
diff --git a/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt b/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt
new file mode 100644
index 0000000000..7e79f60ca7
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-doc-slim-20201202-ab6e28-readme.txt
@@ -0,0 +1,10 @@
+This index was generated on 2020/12/02 at commit ab6e280b06a7a6476d001a5eb2319c191010c0e1 (2020/12/01)
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection CleanTrecCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-doc \
+ -index index-msmarco-doc-slim-20201202-ab6e28 -threads 1 -optimize
+
+This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-doc-slim-20201202-ab6e28.tar.gz MD5 checksum = c56e752f7992bf6149761097641d515a
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt
new file mode 100644
index 0000000000..a3a08f586a
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-20201117-f87c94-readme.txt
@@ -0,0 +1,15 @@
+This index was generated on 2020/11/17 at commit f87c945fd1c1e4174468194c72e3c05688dc45dd Mon Nov 16 16:17:20 2020 -0500
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-passage/collection_jsonl \
+ -index index-msmarco-passage-20201117-f87c94 -threads 9 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw passages are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-msmarco-passage-20201117-f87c94.tar.gz MD5 checksum = 1efad4f1ae6a77e235042eff4be1612d
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt
new file mode 100644
index 0000000000..23fa654428
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-expanded-20201121-e127fb-readme.txt
@@ -0,0 +1,14 @@
+This index was generated on 2020/11/21 at
+
++ docTTTTTquery commit 701ea0a72beeb8db46aa409352a72ba52cd2c36b Tue Nov 17 07:13:27 2020 -0500
++ anserini commit e127fbea6f5515d60eb7c325cd866657dbf13cc6 Sat Nov 21 07:58:03 2020 -0500
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection \
+  -collection JsonCollection -generator DefaultLuceneDocumentGenerator \
+  -input msmarco-passage-expanded -index index-msmarco-passage-expanded-20201121-e127fb -threads 9 -optimize
+
+Note that this index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-passage-expanded-20201121-e127fb.tar.gz MD5 checksum = e5762e9e065b6fe5000f9c18da778565
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt
new file mode 100644
index 0000000000..4a5e758a89
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-ltr-20210519-e25e33f-readme.txt
@@ -0,0 +1,11 @@
+This index was generated on 2021/05/19 at commit e25e33f4a06e9c1ab4d795908cae4474fa019643 (2021-05-17 21:48:48 -0400)
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-ltr-passage/ltr_collection_jsonl \
+ -index index-msmarco-passage-ltr-20210519-e25e33f -threads 9 -storeRaw -optimize -storePositions -storeDocvectors -pretokenized
+
+Note that the -pretokenized option is used to preserve the preprocessed tokenization.
+This index was built with spaCy 3.0.6.
+
+index-msmarco-passage-ltr-20210519-e25e33f.tar.gz MD5 checksum = a5de642c268ac1ed5892c069bdc29ae3
diff --git a/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt b/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt
new file mode 100644
index 0000000000..010eaab227
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-msmarco-passage-slim-20201202-ab6e28-readme.txt
@@ -0,0 +1,10 @@
+This index was generated on 2020/12/02 at commit ab6e280b06a7a6476d001a5eb2319c191010c0e1 (2020/12/01)
+with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection JsonCollection \
+ -generator DefaultLuceneDocumentGenerator -input collections/msmarco-passage/collection_jsonl \
+ -index index-msmarco-passage-slim-20201202-ab6e28 -threads 9 -optimize
+
+This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-msmarco-passage-slim-20201202-ab6e28.tar.gz MD5 checksum = 5e11da4cebd2e8dda2e73c589ffb0b4c
diff --git a/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt b/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt
new file mode 100644
index 0000000000..bc45b21c72
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-robust04-20191213-readme.txt
@@ -0,0 +1,7 @@
+This index was generated on 2019/12/13 with Anserini v0.7.0, with the following command:
+
+sh target/appassembler/bin/IndexCollection -collection TrecCollection \
+ -input /tuna1/collections/newswire/disk45/ -index index-robust04-20191213 \
+ -generator JsoupGenerator -threads 16 -storePositions -storeDocvectors -storeRawDocs -optimize
+
+index-robust04-20191213.tar.gz MD5 checksum = 15f3d001489c97849a010b0a4734d018
diff --git a/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt
new file mode 100644
index 0000000000..e449ad1048
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-wikipedia-dpr-20210120-d1b9e6-readme.txt
@@ -0,0 +1,18 @@
+This index was generated on 2021/01/20 at
+
++ anserini commit d1b9e67928aa60fa557113ace5d209b0c58e994c (2021/01/19).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 22 \
+  -input wikipedia-dpr-jsonl -index index-wikipedia-dpr-20210120-d1b9e6 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-wikipedia-dpr-20210120-d1b9e6.tar.gz MD5 checksum = c28f3a56b2dfcef25bf3bf755c264d04
diff --git a/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt
new file mode 100644
index 0000000000..9ff6af6e28
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-wikipedia-dpr-slim-20210120-d1b9e6-readme.txt
@@ -0,0 +1,13 @@
+This index was generated on 2021/01/20 at
+
++ anserini commit d1b9e67928aa60fa557113ace5d209b0c58e994c (2021/01/19).
+
+with the following command:
+
+sh anserini/target/appassembler/bin/IndexCollection -collection JsonCollection \
+  -generator DefaultLuceneDocumentGenerator -threads 22 \
+  -input wikipedia-dpr-jsonl -index index-wikipedia-dpr-slim-20210120-d1b9e6 -optimize
+
+This minimal index does not store any "extras" (positions, document vectors, raw documents, etc.).
+
+index-wikipedia-dpr-slim-20210120-d1b9e6.tar.gz MD5 checksum = 7d40604a824b5df37a1ae9d25ea38071
diff --git a/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt b/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt
new file mode 100644
index 0000000000..8449100a55
--- /dev/null
+++ b/pyserini/resources/index-metadata/index-wikipedia-kilt-doc-20210421-f29307-readme.txt
@@ -0,0 +1,18 @@
+This index was generated on 2021/04/22 at
+
++ anserini commit f29307a9fb162ec7bef4919a164929a673d2304e (2021/04/21).
+
+with the following command:
+
+python -m pyserini.index -collection JsonCollection -generator DefaultLuceneDocumentGenerator \
+ -threads 40 -input collections/wikipedia-kilt-doc \
+ -index indexes/index-wikipedia-kilt-doc-20210421-f29307 -storeRaw -optimize
+
+Note that to reduce index size:
+
++ positions are not indexed (so no phrase queries)
++ document vectors are not stored (so no query expansion)
+
+However, the raw documents are stored, so they can be fetched and fed to further downstream reranking components.
+
+index-wikipedia-kilt-doc-20210421-f29307.tar.gz MD5 checksum = b8ec8feb654f7aaa86f9901dc6c804a8