Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add_with_ids fails to work after merge is performed on on-disk IndexIVFPQ index #1258

Closed
zlfeng83 opened this issue Jun 23, 2020 · 2 comments
Closed
Labels

Comments

@zlfeng83
Copy link

zlfeng83 commented Jun 23, 2020

Summary

add_with_ids fails to work after merge is performed on an on-disk IndexIVFPQ index;
The error message is as follows:
Assertion failed: (end_prev == inf || offset >= end_prev), function free_slot, file OnDiskInvertedLists.cpp, line 565.

Platform

OS: macOS 10.15.2/Ubuntu 16.04
Faiss version: V 1.6.3

Faiss compilation options:
./configure --without-cuda
make && make install

Running on:

  • CPU

Interface:

  • C++

Reproduction instructions

  1. Download the attached zip file and unzip the file.
    ivfpqcnn0.index.zip

  2. Use below code to reproduce the issue. (File path need to be changed accordingly.)
    // C++ standard library (the two bare #include lines in the original had
    // their header names stripped by the page scrape; restored as the headers
    // the code plainly uses: <string> and <vector>)
    #include <string>
    #include <vector>
    #include <unordered_map>
    #include <cassert>
    #include <cstdint>
    #include <ctime>
    // C / system
    #include <pthread.h>
    // Third-party
    #include <gtest/gtest.h>
    #include <spdlog/fmt/bundled/format.h>
    // Faiss
    #include <faiss/AutoTune.h>
    #include <faiss/IVFlib.h>
    #include <faiss/IndexFlat.h>
    #include <faiss/IndexIVFFlat.h>
    #include <faiss/IndexIVFPQ.h>
    #include <faiss/MetaIndexes.h>
    #include <faiss/OnDiskInvertedLists.h>
    #include <faiss/clone_index.h>
    #include <faiss/impl/AuxIndexStructures.h>
    #include <faiss/index_io.h>
    #include <faiss/utils/distances.h>
    #include <faiss/utils/random.h>

using namespace std;

// Alias for Faiss' 64-bit vector-id type.
using idx_t = faiss::Index::idx_t;

// Parameters shared by the whole test.
int d{512};        // vector dimensionality
size_t nb{1000};   // number of database vectors
size_t nq{100};    // number of query vectors
int nindex{4};     // number of index shards
int k{10};         // neighbors returned per query
int nlist{40};     // IVF coarse-quantizer cells

struct CommonData {

std::vector <float> database;
std::vector <float> queries;
std::vector<idx_t> ids;
faiss::IndexFlatL2 quantizer;

CommonData(): database (nb * d), queries (nq * d), ids(nb), quantizer (d) {

    for (size_t i = 0; i < nb * d; i++) {
        database[i] = drand48();
    }
    for (size_t i = 0; i < nq * d; i++) {
        queries[i] = drand48();
    }
    for (int i = 0; i < nb; i++) {
        ids[i] = 123 + 456 * i;
    }
    { // just to train the quantizer
        faiss::IndexIVFFlat iflat (&quantizer, d, nlist);
        iflat.train(nb, database.data());
    }
}

};

// Shared fixture instance plus on-disk file locations used by the test.
CommonData cd;

// Pre-trained IVFPQ index serving as the template for every shard.
std::string trainedPQFilePath = "/opt/data/trainedIndex/ivfpqcnn0.index";
// Directory and basename under which the index files are written.
std::string strHome = "/opt/data/imagedb";
std::string pqfile = "ivfpq";
// Builds a fresh on-disk IVFPQ shard from the pre-trained template index,
// seeds it with two vectors, serializes it, and returns the reloaded index.
// workerId 0 uses the canonical file names; other workers get a
// timestamp-suffixed pair so concurrently built shards do not collide.
// Caller owns the returned index.
faiss::IndexIVFPQ * getEmptyIndex(int workerId)
{
    int dimCnt = 512;

    // Timestamp suffix. tm_year is years-since-1900 and tm_mon is 0-based;
    // tm_hour/tm_min/tm_sec are already correct as-is (the original
    // incorrectly added 1 to each of them).
    time_t now = time(nullptr);
    tm *ltm = localtime(&now);
    string strNow = fmt::format("{}{}{}{}{}{}",
            1900 + ltm->tm_year, 1 + ltm->tm_mon, ltm->tm_mday,
            ltm->tm_hour, ltm->tm_min, ltm->tm_sec);

    string pqMainFile;
    string pqDataFile;
    if (workerId == 0) {
        pqMainFile = strHome + "/"  + pqfile + ".main" ;
        pqDataFile = strHome + "/"  + pqfile + ".dat" ;
    } else {
        pqMainFile = strHome + "/"  + pqfile  +".main.part" + std::to_string(workerId) +"_" + strNow;
        pqDataFile = strHome + "/" + pqfile  + ".dat.part" + std::to_string(workerId) + "_" + strNow;
    }

    faiss::IndexIVFPQ *pqIndex =
            dynamic_cast<faiss::IndexIVFPQ *>(faiss::read_index(trainedPQFilePath.data()));
    assert(pqIndex && "trained file does not contain an IndexIVFPQ");

    // The on-disk inverted lists live on the stack; ownership is NOT
    // transferred (second arg false). This is safe only because pqIndex is
    // written out and deleted before `ivf` goes out of scope.
    faiss::OnDiskInvertedLists ivf(
            pqIndex->nlist, pqIndex->code_size,
            pqDataFile.data());
    pqIndex->replace_invlists(&ivf, false);

    // Seed the index with two vectors so the on-disk lists are non-empty.
    // Ids are stack values: add_with_ids copies them, so no heap allocation
    // is needed (the original leaked two new'd int64_t arrays).
    std::vector<float> initdata(dimCnt, 0.0f);
    int64_t id0 = 0;
    pqIndex->add_with_ids(1, initdata.data(), &id0);

    std::vector<float> initdata2(dimCnt, 1.0f);
    int64_t id1 = 2;
    pqIndex->add_with_ids(1, initdata2.data(), &id1);

    // Persist, then reload so the returned index no longer references the
    // stack-local OnDiskInvertedLists.
    faiss::write_index(pqIndex, pqMainFile.data());
    delete pqIndex;
    pqIndex = dynamic_cast<faiss::IndexIVFPQ *>(faiss::read_index(pqMainFile.data()));

    return pqIndex;
}

// Merges all shards of `index_shards` into shard 0.
// standard_merge=true : in-memory merge via ivflib::merge_into (shift_ids
//                       controls id renumbering across shards).
// standard_merge=false: concatenate every shard's inverted lists into a new
//                       on-disk file (strDataFile) and attach it to shard 0.
// Returns 0 on success. (The original fell off the end of a non-void
// function, which is undefined behavior.)
int mergeIndex (faiss::IndexShards *index_shards ,string strDataFile, bool shift_ids,
bool standard_merge) {

    int nindex = index_shards->count();
    if (standard_merge) {
        for (int i = 1; i < nindex; i++) {
            faiss::ivflib::merge_into(
                    index_shards->at(0), index_shards->at(i),
                    shift_ids);
        }
        // index_shards->sync_with_shard_indexes();
    } else {
        std::vector<const faiss::InvertedLists *> lists;
        faiss::IndexIVFPQ *index0 = nullptr;
        size_t ntotal = 0;
        for (int i = 0; i < nindex; i++) {
            auto index_ivf = dynamic_cast<faiss::IndexIVFPQ *>(index_shards->at(i));
            assert (index_ivf);
            if (i == 0) {
                index0 = index_ivf;
            }
            lists.push_back(index_ivf->invlists);
            ntotal += index_ivf->ntotal;
        }

        // Heap-allocated on purpose: replace_invlists(..., true) hands
        // ownership to index0, whose destructor releases the lists.
        auto il = new faiss::OnDiskInvertedLists(
                index0->nlist, index0->code_size,
                strDataFile.c_str());

        il->merge_from(lists.data(), lists.size());

        index0->replace_invlists(il, true);
        index0->ntotal = ntotal;
    }
    return 0;
}

// Reproduction for issue #1258: build four on-disk shards, merge them into
// one OnDiskInvertedLists file, then remove + add_with_ids on the merged
// index (which trips the free_slot assertion in OnDiskInvertedLists.cpp).
TEST(ONDISK,merge)
{
    std::vector<idx_t> newI(k * nq);
    std::vector<float> newD(k * nq);

    int dimCnt = 512;

    string pqMainFile = strHome + "/"  + pqfile + ".main" ;
    string pqDataFile = strHome + "/"  + pqfile + ".dat" ;

    // own_fields = false: the shards container does not delete sub-indexes.
    faiss::IndexShards index_shards(dimCnt, false, false);
    index_shards.own_fields = false;

    for (int i = 0; i < 4; i++) {
        index_shards.add_shard(getEmptyIndex(i));
    }
    mergeIndex(&index_shards, pqDataFile, false, false);

    // Round-trip shard 0 (the merge target) through disk.
    auto index_ivf = dynamic_cast<faiss::IndexIVFPQ*>(index_shards.at(0));
    faiss::write_index(index_ivf, pqMainFile.data());
    delete index_ivf;
    index_ivf = dynamic_cast<faiss::IndexIVFPQ *>(faiss::read_index(pqMainFile.data()));

    index_ivf->search(nq, cd.queries.data(),
                      k, newD.data(), newI.data());

    // Stack id: IDSelectorBatch copies the ids, so no heap allocation is
    // needed (the original leaked a new'd int64_t array).
    int64_t idDel = 1;
    faiss::IDSelectorBatch sel(1, &idDel);
    index_ivf->remove_ids(sel);

    // NOTE(review): add_with_ids on an index backed by OnDiskInvertedLists is
    // not supported by faiss (see maintainer reply in this issue) and triggers
    // the free_slot assertion; HStackInvertedLists is the suggested way to
    // append to on-disk indexes.
    index_ivf->add_with_ids(nb, cd.database.data(), cd.ids.data());

    index_ivf->search(nq, cd.queries.data(),
                      k, newD.data(), newI.data());

}

@mdouze
Copy link
Contributor

mdouze commented Jun 24, 2020

Adding to an on-disk index is not supported.
See https://github.com/facebookresearch/faiss/wiki/Storing-IVF-indexes-on-disk
If you need to add to an on-disk index, you can use a HStackInvertedLists.

@zlfeng83
Copy link
Author

Thanks mdouze. Please kindly archive this issue.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

No branches or pull requests

2 participants