Skip to content

Commit

Permalink
Merge pull request #244 from chroma-core/fixDuckDbLogging
Browse files: browse the repository at this point in the history
Create the persist directory if it does not exist; correct the logging of persistence
  • Loading branch information
jeffchuber authored Mar 28, 2023
2 parents fa847be + e8db7aa commit e347476
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
5 changes: 4 additions & 1 deletion chromadb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,15 @@ def require(key):
return chromadb.db.clickhouse.Clickhouse(settings)
elif setting == "duckdb+parquet":
require("persist_directory")
logger.warning(
f"Using embedded DuckDB with persistence: data will be stored in: {settings.persist_directory}"
)
import chromadb.db.duckdb

return chromadb.db.duckdb.PersistentDuckDB(settings)
elif setting == "duckdb":
require("persist_directory")
logger.info("Using DuckDB in-memory for database. Data will be transient.")
logger.warning("Using embedded DuckDB without persistence: data will be transient")
import chromadb.db.duckdb

return chromadb.db.duckdb.DuckDB(settings)
Expand Down
9 changes: 6 additions & 3 deletions chromadb/db/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import duckdb
import uuid
import time
import os
import itertools
import logging

Expand Down Expand Up @@ -44,8 +45,6 @@ class DuckDB(Clickhouse):
# duckdb has a different way of connecting to the database
def __init__(self, settings):

logger.warning("Using embedded DuckDB without persistence: data will be transient")

self._conn = duckdb.connect()
self._create_table_collections()
self._create_table_embeddings()
Expand Down Expand Up @@ -391,6 +390,9 @@ def persist(self):
if self._conn is None:
return

if not os.path.exists(self._save_folder):
os.makedirs(self._save_folder)

# if the db is empty, dont save
if self._conn.query(f"SELECT COUNT() FROM embeddings") == 0:
return
Expand All @@ -417,7 +419,8 @@ def load(self):
"""
Load the database from disk
"""
import os
if not os.path.exists(self._save_folder):
os.makedirs(self._save_folder)

# load in the embeddings
if not os.path.exists(f"{self._save_folder}/chroma-embeddings.parquet"):
Expand Down

0 comments on commit e347476

Please sign in to comment.