Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ISSUE #1233: Feature refresh_database argument added to method find #1235

Merged
merged 2 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions deepface/DeepFace.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ def find(
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]:
"""
Identify individuals in a database
Expand Down Expand Up @@ -299,6 +300,10 @@ def find(
silent (boolean): Suppress or allow some log messages for a quieter analysis process
(default is False).

refresh_database (boolean): Synchronizes the image representations (pkl) file with the
directory/db files. If set to False, any file changes inside the db_path directory are
ignored (default is True).

Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image.
Expand Down Expand Up @@ -329,6 +334,7 @@ def find(
threshold=threshold,
normalization=normalization,
silent=silent,
refresh_database=refresh_database,
)


Expand Down
48 changes: 32 additions & 16 deletions deepface/modules/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def find(
threshold: Optional[float] = None,
normalization: str = "base",
silent: bool = False,
refresh_database: bool = True,
) -> List[pd.DataFrame]:
"""
Identify individuals in a database
Expand Down Expand Up @@ -67,6 +68,11 @@ def find(

silent (boolean): Suppress or allow some log messages for a quieter analysis process.

refresh_database (boolean): Synchronizes the image representations (pkl) file with the
directory/db files. If set to False, any file changes inside the db_path directory are
ignored (default is True).


Returns:
results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds
to the identity information for an individual detected in the source image.
Expand Down Expand Up @@ -145,25 +151,35 @@ def find(
# Get the list of images on storage
storage_images = image_utils.list_images(path=db_path)

if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}")
must_save_pickle = False
new_images = []
old_images = []
replaced_images = []

if not refresh_database:
logger.info(f"There could be changes in {db_path} not tracked. Set refresh_database to true to assure that any changes will be tracked.")


# Enforce data consistency between the on-disk images and the pickle file
must_save_pickle = False
new_images = list(set(storage_images) - set(pickled_images)) # images added to storage
old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage
if refresh_database:
if len(storage_images) == 0:
raise ValueError(f"No item found in {db_path}")

new_images = list(set(storage_images) - set(pickled_images)) # images added to storage

old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage

# detect replaced images
for current_representation in representations:
identity = current_representation["identity"]
if identity in old_images:
continue
alpha_hash = current_representation["hash"]
beta_hash = image_utils.find_image_hash(identity)
if alpha_hash != beta_hash:
logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity)

# detect replaced images
replaced_images = []
for current_representation in representations:
identity = current_representation["identity"]
if identity in old_images:
continue
alpha_hash = current_representation["hash"]
beta_hash = image_utils.find_image_hash(identity)
if alpha_hash != beta_hash:
logger.debug(f"Even though {identity} represented before, it's replaced later.")
replaced_images.append(identity)

if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0):
logger.info(
Expand Down