From 3a6ab4820bdb5b4f9a8eb9eabe75b76f9c5e99a1 Mon Sep 17 00:00:00 2001 From: BrunoMarinhoM Date: Fri, 17 May 2024 15:54:33 -0400 Subject: [PATCH 1/2] ISSUE #1233: Feature refresh_database argument added to method find --- deepface/DeepFace.py | 6 ++++ deepface/modules/recognition.py | 49 ++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 853d95fd5..282a6274e 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -260,6 +260,7 @@ def find( threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, + refresh_database: bool = True, ) -> List[pd.DataFrame]: """ Identify individuals in a database @@ -299,6 +300,10 @@ def find( silent (boolean): Suppress or allow some log messages for a quieter analysis process (default is False). + refresh_data_base (boolean): Sincronizes the images representation (pkl) file with the + directory/db files, if set to false, it will ignore any file changes inside the db_path + (default is True). + Returns: results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds to the identity information for an individual detected in the source image. @@ -329,6 +334,7 @@ def find( threshold=threshold, normalization=normalization, silent=silent, + refresh_data_base=refresh_database, ) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 011863445..0ffdfb10e 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -29,6 +29,7 @@ def find( threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, + refresh_data_base: bool = True, ) -> List[pd.DataFrame]: """ Identify individuals in a database @@ -67,6 +68,11 @@ def find( silent (boolean): Suppress or allow some log messages for a quieter analysis process. + refresh_data_base (boolean): Sincronizes the images representation (pkl) file with the + directory/db files, if set to false, it will ignore any file changes inside the db_path + directory (default is True). + + Returns: results (List[pd.DataFrame]): A list of pandas dataframes. Each dataframe corresponds to the identity information for an individual detected in the source image. @@ -145,25 +151,36 @@ def find( # Get the list of images on storage storage_images = image_utils.list_images(path=db_path) - if len(storage_images) == 0: - raise ValueError(f"No item found in {db_path}") + must_save_pickle = False + new_images = [] + old_images = [] + replaced_images = [] + + if not refresh_data_base: + logger.info(f"There could be changes in {db_path} not tracked. Set refresh_data_base to true to assure that any changes will be tracked.") + # Enforce data consistency amongst on disk images and pickle file - must_save_pickle = False - new_images = list(set(storage_images) - set(pickled_images)) # images added to storage - old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage + if refresh_data_base: + if len(storage_images) == 0: + raise ValueError(f"No item found in {db_path}") + + new_images = list(set(storage_images) - set(pickled_images)) # images added to storage + + old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage + + # detect replaced images + replaced_images = [] + for current_representation in representations: + identity = current_representation["identity"] + if identity in old_images: + continue + alpha_hash = current_representation["hash"] + beta_hash = image_utils.find_image_hash(identity) + if alpha_hash != beta_hash: + logger.debug(f"Even though {identity} represented before, it's replaced later.") + replaced_images.append(identity) - # detect replaced images - replaced_images = [] - for current_representation in representations: - identity = current_representation["identity"] - if identity in old_images: - continue - alpha_hash = current_representation["hash"] - beta_hash = image_utils.find_image_hash(identity) - if alpha_hash != beta_hash: - logger.debug(f"Even though {identity} represented before, it's replaced later.") - replaced_images.append(identity) if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0): logger.info( From b4a35fe80b6e28e3d8b1880ef3822298af90b836 Mon Sep 17 00:00:00 2001 From: BrunoMarinhoM Date: Fri, 17 May 2024 16:34:37 -0400 Subject: [PATCH 2/2] minor changes and typos corrected --- deepface/DeepFace.py | 4 ++-- deepface/modules/recognition.py | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index 282a6274e..fdee7e173 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -300,7 +300,7 @@ def find( silent (boolean): Suppress or allow some log messages for a quieter analysis process (default is False). - refresh_data_base (boolean): Sincronizes the images representation (pkl) file with the + refresh_database (boolean): Synchronizes the images representation (pkl) file with the directory/db files, if set to false, it will ignore any file changes inside the db_path (default is True). @@ -334,7 +334,7 @@ def find( threshold=threshold, normalization=normalization, silent=silent, - refresh_data_base=refresh_database, + refresh_database=refresh_database, ) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index 0ffdfb10e..91a20d252 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -29,7 +29,7 @@ def find( threshold: Optional[float] = None, normalization: str = "base", silent: bool = False, - refresh_data_base: bool = True, + refresh_database: bool = True, ) -> List[pd.DataFrame]: """ Identify individuals in a database @@ -68,7 +68,7 @@ def find( silent (boolean): Suppress or allow some log messages for a quieter analysis process. - refresh_data_base (boolean): Sincronizes the images representation (pkl) file with the + refresh_database (boolean): Synchronizes the images representation (pkl) file with the directory/db files, if set to false, it will ignore any file changes inside the db_path directory (default is True). @@ -156,12 +156,12 @@ def find( old_images = [] replaced_images = [] - if not refresh_data_base: - logger.info(f"There could be changes in {db_path} not tracked. Set refresh_data_base to true to assure that any changes will be tracked.") + if not refresh_database: + logger.info(f"There could be changes in {db_path} not tracked. Set refresh_database to true to assure that any changes will be tracked.") # Enforce data consistency amongst on disk images and pickle file - if refresh_data_base: + if refresh_database: if len(storage_images) == 0: raise ValueError(f"No item found in {db_path}") @@ -170,7 +170,6 @@ def find( old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage # detect replaced images - replaced_images = [] for current_representation in representations: identity = current_representation["identity"] if identity in old_images: