From 07a2d5bf627eecc1feaf307b7a7ef18852971511 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 12:01:31 +0000 Subject: [PATCH 1/6] enhancement for find function - detect replaced files already in data store - store column names in the pickle --- deepface/commons/package_utils.py | 16 +++++ deepface/modules/recognition.py | 111 +++++++++++++++++++----------- 2 files changed, 88 insertions(+), 39 deletions(-) diff --git a/deepface/commons/package_utils.py b/deepface/commons/package_utils.py index 9326b94a3..2226e070c 100644 --- a/deepface/commons/package_utils.py +++ b/deepface/commons/package_utils.py @@ -1,3 +1,6 @@ +# built-in dependencies +import hashlib + # 3rd party dependencies import tensorflow as tf @@ -14,3 +17,16 @@ def get_tf_major_version() -> int: major_version (int) """ return int(tf.__version__.split(".", maxsplit=1)[0]) + + +def find_hash_of_file(file_path: str) -> str: + """ + Find hash of image file + Args: + file_path (str): exact image path + Returns: + hash (str): digest with sha1 algorithm + """ + with open(file_path, "rb") as f: + digest = hashlib.sha1(f.read()).hexdigest() + return digest diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index fa2b3b77f..b3a8a1132 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -1,7 +1,7 @@ # built-in dependencies import os import pickle -from typing import List, Union, Optional +from typing import List, Union, Optional, Dict, Any import time # 3rd party dependencies @@ -11,6 +11,7 @@ # project dependencies from deepface.commons.logger import Logger +from deepface.commons import package_utils from deepface.modules import representation, detection, modeling, verification from deepface.models.FacialRecognition import FacialRecognition @@ -97,14 +98,16 @@ def find( # --------------------------------------- - file_name = f"representations_{model_name}.pkl" - file_name = file_name.replace("-", "_").lower() + file_name = f"ds_{model_name}_{detector_backend}_v2.pkl" + file_name = file_name.replace("-", "").lower() datastore_path = os.path.join(db_path, file_name) representations = [] + # required columns for representations df_cols = [ "identity", - f"{model_name}_representation", + "hash", + "embedding", "target_x", "target_y", "target_w", @@ -120,14 +123,18 @@ def find( with open(datastore_path, "rb") as f: representations = pickle.load(f) - # Check if the representations are out-of-date - if len(representations) > 0: - if len(representations[0]) != len(df_cols): + # check each item of representations list has required keys + for i, current_representation in enumerate(representations): + missing_keys = list(set(df_cols) - set(current_representation.keys())) + if len(missing_keys) > 0: raise ValueError( - f"Seems existing {datastore_path} is out-of-the-date." - "Please delete it and re-run." + f"{i}-th item does not have some required keys - {missing_keys}." + f"Consider to delete {datastore_path}" ) - pickled_images = [representation[0] for representation in representations] + + # Check if the representations are out-of-date + if len(representations) > 0: + pickled_images = [representation["identity"] for representation in representations] else: pickled_images = [] @@ -136,19 +143,35 @@ def find( # Enforce data consistency amongst on disk images and pickle file must_save_pickle = False - new_images = list(set(storage_images) - set(pickled_images)) # images added to storage - old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage + new_images = list(set(storage_images) - set(pickled_images)) # images added to storage + old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage if not silent and (len(new_images) > 0 or len(old_images) > 0): logger.info(f"Found {len(new_images)} new images and {len(old_images)} removed images") + # detect replaced images + replaced_images = [] + for current_representation in representations: + identity = current_representation["identity"] + if identity in old_images: + continue + alpha_hash = current_representation["hash"] + beta_hash = package_utils.find_hash_of_file(identity) + if alpha_hash != beta_hash: + logger.warn(f"Even though {identity} represented before, it's replaced later.") + replaced_images.append(identity) + + # append replaced images into both old and new images. these will be dropped and re-added. + new_images = new_images + replaced_images + old_images = old_images + replaced_images + # remove old images first - if len(old_images)>0: - representations = [rep for rep in representations if rep[0] not in old_images] + if len(old_images) > 0: + representations = [rep for rep in representations if rep["identity"] not in old_images] must_save_pickle = True # find representations for new images - if len(new_images)>0: + if len(new_images) > 0: representations += __find_bulk_embeddings( employees=new_images, model_name=model_name, @@ -158,7 +181,7 @@ def find( align=align, normalization=normalization, silent=silent, - ) # add new images + ) # add new images must_save_pickle = True if must_save_pickle: @@ -176,10 +199,7 @@ def find( # ---------------------------- # now, we got representations for facial database - df = pd.DataFrame( - representations, - columns=df_cols, - ) + df = pd.DataFrame(representations) # img path might have more than once face source_objs = detection.extract_faces( @@ -216,9 +236,9 @@ def find( distances = [] for _, instance in df.iterrows(): - source_representation = instance[f"{model_name}_representation"] + source_representation = instance["embedding"] if source_representation is None: - distances.append(float("inf")) # no representation for this image + distances.append(float("inf")) # no representation for this image continue target_dims = len(list(target_representation)) @@ -254,7 +274,7 @@ def find( result_df["threshold"] = target_threshold result_df["distance"] = distances - result_df = result_df.drop(columns=[f"{model_name}_representation"]) + result_df = result_df.drop(columns=["embedding"]) # pylint: disable=unsubscriptable-object result_df = result_df[result_df["distance"] <= target_threshold] result_df = result_df.sort_values(by=["distance"], ascending=True).reset_index(drop=True) @@ -297,7 +317,7 @@ def __find_bulk_embeddings( expand_percentage: int = 0, normalization: str = "base", silent: bool = False, -): +) -> List[Dict["str", Any]]: """ Find embeddings of a list of images @@ -323,8 +343,8 @@ def __find_bulk_embeddings( silent (bool): enable or disable informative logging Returns: - representations (list): pivot list of embeddings with - image name and detected face area's coordinates + representations (list): pivot list of dict with + image name, hash, embedding and detected face area's coordinates """ representations = [] for employee in tqdm( @@ -332,6 +352,8 @@ def __find_bulk_embeddings( desc="Finding representations", disable=silent, ): + file_hash = package_utils.find_hash_of_file(employee) + try: img_objs = detection.extract_faces( img_path=employee, @@ -342,15 +364,23 @@ def __find_bulk_embeddings( align=align, expand_percentage=expand_percentage, ) + except ValueError as err: - logger.error( - f"Exception while extracting faces from {employee}: {str(err)}" - ) + logger.error(f"Exception while extracting faces from {employee}: {str(err)}") img_objs = [] if len(img_objs) == 0: - logger.warn(f"No face detected in {employee}. It will be skipped in detection.") - representations.append((employee, None, 0, 0, 0, 0)) + representations.append( + { + "identity": employee, + "hash": file_hash, + "embedding": None, + "target_x": 0, + "target_y": 0, + "target_w": 0, + "target_h": 0, + } + ) else: for img_obj in img_objs: img_content = img_obj["face"] @@ -365,13 +395,16 @@ def __find_bulk_embeddings( ) img_representation = embedding_obj[0]["embedding"] - representations.append(( - employee, - img_representation, - img_region["x"], - img_region["y"], - img_region["w"], - img_region["h"] - )) + representations.append( + { + "identity": employee, + "hash": file_hash, + "embedding": img_representation, + "target_x": img_region["x"], + "target_y": img_region["y"], + "target_w": img_region["w"], + "target_h": img_region["h"], + } + ) return representations From 2f9f9761d0845f903eb054cbac8bb6ab0e9cfbc0 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 12:01:45 +0000 Subject: [PATCH 2/6] cover uppercase links --- deepface/modules/preprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepface/modules/preprocessing.py b/deepface/modules/preprocessing.py index 42756c179..7daf248b5 100644 --- a/deepface/modules/preprocessing.py +++ b/deepface/modules/preprocessing.py @@ -34,7 +34,7 @@ def load_image(img: Union[str, np.ndarray]) -> Tuple[np.ndarray, str]: return load_base64(img), "base64 encoded string" # The image is a url - if img.startswith("http://") or img.startswith("https://"): + if img.lower().startswith("http://") or img.lower().startswith("https://"): return load_image_from_web(url=img), img # The image is a path From d7c2998e1bdd021a321d588fcf37bd7fc49bf4dc Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 13:55:02 +0000 Subject: [PATCH 3/6] wrapper for find distance added --- deepface/modules/recognition.py | 18 +++------------- deepface/modules/verification.py | 37 +++++++++++++++++++++++--------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index b3a8a1132..ed8b47b50 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -250,21 +250,9 @@ def find( + " after pickle created. Delete the {file_name} and re-run." ) - if distance_metric == "cosine": - distance = verification.find_cosine_distance( - source_representation, target_representation - ) - elif distance_metric == "euclidean": - distance = verification.find_euclidean_distance( - source_representation, target_representation - ) - elif distance_metric == "euclidean_l2": - distance = verification.find_euclidean_distance( - verification.l2_normalize(source_representation), - verification.l2_normalize(target_representation), - ) - else: - raise ValueError(f"invalid distance metric passes - {distance_metric}") + distance = verification.find_distance( + source_representation, target_representation, distance_metric + ) distances.append(distance) diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py index 300c2f99d..dc667138c 100644 --- a/deepface/modules/verification.py +++ b/deepface/modules/verification.py @@ -141,16 +141,7 @@ def verify( regions = [] for idx, img1_embedding in enumerate(img1_embeddings): for idy, img2_embedding in enumerate(img2_embeddings): - if distance_metric == "cosine": - distance = find_cosine_distance(img1_embedding, img2_embedding) - elif distance_metric == "euclidean": - distance = find_euclidean_distance(img1_embedding, img2_embedding) - elif distance_metric == "euclidean_l2": - distance = find_euclidean_distance( - l2_normalize(img1_embedding), l2_normalize(img2_embedding) - ) - else: - raise ValueError("Invalid distance_metric passed - ", distance_metric) + distance = find_distance(img1_embedding, img2_embedding, distance_metric) distances.append(distance) regions.append((img1_objs[idx]["facial_area"], img2_objs[idy]["facial_area"])) @@ -234,6 +225,32 @@ def l2_normalize(x: Union[np.ndarray, list]) -> np.ndarray: return x / np.sqrt(np.sum(np.multiply(x, x))) +def find_distance( + alpha_embedding: Union[np.ndarray, list], + beta_embedding: Union[np.ndarray, list], + distance_metric: str, +) -> np.float64: + """ + Wrapper to find distance between vectors according to the given distance metric + Args: + source_representation (np.ndarray or list): 1st vector + test_representation (np.ndarray or list): 2nd vector + Returns + distance (np.float64): calculated cosine distance + """ + if distance_metric == "cosine": + distance = find_cosine_distance(alpha_embedding, beta_embedding) + elif distance_metric == "euclidean": + distance = find_euclidean_distance(alpha_embedding, beta_embedding) + elif distance_metric == "euclidean_l2": + distance = find_euclidean_distance( + l2_normalize(alpha_embedding), l2_normalize(beta_embedding) + ) + else: + raise ValueError("Invalid distance_metric passed - ", distance_metric) + return distance + + def find_threshold(model_name: str, distance_metric: str) -> float: """ Retrieve pre-tuned threshold values for a model and distance metric pair From 6eced68e699a044981d115f6734f085b17ce3a17 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 15:22:58 +0000 Subject: [PATCH 4/6] support embedding input for verify --- deepface/DeepFace.py | 5 + deepface/modules/verification.py | 175 ++++++++++++++++++++++--------- tests/test_verify.py | 51 +++++++++ 3 files changed, 184 insertions(+), 47 deletions(-) diff --git a/deepface/DeepFace.py b/deepface/DeepFace.py index aafb297f5..e5e16cdaa 100644 --- a/deepface/DeepFace.py +++ b/deepface/DeepFace.py @@ -62,6 +62,7 @@ def verify( align: bool = True, expand_percentage: int = 0, normalization: str = "base", + silent: bool = False, ) -> Dict[str, Any]: """ Verify if an image pair represents the same person or different persons. @@ -91,6 +92,9 @@ def verify( normalization (string): Normalize the input image before feeding it to the model. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base) + silent (boolean): Suppress or allow some log messages for a quieter analysis process + (default is False). + Returns: result (dict): A dictionary containing verification results with following keys. @@ -126,6 +130,7 @@ def verify( align=align, expand_percentage=expand_percentage, normalization=normalization, + silent=silent, ) diff --git a/deepface/modules/verification.py b/deepface/modules/verification.py index dc667138c..6ab55310a 100644 --- a/deepface/modules/verification.py +++ b/deepface/modules/verification.py @@ -1,6 +1,6 @@ # built-in dependencies import time -from typing import Any, Dict, Union +from typing import Any, Dict, Union, List, Tuple # 3rd party dependencies import numpy as np @@ -8,11 +8,14 @@ # project dependencies from deepface.modules import representation, detection, modeling from deepface.models.FacialRecognition import FacialRecognition +from deepface.commons.logger import Logger + +logger = Logger(module="deepface/modules/verification.py") def verify( - img1_path: Union[str, np.ndarray], - img2_path: Union[str, np.ndarray], + img1_path: Union[str, np.ndarray, List[float]], + img2_path: Union[str, np.ndarray, List[float]], model_name: str = "VGG-Face", detector_backend: str = "opencv", distance_metric: str = "cosine", @@ -20,6 +23,7 @@ def verify( align: bool = True, expand_percentage: int = 0, normalization: str = "base", + silent: bool = False, ) -> Dict[str, Any]: """ Verify if an image pair represents the same person or different persons. @@ -30,10 +34,10 @@ def verify( Args: img1_path (str or np.ndarray): Path to the first image. Accepts exact image path - as a string, numpy array (BGR), or base64 encoded images. + as a string, numpy array (BGR), base64 encoded images or pre-calculated embeddings. img2_path (str or np.ndarray): Path to the second image. Accepts exact image path - as a string, numpy array (BGR), or base64 encoded images. + as a string, numpy array (BGR), base64 encoded images or pre-calculated embeddings. model_name (str): Model for face recognition. Options: VGG-Face, Facenet, Facenet512, OpenFace, DeepFace, DeepID, Dlib, ArcFace and SFace (default is VGG-Face). @@ -54,6 +58,9 @@ def verify( normalization (string): Normalize the input image before feeding it to the model. Options: base, raw, Facenet, Facenet2018, VGGFace, VGGFace2, ArcFace (default is base) + silent (boolean): Suppress or allow some log messages for a quieter analysis process + (default is False). + Returns: result (dict): A dictionary containing verification results. @@ -81,74 +88,96 @@ def verify( tic = time.time() - # -------------------------------- model: FacialRecognition = modeling.build_model(model_name) - target_size = model.input_shape - - try: - img1_objs = detection.extract_faces( + dims = model.output_shape + + if isinstance(img1_path, list): + # given image is already pre-calculated embedding + if not all(isinstance(dim, float) for dim in img1_path): + raise ValueError( + "When passing img1_path as a list, ensure that all its items are of type float." + ) + + if silent is False: + logger.warn( + "You passed 1st image as pre-calculated embeddings." + f"Please ensure that embeddings have been calculated for the {model_name} model." + ) + + if len(img1_path) != dims: + raise ValueError( + f"embeddings of {model_name} should have {dims} dimensions," + f" but it has {len(img1_path)} dimensions input" + ) + + img1_embeddings = [img1_path] + img1_facial_areas = [None] + else: + img1_embeddings, img1_facial_areas = __extract_faces_and_embeddings( img_path=img1_path, - target_size=target_size, + model_name=model_name, detector_backend=detector_backend, - grayscale=False, enforce_detection=enforce_detection, align=align, expand_percentage=expand_percentage, + normalization=normalization, ) - except ValueError as err: - raise ValueError("Exception while processing img1_path") from err - try: - img2_objs = detection.extract_faces( + if isinstance(img2_path, list): + # given image is already pre-calculated embedding + if not all(isinstance(dim, float) for dim in img2_path): + raise ValueError( + "When passing img2_path as a list, ensure that all its items are of type float." + ) + + if silent is False: + logger.warn( + "You passed 2nd image as pre-calculated embeddings." + f"Please ensure that embeddings have been calculated for the {model_name} model." + ) + + if len(img2_path) != dims: + raise ValueError( + f"embeddings of {model_name} should have {dims} dimensions," + f" but it has {len(img2_path)} dimensions input" + ) + + img2_embeddings = [img2_path] + img2_facial_areas = [None] + else: + img2_embeddings, img2_facial_areas = __extract_faces_and_embeddings( img_path=img2_path, - target_size=target_size, + model_name=model_name, detector_backend=detector_backend, - grayscale=False, enforce_detection=enforce_detection, align=align, expand_percentage=expand_percentage, - ) - except ValueError as err: - raise ValueError("Exception while processing img2_path") from err - - img1_embeddings = [] - for img1_obj in img1_objs: - img1_embedding_obj = representation.represent( - img_path=img1_obj["face"], - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, normalization=normalization, ) - img1_embedding = img1_embedding_obj[0]["embedding"] - img1_embeddings.append(img1_embedding) - img2_embeddings = [] - for img2_obj in img2_objs: - img2_embedding_obj = representation.represent( - img_path=img2_obj["face"], - model_name=model_name, - enforce_detection=enforce_detection, - detector_backend="skip", - align=align, - normalization=normalization, - ) - img2_embedding = img2_embedding_obj[0]["embedding"] - img2_embeddings.append(img2_embedding) + no_facial_area = { + "x": None, + "y": None, + "w": None, + "h": None, + "left_eye": None, + "right_eye": None, + } distances = [] - regions = [] + facial_areas = [] for idx, img1_embedding in enumerate(img1_embeddings): for idy, img2_embedding in enumerate(img2_embeddings): distance = find_distance(img1_embedding, img2_embedding, distance_metric) distances.append(distance) - regions.append((img1_objs[idx]["facial_area"], img2_objs[idy]["facial_area"])) + facial_areas.append( + (img1_facial_areas[idx] or no_facial_area, img2_facial_areas[idy] or no_facial_area) + ) # find the face pair with minimum distance threshold = find_threshold(model_name, distance_metric) distance = float(min(distances)) # best distance - facial_areas = regions[np.argmin(distances)] + facial_areas = facial_areas[np.argmin(distances)] toc = time.time() @@ -166,6 +195,58 @@ def verify( return resp_obj +def __extract_faces_and_embeddings( + img_path: Union[str, np.ndarray], + model_name: str = "VGG-Face", + detector_backend: str = "opencv", + enforce_detection: bool = True, + align: bool = True, + expand_percentage: int = 0, + normalization: str = "base", +) -> Tuple[List[List[float]], List[dict]]: + """ + Extract facial areas and find corresponding embeddings for given image + Returns: + embeddings (List[float]) + facial areas (List[dict]) + """ + embeddings = [] + facial_areas = [] + + model: FacialRecognition = modeling.build_model(model_name) + target_size = model.input_shape + + try: + img_objs = detection.extract_faces( + img_path=img_path, + target_size=target_size, + detector_backend=detector_backend, + grayscale=False, + enforce_detection=enforce_detection, + align=align, + expand_percentage=expand_percentage, + ) + except ValueError as err: + raise ValueError("Exception while processing img1_path") from err + + # find embeddings for each face + for img_obj in img_objs: + img_embedding_obj = representation.represent( + img_path=img_obj["face"], + model_name=model_name, + enforce_detection=enforce_detection, + detector_backend="skip", + align=align, + normalization=normalization, + ) + # already extracted face given, safe to access its 1st item + img_embedding = img_embedding_obj[0]["embedding"] + embeddings.append(img_embedding) + facial_areas.append(img_obj["facial_area"]) + + return embeddings, facial_areas + + def find_cosine_distance( source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list] ) -> np.float64: diff --git a/tests/test_verify.py b/tests/test_verify.py index 5d135417c..ff1f9fdfd 100644 --- a/tests/test_verify.py +++ b/tests/test_verify.py @@ -1,3 +1,4 @@ +import pytest import cv2 from deepface import DeepFace from deepface.commons.logger import Logger @@ -100,3 +101,53 @@ def test_verify_for_preloaded_image(): res = DeepFace.verify(img1, img2) assert res["verified"] is True logger.info("✅ test verify for pre-loaded image done") + + +def test_verify_for_precalculated_embeddings(): + model_name = "Facenet" + + img1_path = "dataset/img1.jpg" + img2_path = "dataset/img2.jpg" + + img1_embedding = DeepFace.represent(img_path=img1_path, model_name=model_name)[0]["embedding"] + img2_embedding = DeepFace.represent(img_path=img2_path, model_name=model_name)[0]["embedding"] + + result = DeepFace.verify( + img1_path=img1_embedding, img2_path=img2_embedding, model_name=model_name, silent=True + ) + + assert result["verified"] is True + assert result["distance"] < result["threshold"] + assert result["model"] == model_name + + logger.info("✅ test verify for pre-calculated embeddings done") + + +def test_verify_with_precalculated_embeddings_for_incorrect_model(): + # generate embeddings with VGG (default) + img1_path = "dataset/img1.jpg" + img2_path = "dataset/img2.jpg" + img1_embedding = DeepFace.represent(img_path=img1_path)[0]["embedding"] + img2_embedding = DeepFace.represent(img_path=img2_path)[0]["embedding"] + + with pytest.raises( + ValueError, + match="embeddings of Facenet should have 128 dimensions, but it has 4096 dimensions input", + ): + _ = DeepFace.verify( + img1_path=img1_embedding, img2_path=img2_embedding, model_name="Facenet", silent=True + ) + + logger.info("✅ test verify with pre-calculated embeddings for incorrect model done") + + +def test_verify_for_broken_embeddings(): + img1_embeddings = ["a", "b", "c"] + img2_embeddings = [1, 2, 3] + + with pytest.raises( + ValueError, + match="When passing img1_path as a list, ensure that all its items are of type float.", + ): + _ = DeepFace.verify(img1_path=img1_embeddings, img2_path=img2_embeddings) + logger.info("✅ test verify for broken embeddings content is done") From b3c98e3b1f4f616eeca6910dd779d33015e4623a Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 16:14:09 +0000 Subject: [PATCH 5/6] details for contribution --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index d0ea40a4a..317cbec23 100644 --- a/README.md +++ b/README.md @@ -312,9 +312,11 @@ $ deepface analyze -img_path tests/dataset/img1.jpg You can also run these commands if you are running deepface with docker. Please follow the instructions in the [shell script](https://github.com/serengil/deepface/blob/master/scripts/dockerize.sh#L17). -## Contribution [![Tests](https://github.com/serengil/deepface/actions/workflows/tests.yml/badge.svg)](https://github.com/serengil/deepface/actions/workflows/tests.yml) +## Contribution -Pull requests are more than welcome! You should run the unit tests and linting locally by running `make test && make lint` before creating a PR. Once a PR sent, GitHub test workflow will be run automatically and unit test results will be available in [GitHub actions](https://github.com/serengil/deepface/actions) before approval. Besides, workflow will evaluate the code with pylint as well. +Pull requests are more than welcome! If you are planning to contribute a large patch, please create an issue first to get any upfront questions or design decisions out of the way first. + +Before creating a PR, you should run the unit tests and linting locally by running `make test && make lint` command. Once a PR sent, GitHub test workflow will be run automatically and unit test and linting jobs will be available in [GitHub actions](https://github.com/serengil/deepface/actions) before approval. ## Support From 259add4a13ca9ca282025e6690955fa874044455 Mon Sep 17 00:00:00 2001 From: Sefik Ilkin Serengil Date: Fri, 8 Mar 2024 19:52:38 +0000 Subject: [PATCH 6/6] simplify getting pickled images --- deepface/modules/recognition.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/deepface/modules/recognition.py b/deepface/modules/recognition.py index ed8b47b50..dcc5abc2d 100644 --- a/deepface/modules/recognition.py +++ b/deepface/modules/recognition.py @@ -132,23 +132,20 @@ def find( f"Consider to delete {datastore_path}" ) - # Check if the representations are out-of-date - if len(representations) > 0: - pickled_images = [representation["identity"] for representation in representations] - else: - pickled_images = [] + # embedded images + pickled_images = [representation["identity"] for representation in representations] # Get the list of images on storage storage_images = __list_images(path=db_path) + if len(storage_images) == 0: + raise ValueError(f"No item found in {db_path}") + # Enforce data consistency amongst on disk images and pickle file must_save_pickle = False new_images = list(set(storage_images) - set(pickled_images)) # images added to storage old_images = list(set(pickled_images) - set(storage_images)) # images removed from storage - if not silent and (len(new_images) > 0 or len(old_images) > 0): - logger.info(f"Found {len(new_images)} new images and {len(old_images)} removed images") - # detect replaced images replaced_images = [] for current_representation in representations: @@ -158,9 +155,16 @@ def find( alpha_hash = current_representation["hash"] beta_hash = package_utils.find_hash_of_file(identity) if alpha_hash != beta_hash: - logger.warn(f"Even though {identity} represented before, it's replaced later.") + logger.debug(f"Even though {identity} represented before, it's replaced later.") replaced_images.append(identity) + if not silent and (len(new_images) > 0 or len(old_images) > 0 or len(replaced_images) > 0): + logger.info( + f"Found {len(new_images)} newly added image(s)" + f", {len(old_images)} removed image(s)" + f", {len(replaced_images)} replaced image(s)." + ) + # append replaced images into both old and new images. these will be dropped and re-added. new_images = new_images + replaced_images old_images = old_images + replaced_images @@ -201,6 +205,9 @@ def find( # now, we got representations for facial database df = pd.DataFrame(representations) + if silent is False: + logger.info(f"Searching {img_path} in {df.shape[0]} length datastore") + # img path might have more than once face source_objs = detection.extract_faces( img_path=img_path,