Skip to content

Commit

Permalink
[web] ML cleanup - Part 3/x (#1737)
Browse files Browse the repository at this point in the history
  • Loading branch information
mnvr authored May 16, 2024
2 parents 95d2c54 + 3327277 commit a0cb8b8
Show file tree
Hide file tree
Showing 41 changed files with 1,235 additions and 2,062 deletions.
6 changes: 3 additions & 3 deletions desktop/src/main/ipc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ import {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
} from "./services/ml-clip";
import { detectFaces, faceEmbedding } from "./services/ml-face";
import { detectFaces, faceEmbeddings } from "./services/ml-face";
import { encryptionKey, saveEncryptionKey } from "./services/store";
import {
clearPendingUploads,
Expand Down Expand Up @@ -182,8 +182,8 @@ export const attachIPCHandlers = () => {
detectFaces(input),
);

ipcMain.handle("faceEmbedding", (_, input: Float32Array) =>
faceEmbedding(input),
ipcMain.handle("faceEmbeddings", (_, input: Float32Array) =>
faceEmbeddings(input),
);

ipcMain.handle("legacyFaceCrop", (_, faceID: string) =>
Expand Down
2 changes: 1 addition & 1 deletion desktop/src/main/services/ml-face.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const cachedFaceEmbeddingSession = makeCachedInferenceSession(
5286998 /* 5 MB */,
);

export const faceEmbedding = async (input: Float32Array) => {
export const faceEmbeddings = async (input: Float32Array) => {
// Dimension of each face (alias)
const mobileFaceNetFaceSize = 112;
// Smaller alias
Expand Down
6 changes: 3 additions & 3 deletions desktop/src/preload.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ const clipTextEmbeddingIfAvailable = (text: string) =>
const detectFaces = (input: Float32Array) =>
ipcRenderer.invoke("detectFaces", input);

const faceEmbedding = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbedding", input);
const faceEmbeddings = (input: Float32Array) =>
ipcRenderer.invoke("faceEmbeddings", input);

const legacyFaceCrop = (faceID: string) =>
ipcRenderer.invoke("legacyFaceCrop", faceID);
Expand Down Expand Up @@ -343,7 +343,7 @@ contextBridge.exposeInMainWorld("electron", {
clipImageEmbedding,
clipTextEmbeddingIfAvailable,
detectFaces,
faceEmbedding,
faceEmbeddings,
legacyFaceCrop,

// - Watch
Expand Down
1 change: 0 additions & 1 deletion web/apps/photos/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
"chrono-node": "^2.2.6",
"date-fns": "^2",
"debounce": "^2.0.0",
"density-clustering": "^1.3.0",
"eventemitter3": "^4.0.7",
"exifr": "^7.1.3",
"fast-srp-hap": "^2.0.4",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { t } from "i18next";
import { AppContext } from "pages/_app";
import { useContext } from "react";
import { components } from "react-select";
import { IndexStatus } from "services/ml/db";
import { IndexStatus } from "services/face/db";
import { Suggestion, SuggestionType } from "types/search";

const { Menu } = components;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import { useCallback, useContext, useEffect, useRef, useState } from "react";
import { components } from "react-select";
import AsyncSelect from "react-select/async";
import { InputActionMeta } from "react-select/src/types";
import { Person } from "services/face/types";
import { City } from "services/locationSearchService";
import { Person } from "services/ml/types";
import {
getAutoCompleteSuggestions,
getDefaultOptions,
Expand Down
7 changes: 2 additions & 5 deletions web/apps/photos/src/components/ml/PeopleList.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ import { Skeleton, styled } from "@mui/material";
import { Legend } from "components/PhotoViewer/styledComponents/Legend";
import { t } from "i18next";
import React, { useEffect, useState } from "react";
import mlIDbStorage from "services/ml/db";
import { Face, Person, type MlFileData } from "services/ml/types";
import mlIDbStorage from "services/face/db";
import { Face, Person, type MlFileData } from "services/face/types";
import { EnteFile } from "types/file";

const FaceChipContainer = styled("div")`
Expand Down Expand Up @@ -167,10 +167,7 @@ const FaceCropImageView: React.FC<FaceCropImageViewProps> = ({
.legacyFaceCrop(faceID)
/*
cachedOrNew("face-crops", cacheKey, async () => {
const user = await ensureLocalUser();
return machineLearningService.regenerateFaceCrop(
user.token,
user.id,
faceId,
);
})*/
Expand Down
4 changes: 4 additions & 0 deletions web/apps/photos/src/services/embeddingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ export const syncCLIPEmbeddings = async () => {
if (!response.diff?.length) {
return;
}
// Note: in rare cases we might get a diff entry for an embedding
// corresponding to a file which has been deleted (but whose
// embedding is enqueued for deletion). Client should expect such a
// scenario (all it has to do is just ignore them).
const newEmbeddings = await Promise.all(
response.diff.map(async (embedding) => {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,31 +1,8 @@
import { Matrix } from "ml-matrix";
import { Point } from "services/ml/geom";
import {
FaceAlignment,
FaceAlignmentMethod,
FaceAlignmentService,
FaceDetection,
Versioned,
} from "services/ml/types";
import { Point } from "services/face/geom";
import { FaceAlignment, FaceDetection } from "services/face/types";
import { getSimilarityTransformation } from "similarity-transformation";

class ArcfaceAlignmentService implements FaceAlignmentService {
public method: Versioned<FaceAlignmentMethod>;

constructor() {
this.method = {
value: "ArcFace",
version: 1,
};
}

public getFaceAlignment(faceDetection: FaceDetection): FaceAlignment {
return getArcfaceAlignment(faceDetection);
}
}

export default new ArcfaceAlignmentService();

const ARCFACE_LANDMARKS = [
[38.2946, 51.6963],
[73.5318, 51.5014],
Expand All @@ -43,9 +20,12 @@ const ARC_FACE_5_LANDMARKS = [
[70.7299, 92.2041],
] as Array<[number, number]>;

export function getArcfaceAlignment(
faceDetection: FaceDetection,
): FaceAlignment {
/**
* Compute and return an {@link FaceAlignment} for the given face detection.
*
* @param faceDetection A geometry indicating a face detected in an image.
*/
export const faceAlignment = (faceDetection: FaceDetection): FaceAlignment => {
const landmarkCount = faceDetection.landmarks.length;
return getFaceAlignmentUsingSimilarityTransform(
faceDetection,
Expand All @@ -54,12 +34,11 @@ export function getArcfaceAlignment(
ARCFACE_LANDMARKS_FACE_SIZE,
),
);
}
};

function getFaceAlignmentUsingSimilarityTransform(
faceDetection: FaceDetection,
alignedLandmarks: Array<[number, number]>,
// alignmentMethod: Versioned<FaceAlignmentMethod>
): FaceAlignment {
const landmarksMat = new Matrix(
faceDetection.landmarks
Expand Down Expand Up @@ -90,7 +69,6 @@ function getFaceAlignmentUsingSimilarityTransform(
simTransform.rotation.get(0, 1),
simTransform.rotation.get(0, 0),
);
// log.info({ affineMatrix, meanTranslation, centerMat, center, toMean: simTransform.toMean, fromMean: simTransform.fromMean, size });

return {
affineMatrix,
Expand Down
187 changes: 187 additions & 0 deletions web/apps/photos/src/services/face/blur.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
import { Face } from "services/face/types";
import { createGrayscaleIntMatrixFromNormalized2List } from "utils/image";
import { mobileFaceNetFaceSize } from "./embed";

/**
 * Laplacian blur detection.
 *
 * For every face packed into {@link alignedFaces}, convert that face's data
 * into a grayscale matrix, run the Laplacian filter over it (cropped according
 * to the direction the face is turned), and report the variance of the
 * filtered result.
 *
 * @param alignedFaces Face data for all faces laid out back to back. The
 * number of faces is derived from its length, with each face taken to occupy
 * `mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3` values (presumably 3
 * colour channels per pixel - confirm against the producer of this array).
 *
 * @param faces The faces whose landmarks decide the crop direction;
 * `faces[i]` is paired with the i-th face in {@link alignedFaces}.
 *
 * @returns One variance value per face, in the same order as the input.
 */
export const detectBlur = (
    alignedFaces: Float32Array,
    faces: Face[],
): number[] => {
    const valuesPerFace = mobileFaceNetFaceSize * mobileFaceNetFaceSize * 3;
    const faceCount = Math.round(alignedFaces.length / valuesPerFace);

    return Array.from({ length: faceCount }, (_, index) => {
        const turned = faceDirection(faces[index]);
        const grayscale = createGrayscaleIntMatrixFromNormalized2List(
            alignedFaces,
            index,
        );
        return matrixVariance(applyLaplacian(grayscale, turned));
    });
};

/** Which way a face is turned, as classified by `faceDirection`. */
type FaceDirection = "left" | "right" | "straight";

/**
 * Classify which way a face is turned using its five detection landmarks.
 *
 * The landmarks are read in the order: left eye, right eye, nose, left mouth
 * corner, right mouth corner.
 *
 * A face is reported as "left" (respectively "right") when either
 *
 * - the nose's x coordinate is smaller (respectively larger) than that of
 *   both eyes and both mouth corners, or
 *
 * - the face is upright (the nose lies below both eyes and above both mouth
 *   corners by a margin of half the corresponding vertical spread) and the
 *   nose is horizontally within 20% of the eye distance from the left
 *   (respectively right) eye.
 *
 * The "left" test takes precedence. Everything else is "straight".
 */
const faceDirection = (face: Face): FaceDirection => {
    const [leftEye, rightEye, nose, leftMouth, rightMouth] =
        face.detection.landmarks;

    const eyeSpanX = Math.abs(rightEye.x - leftEye.x);
    const eyeSpanY = Math.abs(rightEye.y - leftEye.y);
    const mouthSpanY = Math.abs(rightMouth.y - leftMouth.y);

    // Eyes above the nose, nose above the mouth, each with some slack.
    const lowerEyeY = Math.max(leftEye.y, rightEye.y);
    const upperMouthY = Math.min(leftMouth.y, rightMouth.y);
    const isUpright =
        lowerEyeY + 0.5 * eyeSpanY < nose.y &&
        nose.y + 0.5 * mouthSpanY < upperMouthY;

    // Nose lies horizontally outside of all the other landmarks.
    const noseBeyondLeft =
        nose.x < Math.min(leftEye.x, rightEye.x, leftMouth.x, rightMouth.x);
    const noseBeyondRight =
        nose.x > Math.max(leftEye.x, rightEye.x, leftMouth.x, rightMouth.x);

    // Nose is almost directly under one of the eyes.
    const closeEnough = 0.2 * eyeSpanX;
    const noseIsNear = (eye: { x: number }) =>
        Math.abs(nose.x - eye.x) < closeEnough;

    if (noseBeyondLeft || (isUpright && noseIsNear(leftEye))) return "left";
    if (noseBeyondRight || (isUpright && noseIsNear(rightEye))) return "right";
    return "straight";
};

/**
 * Return a new image by applying a Laplacian kernel to each pixel.
 *
 * The input is first cropped and padded by `padImage` (which columns survive
 * depends on {@link direction}), and the 3x3 kernel
 *
 *      0  1  0
 *      1 -4  1
 *      0  1  0
 *
 * is then evaluated at every position of the padded image where it fits
 * entirely. The result therefore has 2 fewer rows and 2 fewer columns than
 * the padded image. Output values are the raw weighted sums; no clipping or
 * normalization is applied.
 */
const applyLaplacian = (
    image: number[][],
    direction: FaceDirection,
): number[][] => {
    const padded = padImage(image, direction);
    const outRows = padded.length - 2;
    const outCols = padded[0].length - 2;

    const weights = [
        [0, 1, 0],
        [1, -4, 1],
        [0, 1, 0],
    ];

    return Array.from({ length: outRows }, (_, r) => {
        const outRow = new Array<number>(outCols).fill(0);
        for (let c = 0; c < outCols; c++) {
            // Weighted sum of the 3x3 neighbourhood whose top left corner is
            // at (r, c) in the padded image.
            let acc = 0;
            weights.forEach((weightRow, dr) => {
                weightRow.forEach((weight, dc) => {
                    acc += padded[r + dr][c + dc] * weight;
                });
            });
            outRow[c] = acc;
        }
        return outRow;
    });
};

/**
 * Crop a vertical band of columns out of {@link image} and surround it with
 * a one pixel wide reflected border.
 *
 * The band is `removeSideColumns` (56) columns narrower than the input.
 * Which columns are kept depends on {@link direction}:
 *
 * - "straight": the middle band (half of the removed columns are dropped
 *   from each side).
 * - "left": the right side of the image (the leading columns are dropped).
 * - "right": the left side of the image (the trailing columns are dropped).
 *
 * The border is filled by mirroring around the edge pixel, i.e. each border
 * pixel takes the value of the pixel two steps inwards. Rows are reflected
 * first, then columns, so the corners end up mirrored in both directions.
 *
 * @returns A new matrix with `image.length + 2` rows and
 * `image[0].length + 2 - 56` columns. The input is not modified.
 */
const padImage = (image: number[][], direction: FaceDirection): number[][] => {
    const removeSideColumns = 56; /* must be even */

    const sourceRows = image.length;
    const sourceCols = image[0].length;
    const paddedRows = sourceRows + 2;
    const paddedCols = sourceCols + 2 - removeSideColumns;
    const innerCols = paddedCols - 2;

    // Start off with an all zero matrix of the padded size.
    const padded: number[][] = [];
    for (let r = 0; r < paddedRows; r++) {
        padded.push(new Array<number>(paddedCols).fill(0));
    }

    // Index of the first source column that is retained, per direction.
    const firstKeptColumn = new Map<string, number>([
        ["straight", Math.round(removeSideColumns / 2)],
        ["left", removeSideColumns],
        ["right", 0],
    ]);

    // Copy the retained band into the interior, leaving the border untouched.
    const shift = firstKeptColumn.get(direction);
    if (shift !== undefined) {
        for (let r = 0; r < sourceRows; r++) {
            for (let c = 0; c < innerCols; c++) {
                padded[r + 1][c + 1] = image[r][c + shift];
            }
        }
    }

    // Reflect padding: top and bottom rows first...
    for (let c = 1; c <= innerCols; c++) {
        padded[0][c] = padded[2][c];
        padded[sourceRows + 1][c] = padded[sourceRows - 1][c];
    }
    // ...then the left and right columns (including the corners).
    for (let r = 0; r < paddedRows; r++) {
        const row = padded[r];
        row[0] = row[2];
        row[paddedCols - 1] = row[paddedCols - 3];
    }

    return padded;
};

/**
 * Return the population variance of all the values in {@link matrix}.
 *
 * That is, the mean of the squared deviations from the mean, dividing by the
 * total number of elements (not by one less than it).
 *
 * The element count is taken to be the number of rows times the length of the
 * first row, so the matrix is expected to be non-empty and rectangular.
 */
const matrixVariance = (matrix: number[][]): number => {
    const elementCount = matrix.length * matrix[0].length;

    // Accumulate `term(value)` over every element, in row-major order.
    const sumOver = (term: (value: number) => number): number =>
        matrix.reduce(
            (outer, row) =>
                row.reduce((inner, value) => inner + term(value), outer),
            0,
        );

    const mean = sumOver((value) => value) / elementCount;

    const squaredDeviations = sumOver((value) => {
        const deviation = value - mean;
        return deviation * deviation;
    });

    return squaredDeviations / elementCount;
};
Loading

0 comments on commit a0cb8b8

Please sign in to comment.