From fa6dfc3813a3b3561ececa2492cb34d2230da6a0 Mon Sep 17 00:00:00 2001
From: Manoj Kumar H S
Date: Wed, 15 Nov 2023 16:33:12 +0530
Subject: [PATCH] Updated Repo Structure - Manoj

---
 main2.py => FaceAnalysis/main2.py                  | 332 +++++++++---------
 README.md                                          |  16 +-
 .../IIsc_Voice_Sample.mp3                          | Bin
 .../speaker_verification.py                        |   0
 4 files changed, 174 insertions(+), 174 deletions(-)
 rename main2.py => FaceAnalysis/main2.py (97%)
 rename IIsc_Voice_Sample.mp3 => Speaker_Verification/IIsc_Voice_Sample.mp3 (100%)
 rename speaker_verification.py => Speaker_Verification/speaker_verification.py (100%)

diff --git a/main2.py b/FaceAnalysis/main2.py
similarity index 97%
rename from main2.py
rename to FaceAnalysis/main2.py
index 01e12fc..c3477ee 100644
--- a/main2.py
+++ b/FaceAnalysis/main2.py
@@ -1,166 +1,166 @@
-import cv2
-import mediapipe as mp
-import numpy as np
-import pandas as pd
-# place holders and global variables
-x = 0  # X axis head pose
-y = 0  # Y axis head pose
-
-X_AXIS_CHEAT = 0
-Y_AXIS_CHEAT = 0
-
-statements = []
-
-
-def pose():
-    global VOLUME_NORM, x, y, X_AXIS_CHEAT, Y_AXIS_CHEAT
-    #############################
-    mp_face_mesh = mp.solutions.face_mesh
-    face_mesh = mp_face_mesh.FaceMesh(
-        min_detection_confidence=0.5, min_tracking_confidence=0.5)
-    cap = cv2.VideoCapture(0)
-    mp_drawing = mp.solutions.drawing_utils
-    # mp_drawing_styles = mp.solutions
-
-    while cap.isOpened():
-        success, image = cap.read()
-        # Flip the image horizontally for a later selfie-view display
-        # Also convert the color space from BGR to RGB
-        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
-
-        # To improve performance
-        image.flags.writeable = False
-
-        # Get the result
-        results = face_mesh.process(image)
-
-        # To improve performance
-        image.flags.writeable = True
-
-        # Convert the color space from RGB to BGR
-        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
-
-        img_h, img_w, img_c = image.shape
-        face_3d = []
-        face_2d = []
-
-        face_ids = [33, 263, 1, 61, 291, 199]
-
-        if results.multi_face_landmarks:
-            for face_landmarks in results.multi_face_landmarks:
-                mp_drawing.draw_landmarks(
-                    image=image,
-                    landmark_list=face_landmarks,
-                    connections=mp_face_mesh.FACEMESH_CONTOURS,
-                    landmark_drawing_spec=None)
-                for idx, lm in enumerate(face_landmarks.landmark):
-                    # print(lm)
-                    if idx in face_ids:
-                        if idx == 1:
-                            nose_2d = (lm.x * img_w, lm.y * img_h)
-                            nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)
-
-                        x, y = int(lm.x * img_w), int(lm.y * img_h)
-
-                        # Get the 2D Coordinates
-                        face_2d.append([x, y])
-
-                        # Get the 3D Coordinates
-                        face_3d.append([x, y, lm.z])
-
-                # Convert it to the NumPy array
-                face_2d = np.array(face_2d, dtype=np.float64)
-
-                # Convert it to the NumPy array
-                face_3d = np.array(face_3d, dtype=np.float64)
-
-                # The camera matrix
-                focal_length = 1 * img_w
-
-                cam_matrix = np.array([[focal_length, 0, img_h / 2],
-                                       [0, focal_length, img_w / 2],
-                                       [0, 0, 1]])
-
-                # The Distance Matrix
-                dist_matrix = np.zeros((4, 1), dtype=np.float64)
-
-                # Solve PnP
-                success, rot_vec, trans_vec = cv2.solvePnP(
-                    face_3d, face_2d, cam_matrix, dist_matrix)
-
-                # Get rotational matrix
-                rmat, jac = cv2.Rodrigues(rot_vec)
-
-                # Get angles
-                angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
-
-                # Get the y rotation degree
-                x = angles[0] * 360
-                y = angles[1] * 360
-
-                # print(y)
-
-                # See where the user's head tilting
-                if y < -10:
-                    text = "Looking Left"
-                    statements.append(text)
-                    # print("looking left")
-                elif y > 10:
-                    text = "Looking Right"
-                    statements.append(text)
-                    # print("looking right")
-                elif x < -10:
-                    text = "Looking Down"
-                    statements.append(text)
-                    # print("looking down")
-                else:
-                    text = "Looking Straight"
-                    statements.append(text)
-                    # print("looking straight")
-                text = str(int(x)) + "::" + str(int(y)) + text
-                # print(str(int(x)) + "::" + str(int(y)))
-                # print("x: {x} | y: {y} | sound amplitude: {amp}".format(x=int(x), y=int(y), amp=audio.SOUND_AMPLITUDE))
-
-                # Y is left / right
-                # X is up / down
-                if y < -10 or y > 10:
-                    X_AXIS_CHEAT = 1
-                else:
-                    X_AXIS_CHEAT = 0
-
-                if x < -5:
-                    Y_AXIS_CHEAT = 1
-                else:
-                    Y_AXIS_CHEAT = 0
-
-                # print(X_AXIS_CHEAT, Y_AXIS_CHEAT)
-                # Display the nose direction
-                nose_3d_projection, jacobian = cv2.projectPoints(
-                    nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
-
-                p1 = (int(nose_2d[0]), int(nose_2d[1]))
-                p2 = (int(nose_3d_projection[0][0][0]), int(
-                    nose_3d_projection[0][0][1]))
-
-                cv2.line(image, p1, p2, (255, 0, 0), 2)
-
-                # Add the text on the image
-                cv2.putText(image, text, (20, 20),
-                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
-
-        cv2.imshow('Head Pose Estimation', image)
-
-        if cv2.waitKey(1) & 0xFF == ord('q'):
-            break
-
-    df = pd.DataFrame(data={"statement": statements})
-    statement_counts = df['statement'].value_counts()
-    statement_percentages = (statement_counts / len(statements)) * 100
-    print(statement_percentages)
-
-    cap.release()
-
-
-#############################
-if __name__ == "__main__":
-    pose()
+import cv2
+import mediapipe as mp
+import numpy as np
+import pandas as pd
+# place holders and global variables
+x = 0  # X axis head pose
+y = 0  # Y axis head pose
+
+X_AXIS_CHEAT = 0
+Y_AXIS_CHEAT = 0
+
+statements = []
+
+
+def pose():
+    global VOLUME_NORM, x, y, X_AXIS_CHEAT, Y_AXIS_CHEAT
+    #############################
+    mp_face_mesh = mp.solutions.face_mesh
+    face_mesh = mp_face_mesh.FaceMesh(
+        min_detection_confidence=0.5, min_tracking_confidence=0.5)
+    cap = cv2.VideoCapture(0)
+    mp_drawing = mp.solutions.drawing_utils
+    # mp_drawing_styles = mp.solutions
+
+    while cap.isOpened():
+        success, image = cap.read()
+        # Flip the image horizontally for a later selfie-view display
+        # Also convert the color space from BGR to RGB
+        image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
+
+        # To improve performance
+        image.flags.writeable = False
+
+        # Get the result
+        results = face_mesh.process(image)
+
+        # To improve performance
+        image.flags.writeable = True
+
+        # Convert the color space from RGB to BGR
+        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+        img_h, img_w, img_c = image.shape
+        face_3d = []
+        face_2d = []
+
+        face_ids = [33, 263, 1, 61, 291, 199]
+
+        if results.multi_face_landmarks:
+            for face_landmarks in results.multi_face_landmarks:
+                mp_drawing.draw_landmarks(
+                    image=image,
+                    landmark_list=face_landmarks,
+                    connections=mp_face_mesh.FACEMESH_CONTOURS,
+                    landmark_drawing_spec=None)
+                for idx, lm in enumerate(face_landmarks.landmark):
+                    # print(lm)
+                    if idx in face_ids:
+                        if idx == 1:
+                            nose_2d = (lm.x * img_w, lm.y * img_h)
+                            nose_3d = (lm.x * img_w, lm.y * img_h, lm.z * 8000)
+
+                        x, y = int(lm.x * img_w), int(lm.y * img_h)
+
+                        # Get the 2D Coordinates
+                        face_2d.append([x, y])
+
+                        # Get the 3D Coordinates
+                        face_3d.append([x, y, lm.z])
+
+                # Convert it to the NumPy array
+                face_2d = np.array(face_2d, dtype=np.float64)
+
+                # Convert it to the NumPy array
+                face_3d = np.array(face_3d, dtype=np.float64)
+
+                # The camera matrix
+                focal_length = 1 * img_w
+
+                cam_matrix = np.array([[focal_length, 0, img_h / 2],
+                                       [0, focal_length, img_w / 2],
+                                       [0, 0, 1]])
+
+                # The Distance Matrix
+                dist_matrix = np.zeros((4, 1), dtype=np.float64)
+
+                # Solve PnP
+                success, rot_vec, trans_vec = cv2.solvePnP(
+                    face_3d, face_2d, cam_matrix, dist_matrix)
+
+                # Get rotational matrix
+                rmat, jac = cv2.Rodrigues(rot_vec)
+
+                # Get angles
+                angles, mtxR, mtxQ, Qx, Qy, Qz = cv2.RQDecomp3x3(rmat)
+
+                # Get the y rotation degree
+                x = angles[0] * 360
+                y = angles[1] * 360
+
+                # print(y)
+
+                # See where the user's head tilting
+                if y < -10:
+                    text = "Looking Left"
+                    statements.append(text)
+                    # print("looking left")
+                elif y > 10:
+                    text = "Looking Right"
+                    statements.append(text)
+                    # print("looking right")
+                elif x < -10:
+                    text = "Looking Down"
+                    statements.append(text)
+                    # print("looking down")
+                else:
+                    text = "Looking Straight"
+                    statements.append(text)
+                    # print("looking straight")
+                text = str(int(x)) + "::" + str(int(y)) + text
+                # print(str(int(x)) + "::" + str(int(y)))
+                # print("x: {x} | y: {y} | sound amplitude: {amp}".format(x=int(x), y=int(y), amp=audio.SOUND_AMPLITUDE))
+
+                # Y is left / right
+                # X is up / down
+                if y < -10 or y > 10:
+                    X_AXIS_CHEAT = 1
+                else:
+                    X_AXIS_CHEAT = 0
+
+                if x < -5:
+                    Y_AXIS_CHEAT = 1
+                else:
+                    Y_AXIS_CHEAT = 0
+
+                # print(X_AXIS_CHEAT, Y_AXIS_CHEAT)
+                # Display the nose direction
+                nose_3d_projection, jacobian = cv2.projectPoints(
+                    nose_3d, rot_vec, trans_vec, cam_matrix, dist_matrix)
+
+                p1 = (int(nose_2d[0]), int(nose_2d[1]))
+                p2 = (int(nose_3d_projection[0][0][0]), int(
+                    nose_3d_projection[0][0][1]))
+
+                cv2.line(image, p1, p2, (255, 0, 0), 2)
+
+                # Add the text on the image
+                cv2.putText(image, text, (20, 20),
+                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+        cv2.imshow('Head Pose Estimation', image)
+
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    df = pd.DataFrame(data={"statement": statements})
+    statement_counts = df['statement'].value_counts()
+    statement_percentages = (statement_counts / len(statements)) * 100
+    print(statement_percentages)
+
+    cap.release()
+
+
+#############################
+if __name__ == "__main__":
+    pose()
diff --git a/README.md b/README.md
index 8800aaf..953ee2b 100644
--- a/README.md
+++ b/README.md
@@ -22,21 +22,21 @@ To run this code, you need the following libraries and dependencies:
 - You can install the required Python libraries using pip and requirements.txt
 ```
-pip install requirements.txt
+ pip install -r requirements.txt
 ```
 
 ### Installation
 1. Clone this repository or download the script to your local machine.
 ```
-git clone https://github.com/Manoj-2702/FacialAnalysis-IISc.git
+ git clone https://github.com/Manoj-2702/FacialAnalysis-IISc.git
 ```
 2. Navigate to the project directory.
 ```
-cd FacialAnalysis-IISc
+ cd FacialAnalysis-IISc/FaceAnalysis
 ```
 3. Run the script
 ```
-python main2.py
+ python main2.py
 ```
 
 
 
@@ -71,7 +71,7 @@ This script uses the pyannote library to perform speaker verification on audio s
 
 - You can install the required libraries using pip and requirements.txt
 ```
-pip install requirements.txt
+ pip install -r requirements.txt
 ```
 
 
@@ -79,15 +79,15 @@ pip install requirements.txt
 ### Installation
 1. Clone this repository or download the script to your local machine.
 ```
-git clone https://github.com/Manoj-2702/FacialAnalysis-IISc.git
+ git clone https://github.com/Manoj-2702/FacialAnalysis-IISc.git
 ```
 2. Navigate to the project directory.
 ```
-cd FacialAnalysis-IISc
+ cd FacialAnalysis-IISc/Speaker_Verification
 ```
 3. Run the script
 ```
-python speaker_verification.py
+ python speaker_verification.py
 ```
 
 ### Parameters
diff --git a/IIsc_Voice_Sample.mp3 b/Speaker_Verification/IIsc_Voice_Sample.mp3
similarity index 100%
rename from IIsc_Voice_Sample.mp3
rename to Speaker_Verification/IIsc_Voice_Sample.mp3
diff --git a/speaker_verification.py b/Speaker_Verification/speaker_verification.py
similarity index 100%
rename from speaker_verification.py
rename to Speaker_Verification/speaker_verification.py