diff --git a/head_pose.py b/head_pose.py
index 6184bd1..9bfd850 100644
--- a/head_pose.py
+++ b/head_pose.py
@@ -3,6 +3,8 @@
 import cv2
 import dlib
 import numpy as np
+import os
+import pickle
 from PIL import Image, ImageDraw
 
 
@@ -16,6 +18,146 @@
 predictor_path = "shape_predictor_68_face_landmarks.dat"
 detector = dlib.get_frontal_face_detector()
 predictor = dlib.shape_predictor(predictor_path)
+screenDrawCorners = np.array([[10,60], [90, 60], [10, 110], [90, 110]])
+
+
+def create_perspective_transform_matrix(src, dst):
+    """ Creates a perspective transformation matrix which transforms points
+        in quadrilateral ``src`` to the corresponding points on quadrilateral
+        ``dst``.
+
+        Will raise a ``np.linalg.LinAlgError`` on invalid input.
+        """
+    # See:
+    # * http://xenia.media.mit.edu/~cwren/interpolator/
+    # * http://stackoverflow.com/a/14178717/71522
+    in_matrix = []
+    for (x, y), (X, Y) in zip(src, dst):
+        in_matrix.extend([
+            [x, y, 1, 0, 0, 0, -X * x, -X * y],
+            [0, 0, 0, x, y, 1, -Y * x, -Y * y],
+        ])
+
+    A = np.matrix(in_matrix, dtype=np.float)
+    B = np.array(dst).reshape(8)
+    af = np.dot(np.linalg.inv(A.T * A) * A.T, B)
+    m = np.append(np.array(af).reshape(8), 1).reshape((3, 3))
+    print("Created transform matrix:", src, dst, m)
+    return m
+
+def transMatrix2(fromMatrix, toMatrix):
+    matrix = []
+    for p1, p2 in zip(toMatrix, fromMatrix):
+        matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
+        matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
+
+    A = np.matrix(matrix, dtype=np.float)
+    B = np.array(fromMatrix).reshape(8)
+
+    res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
+    m = np.append(np.array(res).reshape(8), 1).reshape((3,3))
+    return m
+
+
+
+def create_perspective_transform(src, dst, round=False, splat_args=False):
+    """ Returns a function which will transform points in quadrilateral
+        ``src`` to the corresponding points on quadrilateral ``dst``::
+
+            >>> transform = create_perspective_transform(
+            ...     [(0, 0), (10, 0), (10, 10), (0, 10)],
+            ...     [(50, 50), (100, 50), (100, 100), (50, 100)],
+            ... )
+            >>> transform((5, 5))
+            (74.99999999999639, 74.999999999999957)
+
+        If ``round`` is ``True`` then points will be rounded to the nearest
+        integer and integer values will be returned::
+
+            >>> transform = create_perspective_transform(
+            ...     [(0, 0), (10, 0), (10, 10), (0, 10)],
+            ...     [(50, 50), (100, 50), (100, 100), (50, 100)],
+            ...     round=True,
+            ... )
+            >>> transform((5, 5))
+            (75, 75)
+
+        If ``splat_args`` is ``True`` the function will accept two arguments
+        instead of a tuple::
+
+            >>> transform = create_perspective_transform(
+            ...     [(0, 0), (10, 0), (10, 10), (0, 10)],
+            ...     [(50, 50), (100, 50), (100, 100), (50, 100)],
+            ...     splat_args=True,
+            ... )
+            >>> transform(5, 5)
+            (74.99999999999639, 74.999999999999957)
+
+        If the input values yield an invalid transformation matrix an identity
+        function will be returned and the ``error`` attribute will be set to a
+        description of the error::
+
+            >>> transform = create_perspective_transform(
+            ...     np.zeros((4, 2)),
+            ...     np.zeros((4, 2)),
+            ... )
+            >>> transform((5, 5))
+            (5.0, 5.0)
+            >>> transform.error
+            'invalid input quads (...): Singular matrix'
+        """
+    try:
+        transform_matrix = create_perspective_transform_matrix(src, dst)
+        error = None
+    except np.linalg.LinAlgError as e:
+        transform_matrix = np.identity(3, dtype=np.float)
+        error = "invalid input quads (%s and %s): %s" % (src, dst, e)
+        error = error.replace("\n", "")
+
+    to_eval = "def perspective_transform(%s):\n" % (
+        splat_args and "*pt" or "pt",
+    )
+    to_eval += "    res = np.dot(transform_matrix, ((pt[0], ), (pt[1], ), (1, )))\n"
+    to_eval += "    res = res / res[2]\n"
+    if round:
+        to_eval += "    return (int(round(res[0][0])), int(round(res[1][0])))\n"
+    else:
+        to_eval += "    return (res[0][0], res[1][0])\n"
+    locals = {
+        "transform_matrix": transform_matrix,
+    }
+    locals.update(globals())
+    exec(to_eval, locals, locals)
+    res = locals["perspective_transform"]
+    res.matrix = transform_matrix
+    res.error = error
+    return res
+
+def coordinatesToSrc(coordinates):
+    return np.array([coordinates['tl'], coordinates['tr'], coordinates['bl'], coordinates['br']])
+
+# coordinates of the screen boundaries
+if os.path.exists("coordinates.p"):
+    coordinates = pickle.load(open("coordinates.p", "rb"))
+    transformationMatrix = create_perspective_transform_matrix(coordinatesToSrc(coordinates), screenDrawCorners)
+    transformationMatrix2 = transMatrix2(coordinatesToSrc(coordinates), screenDrawCorners)
+    transform = create_perspective_transform(coordinatesToSrc(coordinates), screenDrawCorners)
+    a = [np.array([ 1312.15541183]), np.array([ 244.56278002]), 0]
+    # a = [np.array([ 100.15541183]), np.array([ 244.56278002]), 0]
+    print("Coords", coordinatesToSrc(coordinates))
+    print("Corners:", screenDrawCorners)
+    print("src", a)
+    print("C", transformationMatrix)
+    print("C2", transformationMatrix2)
+    print("new point", np.dot(a, transformationMatrix))
+    print("new point 2", np.dot(a, transformationMatrix2))
+    print("new point 2", np.dot(transformationMatrix2, a))
+    print("new point 3", transform(a[0:2]))
+    # exit()
+else:
+    coordinates = {'tl': None, 'tr': None, 'bl': None, 'br': None}
+    transformationMatrix = None
+
 while True:
     _, im = c.read()
     size = im.shape
@@ -27,6 +169,10 @@
     print("Number of faces detected: {}".format(len(dets)))
 
+    # We use this later for calibrating
+    currentPoint = None
+    currentPoints = []
+
     if len(dets) > 0:
 
         for d in dets:
 
@@ -42,7 +188,7 @@
                 (shape.part(48).x,shape.part(48).y),     # Left Mouth corner
                 (shape.part(54).x,shape.part(54).y)      # Right mouth corner
             ], dtype="double")
-            
+
             # 3D model points.
             model_points = np.array([
                 (0.0, 0.0, 0.0),             # Nose tip
@@ -54,7 +200,6 @@
             ])
 
-
             # Camera internals
             focal_length = size[1]
             center = (size[1]/2, size[0]/2)
 
@@ -63,9 +208,9 @@
                              [0, focal_length, center[1]],
                              [0, 0, 1]], dtype = "double"
                              )
-            
+
             # print ("Camera Matrix :\n {0}".format(camera_matrix))
-            
+
             dist_coeffs = np.zeros((4,1)) # Assuming no lens distortion
 
             (success, rotation_vector, translation_vector) = cv2.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv2.SOLVEPNP_ITERATIVE)
@@ -82,13 +227,11 @@
             for p in image_points:
                 cv2.circle(im, (int(p[0]), int(p[1])), 3, (0,0,255), -1)
-
-
+
             p1 = ( int(image_points[0][0]), int(image_points[0][1]))
             p2 = ( int(nose_end_point2D[0][0][0]), int(nose_end_point2D[0][0][1]))
 
             cv2.line(im, p1, p2, (255,0,0), 2)
 
-
             rotMatrix = np.zeros([3,3])
             cv2.Rodrigues(rotation_vector, rotMatrix, jacobian=0)
 
@@ -102,9 +245,6 @@
             rz = np.arctan2(rotMatrix[0,1]/np.cos(ry), rotMatrix[0,0]/np.cos(ry))
             print("rotation ml", rx, ry, rz) # seems better?
 
-            # rotatedVector = np.dot(rotMatrix, translation_vector)
-            # print("rvec", rotatedVector)
-
             # draw little floorplan for x: 10 -> 50 maps to z: 0 -> 10000, x: -2000 -> 2000
             mapPosX = int((translation_vector[0] + 500) / 1000 * 40)
             mapPosY = int((translation_vector[1] + 500) / 1000 * 40)
@@ -120,16 +260,10 @@
             # draw rotation vector
             cv2.circle(im, (mapPosZ + 60, mapPosY + 10), 2, (0,0,255), -1)
-
-            # cv2.line(im, (mapPosZ + 10, mapPosX + 10), (mapPosZ + 10 + int(rotation_vector[2]*5), mapPosX + 10 + int(rotation_vector[0]*5)), (255,255,0), 1)
-            # cv2.line(im, (mapPosZ + 60, mapPosY + 10), (mapPosZ + 60 + int(rotation_vector[2]*5), mapPosY + 10 + int(rotation_vector[1]*200)), (255,255,0), 1)
-
-            # print(rotMatrix)
 
             viewDirectionVector = np.dot(np.array([0.0, 0.0, 1000.0]), rotMatrix)
             cv2.line(im, (mapPosZ + 10, mapPosX + 10), (mapPosZ + 10 + int(viewDirectionVector[2] * 100), mapPosX + 10 + int(viewDirectionVector[0] * 100)), (255,255,0), 1)
             cv2.line(im, (mapPosZ + 60, mapPosY + 10), (mapPosZ + 60 + int(viewDirectionVector[2] * 100), mapPosY + 10 - int(viewDirectionVector[1] * 100)), (255,0,255), 1)
 
-
             # Translation vector gives position in space:
             # x, y z: 0,0,0 is center of camera
             # line: (x,y,z) = f(a) = (t1 + r1*a, t2+r2*a, t3 + r3*a)
@@ -148,22 +282,88 @@
             a = - translation_vector[2] / viewDirectionVector[2]
             x = translation_vector[0] + viewDirectionVector[0] * a
             y = translation_vector[1] + viewDirectionVector[1] * a
+            point = np.array([x,y])
+            currentPoint = point
+            currentPoints.append(point)
+            # TODO only draw the nose line for now, so we can change its color depending on whether it is on screen or not
+
+    # processed all faces, now draw on screen:
-            print (a, x, y)
-
     # draw little floorplan for 10 -> 50, sideplan 60 -> 100 (40x40 px)
     cv2.rectangle(im, (9, 9), (51, 51), (255,255,255), 1)
     cv2.rectangle(im, (59, 9), (101, 51), (255,255,255), 1)
     cv2.line(im, (10,10), (10,50), (200,200,200), 2)
     cv2.line(im, (60,10), (60,50), (200,200,200), 2)
+    # screen is 16:10
+    cv2.rectangle(im, (9, 59), (91, 111), (255,255,255), 1)
+
+    if transformationMatrix is None:
+        cv2.putText(im, "1", (10,70), cv2.FONT_HERSHEY_PLAIN, .7, (255,255,255) if coordinates['tl'] is not None else (0,0,255))
+        cv2.putText(im, "2", (85,70), cv2.FONT_HERSHEY_PLAIN, .7, (255,255,255) if coordinates['tr'] is not None else (0,0,255))
+        cv2.putText(im, "3", (10,110), cv2.FONT_HERSHEY_PLAIN, .7, (255,255,255) if coordinates['bl'] is not None else (0,0,255))
+        cv2.putText(im, "4", (85,110), cv2.FONT_HERSHEY_PLAIN, .7, (255,255,255) if coordinates['br'] is not None else (0,0,255))
+    else:
+        for point in currentPoints:
+            # check if within coordinates:
+            # dot1 = np.dot(coordinates['tl'] - point, coordinates['tl'] - coordinates['br'])
+            # dot2 = np.dot(coordinates['bl'] - point, coordinates['tl'] - coordinates['br'])
+            # pointIn3 = [point[0], point[1], 0]
+            # targetPoint = np.dot(pointIn3, transformationMatrix)
+            # print("Looking at", pointIn3, np.dot(transformationMatrix, pointIn3))
+            targetPoint = transform(point)
+            print("Looking at", point, targetPoint)
+            # cv2.circle(im, (int(targetPoint[0]), int(targetPoint[1])), 2, (0,255,0), -1)
+            cv2.circle(im, (int(targetPoint[0]), int(targetPoint[1])), 2, (0,255,0), -1)
+
     # Display image
     cv2.imshow("Output", im)
 
     keyPress = cv2.waitKey(5)
     if keyPress==27:
         break
+    elif keyPress > -1 and currentPoint is not None:
+        if keyPress == ord('1'):
+            coordinates['tl'] = currentPoint
+        elif keyPress == ord('2'):
+            coordinates['tr'] = currentPoint
+        elif keyPress == ord('3'):
+            coordinates['bl'] = currentPoint
+        elif keyPress == ord('4'):
+            coordinates['br'] = currentPoint
+
+        print(coordinates.values())
+        pickle.dump(coordinates, open("coordinates.p", "wb"))
+        print("Saved coordinates")
+        if not any(x is None for x in coordinates.values()):
+            # measured corners for corner pin
+            # fromMatrix = np.array(coordinates.values())
+            # # Drawing area:
+            # toMatrix = screenDrawCorners
+
+            # matrix = []
+            # for p1, p2 in zip(toMatrix, fromMatrix):
+            #     matrix.append([p1[0], p1[1], 1, 0, 0, 0, -p2[0]*p1[0], -p2[0]*p1[1]])
+            #     matrix.append([0, 0, 0, p1[0], p1[1], 1, -p2[1]*p1[0], -p2[1]*p1[1]])
+
+            # A = np.matrix(matrix, dtype=np.float)
+            # B = np.array(fromMatrix).reshape(8)
+
+            # res = np.dot(np.linalg.inv(A.T * A) * A.T, B)
+            # transformationMatrix = np.array(res).reshape(8)
+            transformationMatrix = create_perspective_transform_matrix(coordinatesToSrc(coordinates), screenDrawCorners)
+
+            # measured corners for corner pin
+            # fromMatrix = np.array(coordinates.values())
+            # # Drawing area:
+            # toMatrix = np.array([[10,60], [90, 60], [10, 110], [90, 110]])
+            # print(fromMatrix, toMatrix)
+            # print(np.linalg.inv(toMatrix))
+            # # matrix to transform from measured to drawn space
+            # transformationMatrix = np.dot(fromMatrix, np.linalg.inv(toMatrix))
+
 cv2.destroyAllWindows()
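
A note on the calibration debug block above: a 3x3 perspective matrix has to be applied to the homogeneous column vector (x, y, 1) and the result divided by its third component, which is exactly what the function generated by create_perspective_transform does. The raw np.dot(a, transformationMatrix) and np.dot(transformationMatrix, a) prints skip that divide, and the test point a carries 0 rather than 1 as its third element, so they will not in general agree with transform(a[0:2]). A minimal standalone sketch of the correct application (apply_homography is an illustrative helper, not part of the patch):

    import numpy as np

    def apply_homography(m, pt):
        # Lift (x, y) to homogeneous coordinates, multiply, then divide by w.
        res = np.dot(m, np.array([pt[0], pt[1], 1.0]))
        return (res[0] / res[2], res[1] / res[2])

With the matrix built from the calibration corners, apply_homography(transformationMatrix, point) should match transform(point) for any point.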