Commit

Initial commit of Python alignment portions.
Brandon Amos committed Sep 24, 2015
1 parent 4973245 commit df40fa2
Showing 8 changed files with 468 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -5,3 +5,5 @@ data

models/facenet/*.t7
models/dlib/shape_predictor_68_face_landmarks.dat

*.pyc
Empty file added facenet/__init__.py
Empty file.
1 change: 1 addition & 0 deletions facenet/alignment/__init__.py
@@ -0,0 +1 @@
from .naive_dlib import NaiveDlib
183 changes: 183 additions & 0 deletions facenet/alignment/naive_dlib.py
@@ -0,0 +1,183 @@
# Copyright 2015 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import cv2
import dlib
import numpy as np
import os
import random
import sys

from skimage import io

from .. import helper
from .. import data


class NaiveDlib:
def __init__(self, modelDir, facePredictorName):
"""Initialize the dlib-based alignment."""
self.detector = dlib.get_frontal_face_detector()
self.normMeanAlignPoints = loadMeanPoints(modelDir)
self.predictor = dlib.shape_predictor(os.path.join(modelDir,
facePredictorName))

def getAllFaceBoundingBoxes(self, img):
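# Run dlib's frontal face detector; the second argument upsamples the image once so smaller faces can be found.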
return self.detector(img, 1)

def getLargestFaceBoundingBox(self, img):
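# Return the detection covering the largest area, or None (implicitly) when no face is found.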
faces = self.detector(img, 1)
if len(faces) > 0:
return max(faces, key=lambda rect: rect.width() * rect.height())

def align(self, img, bb):
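# Run the shape predictor inside the bounding box and return the landmarks (68 points with the default model) as (x, y) tuples.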
points = self.predictor(img, bb)
return list(map(lambda p: (p.x, p.y), points.parts()))

def alignImg(self, method, size, img, bb=None,
outputPrefix=None, outputDebug=False,
expandBox=False):
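# Align the face in img with one of 'tightcrop', 'affine', 'perspective', or 'homography', then crop to the landmark bounding box and resize to (size, size); returns None when detection or alignment fails.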
if outputPrefix:
helper.mkdirP(os.path.dirname(outputPrefix))
def getName(tag=None):
if tag is None:
return "{}.png".format(outputPrefix)
else:
return "{}-{}.png".format(outputPrefix, tag)

if bb is None:
try:
bb = self.getLargestFaceBoundingBox(img)
except Exception as e:
print("Warning: {}".format(e))
# In rare cases, exceptions are thrown.
return
if bb is None:
# Most failed detection attempts return here.
return

alignPoints = self.align(img, bb)
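# Map the normalized mean landmark locations into this bounding box's image coordinates.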
meanAlignPoints = transformPoints(self.normMeanAlignPoints, bb, True)

(xs, ys) = zip(*meanAlignPoints)
tightBb = dlib.rectangle(left=min(xs), right=max(xs),
top=min(ys), bottom=max(ys))

if method != 'tightcrop':
npAlignPoints = np.float32(alignPoints)
npMeanAlignPoints = np.float32(meanAlignPoints)

if method == 'tightcrop':
warpedImg = img
elif method == 'affine':
ss = np.array([39, 42, 57]) # Eyes and tip of nose.
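# In dlib's 68-point landmark scheme, indices 39 and 42 are the inner eye corners and 57 is the center of the lower lip rather than the nose tip; this is what the review thread below refers to.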

@hbredin commented on Nov 5, 2015:
tip of nose?
template

@bamos (Collaborator) replied on Nov 5, 2015:
Thanks! Corrected.
npAlignPointsSS = npAlignPoints[ss]
npMeanAlignPointsSS = npMeanAlignPoints[ss]
H = cv2.getAffineTransform(npAlignPointsSS, npMeanAlignPointsSS)
warpedImg = cv2.warpAffine(img, H, (img.shape[1], img.shape[0]))  # dsize is (width, height)
elif method == 'perspective':
ss = np.array([39,42,48,54]) # Eyes and corners of mouth.
npAlignPointsSS = npAlignPoints[ss]
npMeanAlignPointsSS = npMeanAlignPoints[ss]
H = cv2.getPerspectiveTransform(npAlignPointsSS, npMeanAlignPointsSS)
warpedImg = cv2.warpPerspective(img, H, (img.shape[1], img.shape[0]))  # dsize is (width, height)
elif method == 'homography':
(H,mask) = cv2.findHomography(npAlignPoints, npMeanAlignPoints,
method=cv2.LMEDS)
warpedImg = cv2.warpPerspective(img, H, (img.shape[1], img.shape[0]))  # dsize is (width, height)
else:
print("Error: method '{}' is unimplemented.".format(method))
sys.exit(-1)

if method == 'tightcrop':
wAlignPoints = alignPoints
else:
wBb = self.getLargestFaceBoundingBox(warpedImg)

@hbredin commented on Nov 5, 2015:
do we really need to detect the same face twice (once in the original image, once in the warped image)?
wouldn't it be less error-prone to directly warp the original image into a 96x96 normalized thumbnail?
I do not understand why we need to do that in two steps -- I must have missed something...
if wBb is None:
return
wAlignPoints = self.align(warpedImg, wBb)
wMeanAlignPoints = transformPoints(self.normMeanAlignPoints, wBb, True)

if outputDebug:
annotatedImg = annotate(img, bb, alignPoints, meanAlignPoints)
io.imsave(getName("orig"), img)
io.imsave(getName("annotated"), annotatedImg)

if method != 'tightcrop':
wAnnotatedImg = annotate(warpedImg, wBb,
wAlignPoints, wMeanAlignPoints)
io.imsave(getName("warped"), warpedImg)
io.imsave(getName("warped-annotated"), wAnnotatedImg)

if len(warpedImg.shape) != 3:
print(" + Warning: Result does not have 3 dimensions.")
return None

(xs, ys) = zip(*wAlignPoints)
xRange = max(xs)-min(xs)
yRange = max(ys)-min(ys)
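# Optionally expand the landmark bounding box: 20% on the left, right, and bottom and 65% above, so the crop keeps some of the forehead.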
if expandBox:
(l, r, t, b) = (min(xs)-0.20*xRange, max(xs)+0.20*xRange,
min(ys)-0.65*yRange, max(ys)+0.20*yRange)
else:
(l, r, t, b) = (min(xs), max(xs), min(ys), max(ys))
(h, w, _) = warpedImg.shape  # numpy shape is (rows, cols, channels)
if 0 <= l <= w and 0 <= r <= w and 0 <= b <= h and 0 <= t <= h:
cwImg = cv2.resize(warpedImg[int(t):int(b), int(l):int(r)], (size, size))
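# Reject near-uniform crops: if one of 16 intensity bins holds more than 65% of the pixels, the crop is probably a single flat color.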
hist, edges = np.histogram(cwImg.ravel(), 16, [0, 256])
s = sum(hist)
if any(hist > 0.65*s):
print("Warning: Image is likely a single color.")
return
else:
print("Warning: Unable to align and crop to the "
"face's bounding box.")
return

if outputPrefix:
io.imsave(getName(), cwImg)
return cwImg

def transformPoints(points, bb, toImgCoords):
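# Convert landmark points between normalized [0, 1] bounding-box coordinates and absolute image coordinates, depending on toImgCoords.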
if toImgCoords:
def scale(p):
(x,y) = p
return ( int((x*bb.width())+bb.left()),
int((y*bb.height())+bb.top()) )
else:
def scale(p):
(x,y) = p
return ( float(x-bb.left())/bb.width(),
float(y-bb.top())/bb.height() )
return list(map(scale, points))


def loadMeanPoints(modelDir):
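# Load the normalized mean landmark locations from modelDir/mean.csv, one "x,y" pair per line.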
def parse(line):
(x,y) = line.strip().split(",")
return (float(x), float(y))
with open("{}/mean.csv".format(modelDir),'r') as f:
return [parse(line) for line in f]

def annotate(img, box, points=None, meanPoints=None):
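# Draw the bounding box and both landmark sets on a copy of the image for debugging output.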
a = np.copy(img)
bl = (box.left(), box.bottom())
tr = (box.right(), box.top())
cv2.rectangle(a, bl, tr, color=(153, 255, 204), thickness=3)
for p in points:
cv2.circle(a, center=p, radius=3, color=(102, 204, 255), thickness=-1)
for p in meanPoints:
cv2.circle(a, center=p, radius=3, color=(0,0,0), thickness=-1)
return a
50 changes: 50 additions & 0 deletions facenet/data.py
@@ -0,0 +1,50 @@
# Copyright 2015 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from skimage import io

class Image:
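# A labeled image on disk: cls is the class (identity) label, name the file stem, and path the full path.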
def __init__(self, cls, name, path):
self.cls = cls
self.name = name
self.path = path
self.rgb = None

def getRGB(self, cache=False):
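# Read the image from disk on demand; if cache is True, keep the decoded pixels on the object for reuse.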
if self.rgb is not None:
return self.rgb
else:
try:
rgb = io.imread(self.path)
except Exception:
rgb = None
if cache:
self.rgb = rgb
return rgb

def __repr__(self):
return "({}, {})".format(self.cls, self.name)


def iterImgs(d):
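# Recursively walk d and yield an Image for every .jpg/.png file, using the containing directory name as its class.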
exts = [".jpg", ".png"]

for subdir, dirs, files in os.walk(d):
for path in files:
(imageClass, fName) = (os.path.basename(subdir), path)
(imageName, ext) = os.path.splitext(fName)
if ext in exts:
yield Image(imageClass, imageName, os.path.join(subdir, fName))
10 changes: 10 additions & 0 deletions facenet/helper.py
@@ -0,0 +1,10 @@
import errno
import os

def mkdirP(path):
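# Equivalent of `mkdir -p`: create the directory and ignore the error if it already exists.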
try:
os.makedirs(path)
except OSError as exc: # Python >2.5
if exc.errno == errno.EEXIST and os.path.isdir(path):
pass
else: raise
144 changes: 144 additions & 0 deletions util/align-dlib.py
@@ -0,0 +1,144 @@
#!/usr/bin/env python2
#
# Copyright 2015 Carnegie Mellon University
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.append(".")

import argparse
import cv2
import numpy as np
import os
import random
import shutil

from skimage import io

def write(vals, fName):
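# Write one comma-separated row per entry, backing up any existing file first.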
if os.path.isfile(fName):
print("{} exists. Backing up.".format(fName))
os.rename(fName, "{}.bak".format(fName))
with open(fName, 'w') as f:
for p in vals:
f.write(",".join(str(x) for x in p))
f.write("\n")

def computeMeanMain(args):
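# Detect landmarks on (a sample of) the input images, write the per-landmark mean and standard deviation to modelDir, and save a scatter plot of the mean shape.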
dlibAlign = NaiveDlib(args.modelDir, args.facePredictorName)

imgs = list(iterImgs(args.inputDir))
if args.numImages > 0:
imgs = random.sample(imgs, args.numImages)

facePoints = []
for img in imgs:
rgb = img.getRGB()
bb = dlibAlign.getLargestFaceBoundingBox(rgb)
alignedPoints = dlibAlign.align(rgb, bb)
if alignedPoints:
facePoints.append(alignedPoints)

facePointsNp = np.array(facePoints)
mean = np.mean(facePointsNp, axis=0)
std = np.std(facePointsNp, axis=0)

write(mean, "{}/mean.csv".format(args.modelDir))
write(std, "{}/std.csv".format(args.modelDir))

# Only import in this mode.
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.scatter(mean[:,0], -mean[:,1], color='k')
ax.axis('equal')
for i,p in enumerate(mean):
ax.annotate(str(i), (p[0]+0.005, -p[1]+0.005), fontsize=8)
plt.savefig("{}/mean.png".format(args.modelDir))


def alignMain(args):
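# Align every image under inputDir into outputDir/<class>/<name>.png, optionally copying the LFW deep-funneled image when alignment fails.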
facenet.helper.mkdirP(args.outputDir)

imgs = list(iterImgs(args.inputDir))

# Shuffle so multiple versions can be run at once.
random.shuffle(imgs)

dlibAlign = facenet.alignment.NaiveDlib(args.modelDir,
args.facePredictorName)

nFallbacks = 0
for imgObject in imgs:
outDir = os.path.join(args.outputDir, imgObject.cls)
imgName = "{}/{}.png".format(outDir, imgObject.name)
facenet.helper.mkdirP(outDir)
if not os.path.isfile(imgName):
rgb = imgObject.getRGB(cache=False)
out = dlibAlign.alignImg(args.method, args.size, rgb)
if args.fallbackLfw and out is None:
nFallbacks += 1
deepFunneled = "{}/{}.jpg".format(os.path.join(args.fallbackLfw,
imgObject.cls),
imgObject.name)
shutil.copy(deepFunneled, "{}/{}.jpg".format(os.path.join(args.outputDir,
imgObject.cls),
imgObject.name))

if out is not None:
io.imsave(imgName, out)
print("nFallbacks: {}".format(nFallbacks))

if __name__=='__main__':
parser = argparse.ArgumentParser()

parser.add_argument('inputDir', type=str, help="Input image directory.")
parser.add_argument('--modelDir', type=str, help="Directory of dlib's predictor and mean image models.",
default="./models/dlib/")
parser.add_argument('--facePredictorName', type=str, help="Name of the face predictor.",
default="shape_predictor_68_face_landmarks.dat")
parser.add_argument('--dlibRoot', type=str,
default="/home/bamos/src/dlib-18.15/python_examples",
help="dlib directory with the dlib.so Python library.")

subparsers = parser.add_subparsers(dest='mode', help="Mode")
computeMeanParser = subparsers.add_parser('computeMean', help='Compute the mean landmark locations over a directory of images.')
computeMeanParser.add_argument('--numImages', type=int, help="The number of images. '0' for all images.",
default=0) # <= 0 ===> all imgs
alignmentParser = subparsers.add_parser('align', help='Align a directory of images.')
alignmentParser.add_argument('method', type=str,
choices=['tightcrop', 'affine',
'perspective', 'homography'],
help="Alignment method.")
alignmentParser.add_argument('outputDir', type=str, help="Output directory of aligned images.")
alignmentParser.add_argument('--outputDebugImages', action='store_true',
help='Output annotated images for debugging and presenting.')
alignmentParser.add_argument('--size', type=int, help="Default image size.",
default=152)
alignmentParser.add_argument('--fallbackLfw', type=str,
help="If alignment doesn't work, fallback to copying the deep funneled version from this directory..")

args = parser.parse_args()

sys.path.append(args.dlibRoot)
import facenet
import facenet.helper
from facenet.data import iterImgs
from facenet.alignment import NaiveDlib

if args.mode == 'computeMean':
computeMeanMain(args)
else:
alignMain(args)