Merge pull request #11198 from JiayiFeng/dev_resize_short

Add image_resize_short and refine resize API
PaddlePaddle · Jun 6, 2018 · 9dc3ed4 · 9dc3ed4
2 parents df87e63 + f3db005
commit 9dc3ed4
Showing 1 changed file with 75 additions and 15 deletions.
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
@@ -81,6 +81,8 @@
     'label_smooth',
     'roi_pool',
     'dice_loss',
+    'image_resize',
+    'image_resize_short',
     'resize_bilinear',
     'gather',
     'random_crop',
@@ -3929,22 +3931,25 @@ def dice_loss(input, label, epsilon=0.00001):
     return reduce_mean(dice_score)
 
 
-def resize_bilinear(input, out_shape=None, scale=None, name=None):
+def image_resize(input,
+                 out_shape=None,
+                 scale=None,
+                 name=None,
+                 resample='BILINEAR'):
     """
-    The mathematical meaning of resize bilinear layer is
-    Bilinear interpolation.
-    Bilinear interpolation is an extension of linear interpolation for
-    interpolating functions of two variables (e.g. H-direction and
-    W-direction in this layer) on a rectilinear 2D grid.
+    Resize a batch of images.
 
-    For details, please refer to Wikipedia:
-    https://en.wikipedia.org/wiki/Bilinear_interpolation
+    The input must be a tensor of the shape (num_batches, channels, in_h, in_w), 
+    and the resizing only applies to the last two dimensions (height and width).
+
+    Supporting resample methods:
+        'BILINEAR' : Bilinear interpolation
 
     Args:
-        input (Variable): The input tensor of resize bilinear layer,
+        input (Variable): The input tensor of image resize layer,
                           This is a 4-D tensor of the shape
                           (num_batches, channels, in_h, in_w).
-        out_shape(list|tuple|Variable|None): Output shape of resize bilinear
+        out_shape(list|tuple|Variable|None): Output shape of image resize
                                     layer, the shape is (out_h, out_w).
                                     Default: None
         scale(float|None): The multiplier for the input height or width.
@@ -3953,6 +3958,8 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
                          Default: None
         name(str|None): A name for this layer(optional). If set None, the layer
                         will be named automatically.
+        resample(str): The resample method. It can only be 'BILINEAR' currently.
+                       Default: 'BILINEAR'
 
     Returns:
         out (Variable): The output is a 4-D tensor of the shape
@@ -3961,8 +3968,12 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
     Examples:
         .. code-block:: python
 
-            out = fluid.layers.resize_bilinear(input, out_shape=[12, 12])
+            out = fluid.layers.image_resize(input, out_shape=[12, 12])
     """
+    resample_methods = {'BILINEAR': 'bilinear_interp'}
+    if resample not in resample_methods:
+        raise ValueError(
+            "The 'resample' of image_resize can only be 'BILINEAR' currently.")
     if out_shape is None and scale is None:
         raise ValueError("One of out_shape and scale must not be None")
     helper = LayerHelper('bilinear_interp', **locals())
@@ -3990,31 +4001,80 @@ def _is_list_or_turple_(data):
 
     out = helper.create_tmp_variable(dtype)
     helper.append_op(
-        type="bilinear_interp",
+        type=resample_methods[resample],
         inputs=inputs,
         outputs={"Out": out},
         attrs={"out_h": out_h,
                "out_w": out_w})
     return out
 
 
+def resize_bilinear(input, out_shape=None, scale=None, name=None):
+    """
+    This is an alias of layer 'image_resize' with bilinear interpolation.
+
+    The mathematical meaning of resize bilinear layer is
+    Bilinear interpolation.
+    Bilinear interpolation is an extension of linear interpolation for
+    interpolating functions of two variables (e.g. H-direction and
+    W-direction in this layer) on a rectilinear 2D grid.
+
+    For details, please refer to Wikipedia:
+    https://en.wikipedia.org/wiki/Bilinear_interpolation
+    """
+
+    return image_resize(input, out_shape, scale, name, 'BILINEAR')
+
+
+def image_resize_short(input, out_short_len, resample='BILINEAR'):
+    """
+    Resize a batch of images. The short edge of the input images is
+    resized to the given 'out_short_len'. The long edge is resized
+    proportionally so that the aspect ratio of the images is kept
+    constant.
+
+    Args:
+        input (Variable): The input tensor of image resize layer,
+                          This is a 4-D tensor of the shape
+                          (num_batches, channels, in_h, in_w).
+        out_short_len(int): The length of output images' short edge.
+
+    Returns:
+        out (Variable): The output is a 4-D tensor of the shape
+                        (num_batches, channels, out_h, out_w).
+    """
+    in_shape = input.shape
+    if len(in_shape) != 4:
+        raise ValueError(
+            "The rank of input must be 4 (num_batches, channels, in_h, in_w).")
+    hw = in_shape[2:4]
+    short_idx = hw.index(min(hw))
+    long_idx = 1 - short_idx
+    out_shape = list(hw)
+    out_shape[short_idx] = out_short_len
+    out_shape[long_idx] = int(
+        float(out_shape[long_idx]) * (float(out_short_len) / float(hw[
+            short_idx])) + 0.5)
+    return image_resize(input=input, out_shape=out_shape, resample=resample)
+
+
 def gather(input, index):
     """
     Output is obtained by gathering entries of the outer-most dimension 
     of X indexed by `index` and concatenate them together.
 
     .. math::
 
-	Out = X[Index]
+        Out = X[Index]
 
 
     .. code-block:: text
 
 
                 Given:
 
-    		X = [[1, 2],
-         	     [3, 4],
+                X = [[1, 2],
+                     [3, 4],
                      [5, 6]]
 
                 Index = [1, 2]