Commit 5d0a664

Merge branch 'master' into models/mobilenetv3

datumbox authored Jan 3, 2021
2 parents e4d130f + 7b9d30e commit 5d0a664
Showing 40 changed files with 304 additions and 985 deletions.
2 changes: 1 addition & 1 deletion references/detection/group_by_aspect_ratio.py
@@ -26,7 +26,7 @@ class GroupedBatchSampler(BatchSampler):
It enforces that the batch only contain elements from the same group.
It also tries to provide mini-batches that follow an ordering which is
as close as possible to the ordering from the original sampler.
-Arguments:
+Args:
sampler (Sampler): Base sampler.
group_ids (list[int]): If the sampler produces indices in range [0, N),
`group_ids` must be a list of `N` ints which contains the group id of each sample.
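A minimal wiring sketch for this sampler (hedged: `dataset` and `group_ids` are placeholders, and the `batch_size` argument is assumed from the `BatchSampler` interface):

>>> from torch.utils.data import DataLoader, RandomSampler
>>> sampler = RandomSampler(dataset)  # dataset: any map-style Dataset (placeholder)
>>> # group_ids: one int per sample, e.g. 0/1 for landscape/portrait aspect ratios
>>> batch_sampler = GroupedBatchSampler(sampler, group_ids, batch_size=8)
>>> loader = DataLoader(dataset, batch_sampler=batch_sampler)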
8 changes: 5 additions & 3 deletions setup.py
@@ -29,12 +29,14 @@ def get_dist(pkgname):
return None


-version = '0.9.0a0'
+cwd = os.path.dirname(os.path.abspath(__file__))
+
+version_txt = os.path.join(cwd, 'version.txt')
+with open(version_txt, 'r') as f:
+    version = f.readline().strip()
sha = 'Unknown'
package_name = 'torchvision'

-cwd = os.path.dirname(os.path.abspath(__file__))
-
try:
sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
except Exception:
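The net effect of this hunk: the version is no longer pinned in setup.py but read from version.txt at build time. A rough sketch of the new resolution (assuming version.txt holds a single line such as the previously hard-coded value):

>>> with open('version.txt') as f:
...     version = f.readline().strip()
>>> version
'0.9.0a0'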
2 changes: 1 addition & 1 deletion torchvision/csrc/io/image/cpu/readjpeg_cpu.cpp
@@ -117,7 +117,7 @@ torch::Tensor decodeJPEG(const torch::Tensor& data, ImageReadMode mode) {
*/
default:
jpeg_destroy_decompress(&cinfo);
TORCH_CHECK(false, "Provided mode not supported");
TORCH_CHECK(false, "The provided mode is not supported for JPEG files");
}

jpeg_calc_output_dimensions(&cinfo);
2 changes: 1 addition & 1 deletion torchvision/csrc/io/image/cpu/readpng_cpu.cpp
@@ -143,7 +143,7 @@ torch::Tensor decodePNG(const torch::Tensor& data, ImageReadMode mode) {
break;
default:
png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
TORCH_CHECK(false, "Provided mode not supported");
TORCH_CHECK(false, "The provided mode is not supported for PNG files");
}

png_read_update_info(png_ptr, info_ptr);
10 changes: 5 additions & 5 deletions torchvision/csrc/io/image/image_read_mode.h
@@ -2,8 +2,8 @@

/* Should be kept in-sync with Python ImageReadMode enum */
using ImageReadMode = int64_t;
-#define IMAGE_READ_MODE_UNCHANGED 0
-#define IMAGE_READ_MODE_GRAY 1
-#define IMAGE_READ_MODE_GRAY_ALPHA 2
-#define IMAGE_READ_MODE_RGB 3
-#define IMAGE_READ_MODE_RGB_ALPHA 4
+const ImageReadMode IMAGE_READ_MODE_UNCHANGED = 0;
+const ImageReadMode IMAGE_READ_MODE_GRAY = 1;
+const ImageReadMode IMAGE_READ_MODE_GRAY_ALPHA = 2;
+const ImageReadMode IMAGE_READ_MODE_RGB = 3;
+const ImageReadMode IMAGE_READ_MODE_RGB_ALPHA = 4;
4 changes: 2 additions & 2 deletions torchvision/datasets/samplers/clip_sampler.py
@@ -111,7 +111,7 @@ class UniformClipSampler(Sampler):
When the number of unique clips in the video is fewer than num_video_clips_per_video,
repeat the clips until `num_video_clips_per_video` clips are collected
-Arguments:
+Args:
video_clips (VideoClips): video clips to sample from
num_clips_per_video (int): number of clips to be sampled per video
"""
@@ -151,7 +151,7 @@ class RandomClipSampler(Sampler):
"""
Samples at most `max_video_clips_per_video` clips for each video randomly
-Arguments:
+Args:
video_clips (VideoClips): video clips to sample from
max_clips_per_video (int): maximum number of clips to be sampled per video
"""
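A brief usage sketch (hedged: `video_clips` is assumed to be a VideoClips instance, built as in the video_utils.py example below):

>>> train_sampler = RandomClipSampler(video_clips, max_clips_per_video=5)
>>> test_sampler = UniformClipSampler(video_clips, num_clips_per_video=10)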
6 changes: 3 additions & 3 deletions torchvision/datasets/video_utils.py
@@ -88,7 +88,7 @@ class VideoClips(object):
Recreating the clips for different clip lengths is fast, and can be done
with the `compute_clips` method.
-Arguments:
+Args:
video_paths (List[str]): paths to the video files
clip_length_in_frames (int): size of a clip in number of frames
frames_between_clips (int): step (in frames) between each clip
@@ -227,7 +227,7 @@ def compute_clips(self, num_frames, step, frame_rate=None):
Always returns clips of size `num_frames`, meaning that the
last few frames in a video can potentially be dropped.
-Arguments:
+Args:
num_frames (int): number of frames for the clip
step (int): distance between two clips
"""
@@ -285,7 +285,7 @@ def get_clip(self, idx):
"""
Gets a subclip from a list of videos.
-Arguments:
+Args:
idx (int): index of the subclip. Must be between 0 and num_clips().
Returns:
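Pulling the documented parameters together, a minimal sketch (hedged: `video_paths` is a placeholder list of paths, and the four-value return of `get_clip` is assumed from the library, not shown in this excerpt):

>>> video_clips = VideoClips(video_paths, clip_length_in_frames=16, frames_between_clips=1)
>>> video_clips.compute_clips(num_frames=32, step=16)  # re-slice without re-reading metadata
>>> video, audio, info, video_idx = video_clips.get_clip(0)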
49 changes: 27 additions & 22 deletions torchvision/io/image.py
@@ -50,6 +50,15 @@


class ImageReadMode(Enum):
"""
Support for various modes while reading images.
Use `ImageReadMode.UNCHANGED` for loading the image as-is,
`ImageReadMode.GRAY` for converting to grayscale,
`ImageReadMode.GRAY_ALPHA` for grayscale with transparency,
`ImageReadMode.RGB` for RGB and `ImageReadMode.RGB_ALPHA` for
RGB with transparency.
"""
UNCHANGED = 0
GRAY = 1
GRAY_ALPHA = 2
@@ -62,7 +71,7 @@ def read_file(path: str) -> torch.Tensor:
Reads and outputs the bytes contents of a file as a uint8 Tensor
with one dimension.
-Arguments:
+Args:
path (str): the path to the file to be read
Returns:
@@ -77,7 +86,7 @@ def write_file(filename: str, data: torch.Tensor) -> None:
Writes the contents of a uint8 tensor with one dimension to a
file.
-Arguments:
+Args:
filename (str): the path to the file to be written
data (Tensor): the contents to be written to the output file
"""
@@ -90,15 +99,13 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE
Optionally converts the image to the desired format.
The values of the output tensor are uint8 between 0 and 255.
-Arguments:
+Args:
input (Tensor[1]): a one dimensional uint8 tensor containing
the raw bytes of the PNG image.
mode (ImageReadMode): the read mode used for optionally
-converting the image. Use `ImageReadMode.UNCHANGED` for loading
-the image as-is, `ImageReadMode.GRAY` for converting to grayscale,
-`ImageReadMode.GRAY_ALPHA` for grayscale with transparency,
-`ImageReadMode.RGB` for RGB and `ImageReadMode.RGB_ALPHA` for
-RGB with transparency. Default: `ImageReadMode.UNCHANGED`
+converting the image. Default: `ImageReadMode.UNCHANGED`.
+See `ImageReadMode` class for more information on various
+available modes.
Returns:
output (Tensor[image_channels, image_height, image_width])
@@ -155,13 +162,13 @@ def decode_jpeg(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANG
Optionally converts the image to the desired format.
The values of the output tensor are uint8 between 0 and 255.
-Arguments:
+Args:
input (Tensor[1]): a one dimensional uint8 tensor containing
the raw bytes of the JPEG image.
mode (ImageReadMode): the read mode used for optionally
-converting the image. Use `ImageReadMode.UNCHANGED` for loading
-the image as-is, `ImageReadMode.GRAY` for converting to grayscale
-and `ImageReadMode.RGB` for RGB. Default: `ImageReadMode.UNCHANGED`
+converting the image. Default: `ImageReadMode.UNCHANGED`.
+See `ImageReadMode` class for more information on various
+available modes.
Returns:
output (Tensor[image_channels, image_height, image_width])
@@ -229,11 +236,10 @@ def decode_image(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHAN
a one dimensional uint8 tensor containing the raw bytes of the
PNG or JPEG image.
mode: ImageReadMode
-the read mode used for optionally converting the image. JPEG
-and PNG images have different permitted values. The default
-value is `ImageReadMode.UNCHANGED` and it keeps the image as-is.
-See `decode_jpeg()` and `decode_png()` for more information.
-Default: `ImageReadMode.UNCHANGED`
+the read mode used for optionally converting the image.
+Default: `ImageReadMode.UNCHANGED`.
+See `ImageReadMode` class for more information on various
+available modes.
Returns
-------
@@ -254,11 +260,10 @@ def read_image(path: str, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torc
path: str
path of the JPEG or PNG image.
mode: ImageReadMode
-the read mode used for optionally converting the image. JPEG
-and PNG images have different permitted values. The default
-value is `ImageReadMode.UNCHANGED` and it keeps the image as-is.
-See `decode_jpeg()` and `decode_png()` for more information.
-Default: `ImageReadMode.UNCHANGED`
+the read mode used for optionally converting the image.
+Default: `ImageReadMode.UNCHANGED`.
+See `ImageReadMode` class for more information on various
+available modes.
Returns
-------
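With the docstrings now deferring to the enum, a short usage sketch of the modes (the file name is a placeholder):

>>> from torchvision.io import read_image, ImageReadMode
>>> img = read_image('photo.jpg', mode=ImageReadMode.RGB)    # uint8 Tensor[3, H, W]
>>> gray = read_image('photo.jpg', mode=ImageReadMode.GRAY)  # uint8 Tensor[1, H, W]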
2 changes: 1 addition & 1 deletion torchvision/models/_utils.py
@@ -18,7 +18,7 @@ class IntermediateLayerGetter(nn.ModuleDict):
assigned to the model. So if `model` is passed, `model.feature1` can
be returned, but not `model.feature1.layer2`.
-Arguments:
+Args:
model (nn.Module): model on which we will extract the features
return_layers (Dict[name, new_name]): a dict containing the names
of the modules for which the activations will be returned as
14 changes: 7 additions & 7 deletions torchvision/models/detection/_utils.py
@@ -15,7 +15,7 @@ class BalancedPositiveNegativeSampler(object):
def __init__(self, batch_size_per_image, positive_fraction):
# type: (int, float) -> None
"""
-Arguments:
+Args:
batch_size_per_image (int): number of elements to be selected per image
positive_fraction (float): percentage of positive elements per batch
"""
@@ -25,7 +25,7 @@ def __call__(self, matched_idxs):
def __call__(self, matched_idxs):
# type: (List[Tensor]) -> Tuple[List[Tensor], List[Tensor]]
"""
-Arguments:
+Args:
matched idxs: list of tensors containing -1, 0 or positive values.
Each tensor corresponds to a specific image.
-1 values are ignored, 0 are considered as negatives and > 0 as
@@ -83,7 +83,7 @@ def encode_boxes(reference_boxes, proposals, weights):
Encode a set of proposals with respect to some
reference boxes
-Arguments:
+Args:
reference_boxes (Tensor): reference boxes
proposals (Tensor): boxes to be encoded
"""
@@ -133,7 +133,7 @@ class BoxCoder(object):
def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
# type: (Tuple[float, float, float, float], float) -> None
"""
-Arguments:
+Args:
weights (4-element tuple)
bbox_xform_clip (float)
"""
@@ -153,7 +153,7 @@ def encode_single(self, reference_boxes, proposals):
Encode a set of proposals with respect to some
reference boxes
-Arguments:
+Args:
reference_boxes (Tensor): reference boxes
proposals (Tensor): boxes to be encoded
"""
@@ -183,7 +183,7 @@ def decode_single(self, rel_codes, boxes):
From a set of original boxes and encoded relative box offsets,
get the decoded boxes.
-Arguments:
+Args:
rel_codes (Tensor): encoded boxes
boxes (Tensor): reference boxes.
"""
@@ -361,7 +361,7 @@ def overwrite_eps(model, eps):
only when the pretrained weights are loaded to maintain compatibility
with previous versions.
-Arguments:
+Args:
model (nn.Module): The model on which we perform the overwrite.
eps (float): The new value of eps.
"""
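A round-trip sketch of the coder (hedged: `reference_boxes` and `proposals` are placeholder Tensor[N, 4] boxes in (x1, y1, x2, y2) format; the weights are illustrative):

>>> coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
>>> deltas = coder.encode_single(reference_boxes, proposals)  # per-box regression offsets
>>> recovered = coder.decode_single(deltas, proposals)        # approximately reference_boxes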
2 changes: 1 addition & 1 deletion torchvision/models/detection/anchor_utils.py
@@ -22,7 +22,7 @@ class AnchorGenerator(nn.Module):
and AnchorGenerator will output a set of sizes[i] * aspect_ratios[i] anchors
per spatial location for feature map i.
-Arguments:
+Args:
sizes (Tuple[Tuple[int]]):
aspect_ratios (Tuple[Tuple[float]]):
"""
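Following the sizes[i] * aspect_ratios[i] rule above, a single-feature-map generator yielding 5 * 3 = 15 anchors per location might be built as (a sketch mirroring the pattern in torchvision's detection docs):

>>> anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
>>>                                    aspect_ratios=((0.5, 1.0, 2.0),))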
4 changes: 2 additions & 2 deletions torchvision/models/detection/backbone_utils.py
@@ -14,7 +14,7 @@ class BackboneWithFPN(nn.Module):
Internally, it uses torchvision.models._utils.IntermediateLayerGetter to
extract a submodel that returns the feature maps specified in return_layers.
The same limitations of IntermediateLayerGetter apply here.
-Arguments:
+Args:
backbone (nn.Module)
return_layers (Dict[name, new_name]): a dict containing the names
of the modules for which the activations will be returned as
@@ -73,7 +73,7 @@ def resnet_fpn_backbone(
>>> ('3', torch.Size([1, 256, 2, 2])),
>>> ('pool', torch.Size([1, 256, 1, 1]))]
-Arguments:
+Args:
backbone_name (string): resnet architecture. Possible values are 'ResNet', 'resnet18', 'resnet34', 'resnet50',
'resnet101', 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2', 'wide_resnet101_2'
norm_layer (torchvision.ops): it is recommended to use the default value. For details visit:
8 changes: 4 additions & 4 deletions torchvision/models/detection/faster_rcnn.py
@@ -49,7 +49,7 @@ class FasterRCNN(GeneralizedRCNN):
- labels (Int64Tensor[N]): the predicted labels for each image
- scores (Tensor[N]): the scores of each prediction
-Arguments:
+Args:
backbone (nn.Module): the network used to compute the features for the model.
It should contain an out_channels attribute, which indicates the number of output
channels that each feature map has (and it should be the same for all feature maps).
@@ -239,7 +239,7 @@ class TwoMLPHead(nn.Module):
"""
Standard heads for FPN-based models
-Arguments:
+Args:
in_channels (int): number of input channels
representation_size (int): size of the intermediate representation
"""
@@ -264,7 +264,7 @@ class FastRCNNPredictor(nn.Module):
Standard classification + bounding box regression layers
for Fast R-CNN.
-Arguments:
+Args:
in_channels (int): number of input channels
num_classes (int): number of output classes (including background)
"""
@@ -341,7 +341,7 @@ def fasterrcnn_resnet50_fpn(pretrained=False, progress=True,
>>> # optionally, if you want to export the model to ONNX:
>>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)
-Arguments:
+Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
progress (bool): If True, displays a progress bar of the download to stderr
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
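For reference, the standard inference flow (a sketch; it mirrors the docstring example above, with random tensors standing in for real images):

>>> model = fasterrcnn_resnet50_fpn(pretrained=True)
>>> model.eval()
>>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
>>> predictions = model(x)  # list of dicts with 'boxes', 'labels', 'scores'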
4 changes: 2 additions & 2 deletions torchvision/models/detection/generalized_rcnn.py
@@ -14,7 +14,7 @@ class GeneralizedRCNN(nn.Module):
"""
Main class for Generalized R-CNN.
-Arguments:
+Args:
backbone (nn.Module):
rpn (nn.Module):
roi_heads (nn.Module): takes the features + the proposals from the RPN and computes
@@ -43,7 +43,7 @@ def eager_outputs(self, losses, detections):
def forward(self, images, targets=None):
# type: (List[Tensor], Optional[List[Dict[str, Tensor]]]) -> Tuple[Dict[str, Tensor], List[Dict[str, Tensor]]]
"""
-Arguments:
+Args:
images (list[Tensor]): images to be processed
targets (list[Dict[Tensor]]): ground-truth boxes present in the image (optional)
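In training mode the forward pass consumes targets and returns a dict of losses; a sketch with placeholder data (reusing the model from the previous example; target keys follow the docstring's ground-truth format):

>>> model.train()
>>> images = [torch.rand(3, 300, 400)]
>>> targets = [{'boxes': torch.tensor([[50., 50., 150., 150.]]),
>>>             'labels': torch.tensor([1])}]
>>> loss_dict = model(images, targets)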
2 changes: 1 addition & 1 deletion torchvision/models/detection/image_list.py
@@ -14,7 +14,7 @@ class ImageList(object):

def __init__(self, tensors: Tensor, image_sizes: List[Tuple[int, int]]):
"""
-Arguments:
+Args:
tensors (tensor)
image_sizes (list[tuple[int, int]])
"""
4 changes: 2 additions & 2 deletions torchvision/models/detection/keypoint_rcnn.py
@@ -44,7 +44,7 @@ class KeypointRCNN(FasterRCNN):
- scores (Tensor[N]): the scores of each prediction
- keypoints (FloatTensor[N, K, 3]): the locations of the predicted keypoints, in [x, y, v] format.
-Arguments:
+Args:
backbone (nn.Module): the network used to compute the features for the model.
It should contain an out_channels attribute, which indicates the number of output
channels that each feature map has (and it should be the same for all feature maps).
@@ -309,7 +309,7 @@ def keypointrcnn_resnet50_fpn(pretrained=False, progress=True,
>>> # optionally, if you want to export the model to ONNX:
>>> torch.onnx.export(model, x, "keypoint_rcnn.onnx", opset_version = 11)
-Arguments:
+Args:
pretrained (bool): If True, returns a model pre-trained on COCO train2017
progress (bool): If True, displays a progress bar of the download to stderr
pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
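The keypoint variant follows the same inference flow, with one extra output field (a sketch):

>>> model = keypointrcnn_resnet50_fpn(pretrained=True)
>>> model.eval()
>>> predictions = model([torch.rand(3, 300, 400)])
>>> predictions[0]['keypoints']  # FloatTensor[N, K, 3] in [x, y, v] format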
(The remaining changed files in this commit are not shown.)