Commit bf361560 by Ting PAN

Bump to 0.1.1

1 parent c8535116
------------------------------------------------------------------------
The most significant changes made to SeetaDet over time.
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Add RandomCrop/RandomPad for ScaleJittering.
- Add ResNet18/ResNet34/AirNet for R-CNN and RetinaNet.
- Use the C++-implemented decoder for RetinaNet instead of the Python proposal layer.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.0 (20190314)
Dragon Minimum Required (Version 0.3.0.0)
......@@ -13,4 +33,4 @@ Preview Features:
Bugs fixed:
- None
- None
\ No newline at end of file
......@@ -67,9 +67,11 @@ python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
| :------: | :------: |
| [VGG16.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.SSD.pth)| SSD |
| [VGG16.RCNN](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.RCNN.pth)| R-CNN |
| [R-18.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-18.Affine.pth)| R-CNN, RetinaNet |
| [R-34.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-34.Affine.pth)| R-CNN, RetinaNet |
| [R-50.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-50.Affine.pth)| R-CNN, RetinaNet |
| [R-101.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-101.Affine.pth)| R-CNN, RetinaNet |
| [AirNet.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.SSD.pth)| SSD |
| [AirNet.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.Affine.pth)| R-CNN, RetinaNet, SSD |
## References
......
......@@ -33,14 +33,14 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -33,14 +33,14 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 2
USE_DIFF: False # Do not use crowd objects
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
RPN_POST_NMS_TOP_N: 1000
......
......@@ -24,13 +24,13 @@ FRCNN:
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N: 1000
SCALES: [600]
......
......@@ -29,14 +29,14 @@ FRCNN:
MLP_HEAD_DIM: 4096
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
DATABASE: '/data/voc_0712_trainval_lmdb'
RPN_MIN_SIZE: 16
IMS_PER_BATCH: 2
BATCH_SIZE: 128
SCALES: [600]
MAX_SIZE: 1000
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE: 16
RPN_POST_NMS_TOP_N: 300
......
......@@ -33,12 +33,12 @@ FPN:
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
......@@ -37,15 +37,15 @@ DROPBLOCK:
DECREMENT: 0.000005 # * 20000 = 0.1
TRAIN:
WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
DATABASE: '/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_RANGE: [0.8, 1.2]
SCALE_RANGE: [0.75, 1.33]
TEST:
DATABASE: 'taas:/data/coco_2014_minival_lmdb'
DATABASE: '/data/coco_2014_minival_lmdb'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
......
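The SCALE_JITTERING/SCALE_RANGE pair in the hunk above controls random rescaling during training. A minimal sketch of the usual interpretation, assuming a NumPy/OpenCV image; the function name and the cv2 resize call are illustrative, not SeetaDet's actual hook (which lives inside prep_im_for_blob):

```python
import numpy as np
import cv2  # illustrative; any resize routine works


def jitter_scale(im, target_size, scale_range=(0.75, 1.33)):
    # Sample a jitter factor, then rescale so the shorter side
    # lands near factor * target_size, as SCALE_RANGE suggests.
    factor = np.random.uniform(scale_range[0], scale_range[1])
    im_scale = factor * target_size / min(im.shape[:2])
    return cv2.resize(im, None, fx=im_scale, fy=im_scale), im_scale
```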
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
TYPE: retinanet
BACKBONE: airnet.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
TYPE: retinanet
BACKBONE: resnet18.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
TYPE: retinanet
BACKBONE: resnet34.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 7
TRAIN:
WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
TEST:
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 1
SCALES: [300]
MAX_SIZE: 500
NMS: 0.45
\ No newline at end of file
......@@ -29,11 +29,11 @@ SSD:
STRIDES: [8, 16, 32]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/AirNet.SSD.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
......@@ -33,10 +33,10 @@ SSD:
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
DATABASE: '/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH: 32
TEST:
DATABASE: 'taas:/data/voc_2007_test_lmdb'
DATABASE: '/data/voc_2007_test_lmdb'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
......
......@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path as osp
import numpy as np
......@@ -104,9 +108,6 @@ __C.TRAIN.RPN_MIN_SIZE = 0
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C.TRAIN.RPN_STRADDLE_THRESH = 0
# Resume from the last checkpoint?
__C.TRAIN.RESUME = False
###########################################
# #
......@@ -184,6 +185,7 @@ __C.TEST.DETECTIONS_PER_IM = 100
# #
###########################################
__C.MODEL = edict()
# The type of the model
......@@ -211,11 +213,6 @@ __C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C.MODEL.FREEZE_AT = 2
# Whether to use a bias prior to improve the one-stage detector?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use the bias prior
__C.MODEL.USE_BIAS_PRIOR = False
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
......@@ -234,6 +231,7 @@ __C.MODEL.COARSEST_STRIDE = -1
# #
###########################################
__C.RPN = edict()
# Strides for multiple rpn heads
......@@ -252,6 +250,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
# #
###########################################
__C.RETINANET = edict()
# Anchor aspect ratios to use
......@@ -269,7 +268,7 @@ __C.RETINANET.ANCHOR_SCALE = 4
__C.RETINANET.NUM_CONVS = 4
# During inference, #locs to select based on cls score before NMS is performed
__C.RETINANET.PRE_NMS_TOP_N = 1000
__C.RETINANET.PRE_NMS_TOP_N = 5000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
......@@ -279,9 +278,6 @@ __C.RETINANET.POSITIVE_OVERLAP = 0.5
# Anchors with < iou overlap are labeled negative
__C.RETINANET.NEGATIVE_OVERLAP = 0.4
# Whether softmax should be used in classification branch training
__C.RETINANET.SOFTMAX = False
###########################################
# #
......@@ -336,6 +332,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7
# #
###########################################
__C.MRCNN = edict()
# Resolution of mask predictions
......@@ -354,6 +351,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14
# #
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
......@@ -412,6 +410,7 @@ __C.SSD.SAMPLERS = [
# #
###########################################
__C.RESNET = edict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
......@@ -427,6 +426,7 @@ __C.RESNET.GROUP_WIDTH = 64
# #
###########################################
__C.DROPBLOCK = edict()
# Whether to use DropBlock for more regularization
......@@ -442,6 +442,7 @@ __C.DROPBLOCK.DECREMENT = 1e-6
# #
###########################################
__C.SOLVER = edict()
# Base learning rate for the specified schedule
......@@ -502,6 +503,7 @@ __C.SOLVER.SNAPSHOT_PREFIX = ''
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C.NUM_GPUS = 1
......@@ -523,14 +525,6 @@ __C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)
# Clip bounding box transformation predictions to prevent np.exp from
# overflowing
# Heuristic choice: this would scale a 16-pixel anchor up to 1000 pixels
__C.BBOX_XFORM_CLIP = np.log(1000. / 16.)
# Whether to clip the bbox transformation predictions
__C.USE_XFORM_CLIP = False
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
......
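On BBOX_XFORM_CLIP: log(1000 / 16) ≈ 4.135 caps the predicted log-scale deltas, so np.exp can grow a 16-pixel anchor to at most ~1000 pixels. A minimal sketch of where such a clip is typically applied in box decoding; the names are illustrative, not SeetaDet's decoder:

```python
import numpy as np

BBOX_XFORM_CLIP = np.log(1000. / 16.)  # ~4.135


def decode_wh(w, h, dw, dh):
    # Clip the log-scale deltas before exponentiating; exp(4.135)
    # is ~62.5, so a 16px anchor can reach at most ~1000px.
    dw = np.minimum(dw, BBOX_XFORM_CLIP)
    dh = np.minimum(dh, BBOX_XFORM_CLIP)
    return w * np.exp(dw), h * np.exp(dh)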
......@@ -13,6 +13,7 @@
#
# ------------------------------------------------------------
import os
from lib.datasets.taas import TaaS
......@@ -26,12 +27,12 @@ def get_imdb(name):
if len(keys) >= 2:
cls, source = keys[0], ':'.join(keys[1:])
if cls not in _GLOBAL_DATA_SETS:
raise KeyError('Unknown dataset: {}'.format(cls))
raise KeyError('Unknown DataSet: {}'.format(cls))
return _GLOBAL_DATA_SETS[cls](source)
elif len(keys) == 1:
return _GLOBAL_DATA_SETS[name]()
elif os.path.exists(name):
return _GLOBAL_DATA_SETS['taas'](name)
else:
raise ValueError('Illegal format of image database: {}'.format(name))
raise ValueError('Illegal database: {}'.format(name))
def list_imdbs():
......
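A hedged usage sketch of the updated factory; the lib.datasets.factory module path and the LMDB locations are assumptions for illustration:

```python
from lib.datasets.factory import get_imdb  # assumed module path

# '<cls>:<source>' looks up a registered dataset class explicitly.
db = get_imdb('taas:/data/voc_0712_trainval_lmdb')

# New in this commit: a bare filesystem path falls back to TaaS,
# which is why the configs above can drop the 'taas:' prefix.
db = get_imdb('/data/voc_0712_trainval_lmdb')
```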
......@@ -45,7 +45,13 @@ class DataTransformer(Process):
self.Q_in = self.Q1_out = self.Q2_out = None
self.daemon = True
def make_roidb(self, ann_datum, im_scale, flip=False, offsets=None):
def make_record(
self,
ann_datum,
im_scale,
flip=False,
offsets=None,
):
annotations = ann_datum.annotation
n_objects = 0
if not self._use_diff:
......@@ -53,35 +59,43 @@ class DataTransformer(Process):
if not ann.difficult: n_objects += 1
else: n_objects = len(annotations)
roidb = {
record = {
'width': ann_datum.datum.width,
'height': ann_datum.datum.height,
'gt_classes': np.zeros((n_objects,), dtype=np.int32),
'boxes': np.zeros((n_objects, 4), dtype=np.float32),
}
ix = 0
# Filter the difficult instances
instance_idx = 0
for ann in annotations:
if not self._use_diff and ann.difficult: continue
roidb['boxes'][ix, :] = [
max(0, ann.x1), max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1)]
roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
ix += 1
if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
roidb['boxes'] *= im_scale
record['boxes'][instance_idx, :] = [
max(0, ann.x1),
max(0, ann.y1),
min(ann.x2, ann_datum.datum.width - 1),
min(ann.y2, ann_datum.datum.height - 1),
]
record['gt_classes'][instance_idx] = self._class_to_ind[ann.name]
instance_idx += 1
# Flip the boxes if necessary
if flip:
record['boxes'] = _flip_boxes(
record['boxes'], record['width'])
# Scale the boxes to the detecting scale
record['boxes'] *= im_scale
# Apply the offsets from scale jitter
if offsets is not None:
roidb['boxes'][:, 0::2] += offsets[0]
roidb['boxes'][:, 1::2] += offsets[1]
roidb['boxes'][:, :] = np.minimum(
np.maximum(roidb['boxes'][:, :], 0),
record['boxes'][:, 0::2] += offsets[0]
record['boxes'][:, 1::2] += offsets[1]
record['boxes'][:, :] = np.minimum(
np.maximum(record['boxes'][:, :], 0),
[offsets[2][1] - 1, offsets[2][0] - 1] * 2)
return roidb
return record
@classmethod
def get_image(cls, serialized):
......@@ -121,7 +135,14 @@ class DataTransformer(Process):
target_size = cfg.TRAIN.SCALES[scale_indices]
im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
# Crop or Pad
# Flip
flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
# RandomCrop or RandomPad
offsets = None
if cfg.TRAIN.MAX_SIZE > 0:
if jitter != 1.0:
......@@ -132,20 +153,13 @@ class DataTransformer(Process):
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
# Flip
flip = False
if self._use_flipped:
if npr.randint(0, 2) > 0:
im = im[:, ::-1, :]
flip = True
# Datum -> RoIDB
roidb = self.make_roidb(datum, im_scale, flip, offsets)
# Datum -> Record
rec = self.make_record(datum, im_scale, flip, offsets)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = roidb['boxes'], roidb['gt_classes']
gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32)
gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes']
return im, im_scale, gt_boxes
......@@ -175,16 +189,16 @@ def _flip_boxes(boxes, width):
def _get_image_with_target_size(target_size, im):
im_shape = list(im.shape)
width_diff = target_size[1] - im_shape[1]
offset_crop_width = max(-width_diff // 2, 0)
offset_pad_width = max(width_diff // 2, 0)
offset_crop_width = np.random.randint(0, max(-width_diff, 0) + 1)
offset_pad_width = np.random.randint(0, max(width_diff, 0) + 1)
height_diff = target_size[0] - im_shape[0]
offset_crop_height = max(-height_diff // 2, 0)
offset_pad_height = max(height_diff // 2, 0)
offset_crop_height = np.random.randint(0, max(-height_diff, 0) + 1)
offset_pad_height = np.random.randint(0, max(height_diff, 0) + 1)
im_shape[0 : 2] = target_size
new_im = np.empty(im_shape, dtype=im.dtype)
new_im.fill(127)
new_im[:] = cfg.PIXEL_MEANS
new_im[offset_pad_height:offset_pad_height + im.shape[0],
offset_pad_width:offset_pad_width + im.shape[1]] = \
......
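The centered crop/pad offsets become uniformly random ones, which is the RandomCrop/RandomPad feature from the changelog. A self-contained mirror of the new sampling for one axis, with a worked case:

```python
import numpy as np


def random_crop_pad_offsets(target, actual):
    # Mirrors the new offset sampling for a single axis.
    diff = target - actual
    crop = np.random.randint(0, max(-diff, 0) + 1)  # used when actual > target
    pad = np.random.randint(0, max(diff, 0) + 1)    # used when actual < target
    return crop, pad


print(random_crop_pad_offsets(300, 400))  # crop in [0, 100], pad == 0
print(random_crop_pad_offsets(300, 250))  # crop == 0, pad in [0, 50]
```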
......@@ -11,8 +11,11 @@
# Import custom modules
from lib.modeling.base import Bootstarp
from lib.modeling.base import RPNDecoder
from lib.modeling.base import RetinaNetDecoder
from lib.modeling.base import conv1x1, conv3x3, bn, affine
from lib.modeling.fpn import FPN
from lib.modeling.rpn import RPN
from lib.modeling.fast_rcnn import FastRCNN
from lib.modeling.retinanet import RetinaNet
from lib.modeling.ssd import SSD
\ No newline at end of file
from lib.modeling.ssd import SSD
......@@ -15,16 +15,16 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.modeling.base import conv1x1, conv3x3, bn
from lib.modeling import conv1x1, conv3x3, bn, affine
class WideResBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out, stride=1, downsample=None):
super(WideResBlock, self).__init__()
self.conv1 = conv3x3(dim_in, dim_out, stride)
self.bn1 = bn(dim_out, eps=1e-3)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out)
self.bn2 = bn(dim_out, eps=1e-3)
self.bn2 = affine(dim_out)
self.downsample = downsample
self.relu = torch.nn.ReLU(inplace=True)
......@@ -50,15 +50,15 @@ class InceptionBlock(torch.nn.Module):
def __init__(self, dim_in, dim_out):
super(InceptionBlock, self).__init__()
self.conv1 = conv1x1(dim_in, dim_out)
self.bn1 = bn(dim_out, eps=1e-3)
self.bn1 = affine(dim_out)
self.conv2 = conv3x3(dim_out, dim_out // 2)
self.bn2 = bn(dim_out // 2, eps=1e-3)
self.bn2 = affine(dim_out // 2)
self.conv3a = conv3x3(dim_out // 2, dim_out)
self.bn3a = bn(dim_out, eps=1e-3)
self.bn3a = affine(dim_out)
self.conv3b = conv3x3(dim_out, dim_out)
self.bn3b = bn(dim_out, eps=1e-3)
self.bn3b = affine(dim_out)
self.conv4 = conv3x3(dim_out * 3, dim_out)
self.bn4 = bn(dim_out, eps=1e-3)
self.bn4 = affine(dim_out)
self.relu = torch.nn.ReLU(inplace=True)
def forward(self, x):
......@@ -93,7 +93,8 @@ class AirNet(torch.nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = filters[1:num_stages - 1]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.conv1 = torch.nn.Conv2d(
3, 64,
kernel_size=7,
......@@ -101,7 +102,7 @@ class AirNet(torch.nn.Module):
padding=3,
bias=False,
)
self.bn1 = bn(self.dim_in, eps=1e-3)
self.bn1 = affine(self.dim_in)
self.relu = torch.nn.ReLU(inplace=True)
self.maxpool = torch.nn.MaxPool2d(
kernel_size=2,
......@@ -128,7 +129,7 @@ class AirNet(torch.nn.Module):
def make_blocks(self, dim_out, blocks, stride=1):
downsample = torch.nn.Sequential(
conv1x1(self.dim_in, dim_out, stride=stride),
bn(dim_out, eps=1e-3),
affine(dim_out),
)
layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
self.dim_in = dim_out
......@@ -148,7 +149,7 @@ class AirNet(torch.nn.Module):
x = self.maxpool(x)
x = self.layer1(x)
outputs = [self.layer2(x)]
outputs = [None, None, self.layer2(x)]
if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
......@@ -164,7 +165,7 @@ def airnet(num_stages):
)
return AirNet(blocks, num_stages)
def make_airnet_(): return airnet(5)
def make_airnet_3b(): return airnet(3)
def make_airnet_4b(): return airnet(4)
def make_airnet_5b(): return airnet(5)
\ No newline at end of file
......@@ -44,20 +44,21 @@ class Bootstarp(torch.nn.Module):
return self.run(inputs, outputs)
class ProposalCXX(torch.nn.Module):
"""Extended operator to generate proposal regions."""
class RPNDecoder(torch.nn.Module):
"""Generate proposal regions from RPN."""
def __init__(self):
super(ProposalCXX, self).__init__()
super(RPNDecoder, self).__init__()
self.register_op()
self.K = (cfg.FPN.ROI_MAX_LEVEL -
cfg.FPN.ROI_MIN_LEVEL + 1) \
cfg.FPN.ROI_MIN_LEVEL + 1) \
if len(cfg.RPN.STRIDES) > 1 else 1
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RCNN',
'strides': cfg.RPN.STRIDES,
'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
'scales': [float(e) for e in cfg.RPN.SCALES],
......@@ -79,6 +80,38 @@ class ProposalCXX(torch.nn.Module):
return outputs if isinstance(outputs, list) else [outputs]
class RetinaNetDecoder(torch.nn.Module):
"""Generate proposal regions from retinanet."""
def __init__(self):
super(RetinaNetDecoder, self).__init__()
k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
self.scales = [cfg.RETINANET.ANCHOR_SCALE *
(2 ** (octave / float(scales_per_octave)))
for octave in range(scales_per_octave)]
self.register_op()
def register_op(self):
self.op_meta = {
'op_type': 'Proposal',
'arguments': {
'det_type': 'RETINANET',
'strides': self.strides,
'scales': self.scales,
'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
'score_thresh': cfg.TEST.SCORE_THRESH,
}
}
def forward(self, features, cls_prob, bbox_pred, ims_info):
inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
outputs = [self.register_output()]
return self.run(inputs, outputs)
def conv1x1(dim_in, dim_out, stride=1, bias=False):
"""1x1 convolution."""
return torch.nn.Conv2d(
......
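As a sanity check on RetinaNetDecoder's anchor setup, the stride/scale derivation above reduces to pure arithmetic. With RPN_MIN_LEVEL=3, RPN_MAX_LEVEL=7 and ANCHOR_SCALE=4 from the configs above, and assuming SCALES_PER_OCTAVE=3 (the usual default; not shown in this diff):

```python
k_min, k_max = 3, 7
scales_per_octave, anchor_scale = 3, 4

# Same expressions as in RetinaNetDecoder.__init__ above.
strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
scales = [anchor_scale * (2 ** (octave / float(scales_per_octave)))
          for octave in range(scales_per_octave)]

print(strides)  # [8, 16, 32, 64, 128]
print(scales)   # [4.0, ~5.04, ~6.35]
```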
......@@ -22,9 +22,12 @@ from lib.utils.logger import is_root
from lib.modeling.factory import get_body_func
from lib.modeling import (
Bootstarp, FPN, RPN,
Bootstarp,
FPN,
RPN,
FastRCNN,
RetinaNet, SSD,
RetinaNet,
SSD,
)
......@@ -144,6 +147,7 @@ class Detector(torch.nn.Module):
# 3.3 Feature -> SSD
if hasattr(self, 'ssd'):
features = list(filter(None, features))
outputs.update(
self.ssd(
features=features,
......
......@@ -55,7 +55,7 @@ for D in [16, 19]:
'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)
# AirNet
for D in ['3b', '4b', '5b']:
for D in ['', '3b', '4b', '5b']:
_STORE['BODY']['airnet{}'.format(D)] = \
'lib.modeling.airnet.make_airnet_{}'.format(D)
......
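With the empty suffix registered, a BACKBONE such as 'airnet.fpn' can now resolve to the default 5-stage AirNet. A hedged sketch of the lookup; splitting the BACKBONE string on '.' is an assumption about how get_body_func consumes it, not confirmed by this diff:

```python
_STORE = {'BODY': {}}
for D in ['', '3b', '4b', '5b']:
    _STORE['BODY']['airnet{}'.format(D)] = \
        'lib.modeling.airnet.make_airnet_{}'.format(D)

# 'airnet.fpn' -> body 'airnet' (plus an FPN neck), which now maps
# to 'lib.modeling.airnet.make_airnet_', i.e. the 5-stage AirNet.
body = 'airnet.fpn'.split('.')[0]
print(_STORE['BODY'][body])  # lib.modeling.airnet.make_airnet_
```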
......@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import ProposalCXX
from lib.modeling import RPNDecoder
class FastRCNN(torch.nn.Module):
......@@ -43,7 +43,7 @@ class FastRCNN(torch.nn.Module):
self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
self.proposal_cxx = ProposalCXX()
self.rpn_decoder = RPNDecoder()
self.proposal_layer = ProposalLayer()
self.proposal_target_layer = ProposalTargetLayer()
self.softmax = torch.nn.Softmax(dim=1)
......@@ -80,7 +80,7 @@ class FastRCNN(torch.nn.Module):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func = self.proposal_layer \
if self.training else self.proposal_cxx
if self.training else self.rpn_decoder
self.rcnn_data = {
'rois': proposal_func(
kwargs['features'],
......
......@@ -16,7 +16,7 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
from lib.modeling import conv1x1, conv3x3
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......
......@@ -20,12 +20,10 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3, affine
from lib.modeling import conv1x1, conv3x3, affine
class BasicBlock(torch.nn.Module):
expansion = 1
def __init__(self, dim_in, dim_out, stride=1,
downsample=None, dropblock=None):
super(BasicBlock, self).__init__()
......@@ -110,9 +108,9 @@ class Bottleneck(torch.nn.Module):
class ResNet(torch.nn.Module):
def __init__(self, block, layers):
def __init__(self, block, layers, filters):
super(ResNet, self).__init__()
self.dim_in, filters = 64, [256, 512, 1024, 2048]
self.dim_in, filters = filters[0], filters[1:]
self.feature_dims = [self.dim_in] + filters
self.conv1 = torch.nn.Conv2d(
3, 64,
......@@ -200,9 +198,13 @@ def resnet(depth):
elif depth == 269: units = [3, 30, 48, 8]
else: raise ValueError('Unsupported depth: %d' % depth)
block = Bottleneck if depth >= 50 else BasicBlock
return ResNet(block, units)
filters = [64, 256, 512, 1024, 2048] \
if depth >= 50 else [64, 64, 128, 256, 512]
return ResNet(block, units, filters)
def make_resnet_18(): return resnet(18)
def make_resnet_34(): return resnet(34)
def make_resnet_50(): return resnet(50)
def make_resnet_101(): return resnet(101)
def make_resnet_152(): return resnet(152)
\ No newline at end of file
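The new filters argument is what lets ResNet-18/34 (BasicBlock, expansion 1) share the ResNet class with the bottleneck variants. A quick mirror of the selection in resnet() and the feature dims it produces:

```python
def resnet_dims(depth):
    # Mirrors the new selection in resnet(): shallow nets keep the
    # BasicBlock widths, deep nets the 4x-expanded bottleneck widths.
    filters = [64, 256, 512, 1024, 2048] \
        if depth >= 50 else [64, 64, 128, 256, 512]
    dim_in, stage_filters = filters[0], filters[1:]
    return [dim_in] + stage_filters  # feature_dims, as in ResNet.__init__


print(resnet_dims(18))   # [64, 64, 128, 256, 512]
print(resnet_dims(101))  # [64, 256, 512, 1024, 2048]
```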
......@@ -18,8 +18,8 @@ import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv3x3
from lib.retinanet import AnchorTargetLayer, ProposalLayer
from lib.modeling import conv3x3, RetinaNetDecoder
from lib.retinanet import AnchorTargetLayer
class RetinaNet(torch.nn.Module):
......@@ -37,32 +37,23 @@ class RetinaNet(torch.nn.Module):
conv3x3(dim_in, dim_in, bias=True)
for _ in range(cfg.RETINANET.NUM_CONVS))
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES \
if cfg.RETINANET.SOFTMAX \
else cfg.MODEL.NUM_CLASSES - 1
self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
cfg.RETINANET.SCALES_PER_OCTAVE
self.cls_score = conv3x3(dim_in, self.C * A, bias=True)
self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True)
self.cls_prob = torch.nn.Softmax(dim=1, inplace=True) \
if cfg.RETINANET.SOFTMAX else torch.nn.Sigmoid(inplace=True)
self.cls_prob = torch.nn.Sigmoid(inplace=True)
self.relu = torch.nn.ELU(inplace=True)
self.proposal_layer = ProposalLayer()
self.decoder = RetinaNetDecoder()
########################################
# RetinaNet losses #
########################################
self.anchor_target_layer = AnchorTargetLayer()
if cfg.RETINANET.SOFTMAX:
self.cls_loss = torch.nn.SoftmaxFocalLoss(
ignore_index=-1,
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
else:
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.cls_loss = torch.nn.SigmoidFocalLoss(
alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
self.bbox_loss = torch.nn.SmoothL1Loss(beta=1. / 9.)
self.reset_parameters()
......@@ -77,15 +68,8 @@ class RetinaNet(torch.nn.Module):
# Bias prior initialization for Focal Loss
# For details, see the official code:
# https://github.com/facebookresearch/Detectron
if cfg.RETINANET.SOFTMAX:
bias = self.cls_score.bias.numpy()
bias = bias.reshape((cfg.MODEL.NUM_CLASSES, -1))
bias[0, :] = math.log(
(cfg.MODEL.NUM_CLASSES - 1) *
(1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)
else:
self.cls_score.bias.fill_(
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
self.cls_score.bias.fill_(
-math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
def compute_outputs(self, features):
"""Compute the RetinaNet logits.
......@@ -114,9 +98,12 @@ class RetinaNet(torch.nn.Module):
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self, features,
cls_score, bbox_pred,
gt_boxes, ims_info,
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RetinaNet classification loss and regression loss.
......@@ -173,9 +160,10 @@ class RetinaNet(torch.nn.Module):
)
else:
outputs['detections'] = \
self.proposal_layer(
self.decoder(
kwargs['features'],
self.cls_prob(cls_score),
self.cls_prob(cls_score)
.permute(0, 2, 1),
bbox_pred,
kwargs['ims_info'],
)
......
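With the softmax branch gone, only the sigmoid bias prior remains in reset_parameters above. The init solves sigmoid(b) = PRIOR_PROB for b, i.e. b = -log((1 - p) / p), so every anchor starts with a ~1% foreground probability. A quick verification:

```python
import math

prior_prob = 0.01  # cfg.PRIOR_PROB
bias = -math.log((1 - prior_prob) / prior_prob)
print(bias)  # ~ -4.595

sigmoid = 1. / (1. + math.exp(-bias))
print(sigmoid)  # ~ 0.01, the intended prior
```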
......@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
from lib.modeling import conv1x1, conv3x3
class RPN(torch.nn.Module):
......@@ -59,7 +59,6 @@ class RPN(torch.nn.Module):
for m in self.modules():
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.normal_(m.weight, std=0.01)
torch.nn.init.constant_(m.bias, 0)
def compute_outputs(self, features):
"""Compute the RPN logits.
......@@ -91,9 +90,12 @@ class RPN(torch.nn.Module):
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(
self, features,
cls_score, bbox_pred,
gt_boxes, ims_info,
self,
features,
cls_score,
bbox_pred,
gt_boxes,
ims_info,
):
"""Compute the RPN classification loss and regression loss.
......
......@@ -17,11 +17,13 @@ import dragon.vm.torch as torch
from collections import OrderedDict
from lib.core.config import cfg
from lib.modeling.base import conv3x3
from lib.modeling import conv3x3
from lib.ssd import (
PriorBoxLayer, MultiBoxMatchLayer,
HardMiningLayer, MultiBoxTargetLayer,
PriorBoxLayer,
MultiBoxMatchLayer,
HardMiningLayer,
MultiBoxTargetLayer,
)
......@@ -38,6 +40,8 @@ class SSD(torch.nn.Module):
self.softmax = torch.nn.Softmax(dim=2)
C = cfg.MODEL.NUM_CLASSES
feature_dims = list(filter(None, feature_dims))
for i, dim_in in enumerate(feature_dims):
A = len(cfg.SSD.MULTIBOX.ASPECT_RATIOS[i]) + 1
self.cls_score.append(conv3x3(dim_in, A * C, bias=True))
......@@ -89,8 +93,12 @@ class SSD(torch.nn.Module):
torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)
def compute_losses(
self, prior_boxes, gt_boxes,
cls_score, bbox_pred, cls_prob,
self,
prior_boxes,
gt_boxes,
cls_score,
bbox_pred,
cls_prob,
):
"""Compute the SSD classification loss and regression loss.
......
......@@ -16,7 +16,7 @@ from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.base import conv1x1, conv3x3
from lib.modeling import conv1x1, conv3x3
class VGG(torch.nn.Module):
......
......@@ -141,7 +141,7 @@ def test_net(net, server):
keep = soft_nms(cls_dets, cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA)
else: keep = nms(cls_dets, cfg.TEST.NMS)
else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
cls_dets = cls_dets[keep, :]
all_boxes[j][i] = cls_dets
boxes_this_image.append(cls_dets)
......
......@@ -43,7 +43,7 @@ class Expander(object):
w_off = int(math.floor(npr.uniform(0.0, expand_w - im_w)))
new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_im.fill(127)
new_im[:] = cfg.PIXEL_MEANS
new_im[h_off: h_off + im_h, w_off: w_off + im_w, :] = im
if gt_boxes is not None:
......