Commit bf361560 by Ting PAN

Bump to 0.1.1

1 parent c8535116
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Add RandomCrop/RandomPad for ScaleJittering.
- Add ResNet18/ResNet34/AirNet for R-CNN and RetinaNet.
- Use the C++-implemented decoder for RetinaNet instead of the Python ProposalLayer.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.0 (20190314)
Dragon Minimum Required (Version 0.3.0.0)
......
@@ -67,9 +67,11 @@ python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
 | :------: | :------: |
 | [VGG16.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.SSD.pth)| SSD |
 | [VGG16.RCNN](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.RCNN.pth)| R-CNN |
+| [R-18.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-18.Affine.pth)| R-CNN, RetinaNet |
+| [R-34.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-34.Affine.pth)| R-CNN, RetinaNet |
 | [R-50.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-50.Affine.pth)| R-CNN, RetinaNet |
 | [R-101.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-101.Affine.pth)| R-CNN, RetinaNet |
-| [AirNet.SSD](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.SSD.pth)| SSD |
+| [AirNet.Affine](http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.Affine.pth)| R-CNN, RetinaNet, SSD |

 ## References
......
@@ -33,14 +33,14 @@ FRCNN:
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
+  DATABASE: '/data/coco_2014_trainval35k_lmdb'
   IMS_PER_BATCH: 2
   USE_DIFF: False  # Do not use crowd objects
   BATCH_SIZE: 512
   SCALES: [800]
   MAX_SIZE: 1333
 TEST:
-  DATABASE: 'taas:/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival_lmdb'
   JSON_FILE: '/data/instances_minival2014.json'
   PROTOCOL: 'coco'
   RPN_POST_NMS_TOP_N: 1000
......
@@ -33,14 +33,14 @@ FRCNN:
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/data/models/imagenet/R-101.Affine.pth'
-  DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
+  DATABASE: '/data/coco_2014_trainval35k_lmdb'
   IMS_PER_BATCH: 2
   USE_DIFF: False  # Do not use crowd objects
   BATCH_SIZE: 512
   SCALES: [800]
   MAX_SIZE: 1333
 TEST:
-  DATABASE: 'taas:/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival_lmdb'
   JSON_FILE: '/data/instances_minival2014.json'
   PROTOCOL: 'coco'
   RPN_POST_NMS_TOP_N: 1000
......
@@ -24,13 +24,13 @@ FRCNN:
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
+  DATABASE: '/data/voc_0712_trainval_lmdb'
   IMS_PER_BATCH: 2
   BATCH_SIZE: 128
   SCALES: [600]
   MAX_SIZE: 1000
 TEST:
-  DATABASE: 'taas:/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test_lmdb'
   PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
   RPN_POST_NMS_TOP_N: 1000
   SCALES: [600]
......
@@ -29,14 +29,14 @@ FRCNN:
   MLP_HEAD_DIM: 4096
 TRAIN:
   WEIGHTS: '/data/models/imagenet/VGG16.RCNN.pth'
-  DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
+  DATABASE: '/data/voc_0712_trainval_lmdb'
   RPN_MIN_SIZE: 16
   IMS_PER_BATCH: 2
   BATCH_SIZE: 128
   SCALES: [600]
   MAX_SIZE: 1000
 TEST:
-  DATABASE: 'taas:/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test_lmdb'
   PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
   RPN_MIN_SIZE: 16
   RPN_POST_NMS_TOP_N: 300
......
@@ -33,12 +33,12 @@ FPN:
   RPN_MAX_LEVEL: 7
 TRAIN:
   WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
+  DATABASE: '/data/coco_2014_trainval35k_lmdb'
   IMS_PER_BATCH: 8
   SCALES: [400]
   MAX_SIZE: 666
 TEST:
-  DATABASE: 'taas:/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival_lmdb'
   JSON_FILE: '/data/instances_minival2014.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
......
@@ -37,15 +37,15 @@ DROPBLOCK:
   DECREMENT: 0.000005  # * 20000 = 0.1
 TRAIN:
   WEIGHTS: '/data/models/imagenet/R-50.Affine.pth'
-  DATABASE: 'taas:/data/coco_2014_trainval35k_lmdb'
+  DATABASE: '/data/coco_2014_trainval35k_lmdb'
   IMS_PER_BATCH: 8
   SCALES: [400]
   MAX_SIZE: 666
   SCALE_JITTERING: True
   COLOR_JITTERING: True
-  SCALE_RANGE: [0.8, 1.2]
+  SCALE_RANGE: [0.75, 1.33]
 TEST:
-  DATABASE: 'taas:/data/coco_2014_minival_lmdb'
+  DATABASE: '/data/coco_2014_minival_lmdb'
   JSON_FILE: '/data/instances_minival2014.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
......
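A note on the `SCALE_RANGE` change above: the new bounds are nearly symmetric on a log scale, since 1/0.75 = 4/3 ≈ 1.33, whereas the old [0.8, 1.2] shrank images more aggressively than it enlarged them. A quick check:

```python
import math

# A sum of log-bounds near 0 means up- and down-scaling are balanced in log space.
for lo, hi in [(0.8, 1.2), (0.75, 1.33)]:
    print(lo, hi, math.log(lo) + math.log(hi))
# 0.8 1.2 -0.0408...   (biased toward shrinking)
# 0.75 1.33 -0.0025... (almost symmetric)
```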
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: airnet.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.02
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  STEPS: [40000, 50000, 60000]
  MAX_ITERS: 60000
  SNAPSHOT_ITERS: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
  DATABASE: '/data/voc_0712_trainval_lmdb'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_RANGE: [0.5, 2.0]
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
  DATABASE: '/data/voc_2007_test_lmdb'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
  MAX_SIZE: 500
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet18.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  STEPS: [40000, 50000, 60000]
  WARM_UP_ITERS: 2000
  MAX_ITERS: 60000
  SNAPSHOT_ITERS: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/data/models/imagenet/R-18.Affine.pth'
  DATABASE: '/data/voc_0712_trainval_lmdb'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_RANGE: [0.5, 2.0]
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
  DATABASE: '/data/voc_2007_test_lmdb'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
  MAX_SIZE: 500
  NMS: 0.45
\ No newline at end of file
NUM_GPUS: 1
VIS: False
VIS_ON_FILE: False
MODEL:
  TYPE: retinanet
  BACKBONE: resnet34.fpn
  CLASSES: ['__background__',
            'aeroplane', 'bicycle', 'bird', 'boat',
            'bottle', 'bus', 'car', 'cat', 'chair',
            'cow', 'diningtable', 'dog', 'horse',
            'motorbike', 'person', 'pottedplant',
            'sheep', 'sofa', 'train', 'tvmonitor']
  NUM_CLASSES: 21
SOLVER:
  BASE_LR: 0.01
  WEIGHT_DECAY: 0.0001
  LR_POLICY: steps_with_decay
  STEPS: [40000, 50000, 60000]
  WARM_UP_ITERS: 2000
  MAX_ITERS: 60000
  SNAPSHOT_ITERS: 5000
  SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
  RPN_MIN_LEVEL: 3
  RPN_MAX_LEVEL: 7
TRAIN:
  WEIGHTS: '/data/models/imagenet/R-34.Affine.pth'
  DATABASE: '/data/voc_0712_trainval_lmdb'
  IMS_PER_BATCH: 32
  SCALES: [300]
  MAX_SIZE: 500
  SCALE_RANGE: [0.5, 2.0]
  SCALE_JITTERING: True
  COLOR_JITTERING: True
TEST:
  DATABASE: '/data/voc_2007_test_lmdb'
  PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
  IMS_PER_BATCH: 1
  SCALES: [300]
  MAX_SIZE: 500
  NMS: 0.45
\ No newline at end of file
@@ -29,11 +29,11 @@ SSD:
     STRIDES: [8, 16, 32]
     ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5], [1, 2, 0.5]]
 TRAIN:
-  WEIGHTS: '/data/models/imagenet/AirNet.SSD.pth'
-  DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
+  WEIGHTS: '/data/models/imagenet/AirNet.Affine.pth'
+  DATABASE: '/data/voc_0712_trainval_lmdb'
   IMS_PER_BATCH: 32
 TEST:
-  DATABASE: 'taas:/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test_lmdb'
   PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
   IMS_PER_BATCH: 8
   NMS_TOP_K: 400
......
@@ -33,10 +33,10 @@ SSD:
                     [1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
 TRAIN:
   WEIGHTS: '/data/models/imagenet/VGG16.SSD.pth'
-  DATABASE: 'taas:/data/voc_0712_trainval_lmdb'
+  DATABASE: '/data/voc_0712_trainval_lmdb'
   IMS_PER_BATCH: 32
 TEST:
-  DATABASE: 'taas:/data/voc_2007_test_lmdb'
+  DATABASE: '/data/voc_2007_test_lmdb'
   PROTOCOL: 'voc2007'  # 'voc2007', 'voc2010', 'coco'
   IMS_PER_BATCH: 8
   NMS_TOP_K: 400
......
@@ -13,6 +13,10 @@
 #
 # ------------------------------------------------------------

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
 import os.path as osp
 import numpy as np
@@ -104,9 +108,6 @@ __C.TRAIN.RPN_MIN_SIZE = 0
 # Set to -1 or a large value, e.g. 100000, to disable pruning anchors
 __C.TRAIN.RPN_STRADDLE_THRESH = 0

-# Resume from the last checkpoint?
-__C.TRAIN.RESUME = False
-
 ###########################################
 #                                         #
@@ -184,6 +185,7 @@ __C.TEST.DETECTIONS_PER_IM = 100
 #                                         #
 ###########################################

 __C.MODEL = edict()

 # The type of the model
@@ -211,11 +213,6 @@ __C.MODEL.CLASSES = ['__background__']
 # Add StopGrad at a specified stage so the bottom layers are frozen
 __C.MODEL.FREEZE_AT = 2

-# Whether to use bias prior to improve the one-stage detector?
-# Enabled if model type in ('ssd',)
-# Retinanet is force to use bias prior
-__C.MODEL.USE_BIAS_PRIOR = False
-
 # Whether to use focal loss for one-stage detectors?
 # Enabled if model type in ('ssd',)
 # RetinaNet is forced to use focal loss
@@ -234,6 +231,7 @@ __C.MODEL.COARSEST_STRIDE = -1
 #                                         #
 ###########################################

 __C.RPN = edict()

 # Strides for multiple rpn heads
@@ -252,6 +250,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
 #                                         #
 ###########################################

 __C.RETINANET = edict()

 # Anchor aspect ratios to use
@@ -269,7 +268,7 @@ __C.RETINANET.ANCHOR_SCALE = 4
 __C.RETINANET.NUM_CONVS = 4

 # During inference, #locs to select based on cls score before NMS is performed
-__C.RETINANET.PRE_NMS_TOP_N = 1000
+__C.RETINANET.PRE_NMS_TOP_N = 5000

 # IoU overlap ratio for labeling an anchor as positive
 # Anchors with >= iou overlap are labeled positive
@@ -279,9 +278,6 @@ __C.RETINANET.POSITIVE_OVERLAP = 0.5
 # Anchors with < iou overlap are labeled negative
 __C.RETINANET.NEGATIVE_OVERLAP = 0.4

-# Whether softmax should be used in classification branch training
-__C.RETINANET.SOFTMAX = False
-
 ###########################################
 #                                         #
@@ -336,6 +332,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7
 #                                         #
 ###########################################

 __C.MRCNN = edict()

 # Resolution of mask predictions
@@ -354,6 +351,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14
 #                                         #
 ###########################################

 __C.SSD = edict()

 # Whether to enable FPN enhancement?
@@ -412,6 +410,7 @@ __C.SSD.SAMPLERS = [
 #                                         #
 ###########################################

 __C.RESNET = edict()

 # Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
@@ -427,6 +426,7 @@ __C.RESNET.GROUP_WIDTH = 64
 #                                         #
 ###########################################

 __C.DROPBLOCK = edict()

 # Whether to use drop block for more regularization
@@ -442,6 +442,7 @@ __C.DROPBLOCK.DECREMENT = 1e-6
 #                                         #
 ###########################################

 __C.SOLVER = edict()

 # Base learning rate for the specified schedule
@@ -502,6 +503,7 @@ __C.SOLVER.SNAPSHOT_PREFIX = ''
 #                                         #
 ###########################################

 # Number of GPUs to use (applies to both training and testing)
 __C.NUM_GPUS = 1
@@ -523,14 +525,6 @@ __C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
 # Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
 __C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5.0, 5.0, 10.0)

-# Clip bounding box transformation predictions to prevent np.exp from
-# overflowing
-# Heuristic choice based on that would scale a 16 pixel anchor up to 1000 pixels
-__C.BBOX_XFORM_CLIP = np.log(1000. / 16.)
-
-# Clip ?
-__C.USE_XFORM_CLIP = False
-
 # Prior prob for the positives at the beginning of training.
 # This is used to set the bias init for the logits layer
 __C.PRIOR_PROB = 0.01
......
@@ -13,6 +13,7 @@
 #
 # ------------------------------------------------------------

+import os
 from lib.datasets.taas import TaaS
@@ -26,12 +27,12 @@ def get_imdb(name):
     if len(keys) >= 2:
         cls, source = keys[0], ':'.join(keys[1:])
         if cls not in _GLOBAL_DATA_SETS:
-            raise KeyError('Unknown dataset: {}'.format(cls))
+            raise KeyError('Unknown DataSet: {}'.format(cls))
         return _GLOBAL_DATA_SETS[cls](source)
-    elif len(keys) == 1:
-        return _GLOBAL_DATA_SETS[name]()
+    elif os.path.exists(name):
+        return _GLOBAL_DATA_SETS['taas'](name)
     else:
-        raise ValueError('Illegal format of image database: {}'.format(name))
+        raise ValueError('Illegal database: ' + name)


 def list_imdbs():
......
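With the `get_imdb` change above, a bare path that exists on disk now falls back to the default `taas` LMDB reader, which is why the configs in this commit drop the `taas:` prefix. A usage sketch (paths hypothetical, and assuming the usual `lib.datasets.factory` module path for this file):

```python
from lib.datasets.factory import get_imdb

# Both forms resolve to the same TaaS LMDB reader after this commit:
imdb_a = get_imdb('taas:/data/voc_0712_trainval_lmdb')  # explicit protocol prefix
imdb_b = get_imdb('/data/voc_0712_trainval_lmdb')       # bare path, must exist on disk
```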
@@ -45,7 +45,13 @@ class DataTransformer(Process):
         self.Q_in = self.Q1_out = self.Q2_out = None
         self.daemon = True

-    def make_roidb(self, ann_datum, im_scale, flip=False, offsets=None):
+    def make_record(
+        self,
+        ann_datum,
+        im_scale,
+        flip=False,
+        offsets=None,
+    ):
         annotations = ann_datum.annotation
         n_objects = 0
         if not self._use_diff:
@@ -53,35 +59,43 @@ class DataTransformer(Process):
             if not ann.difficult: n_objects += 1
         else: n_objects = len(annotations)
-        roidb = {
+        record = {
             'width': ann_datum.datum.width,
             'height': ann_datum.datum.height,
             'gt_classes': np.zeros((n_objects,), dtype=np.int32),
             'boxes': np.zeros((n_objects, 4), dtype=np.float32),
         }

-        ix = 0
+        # Filter the difficult instances
+        instance_idx = 0
         for ann in annotations:
             if not self._use_diff and ann.difficult: continue
-            roidb['boxes'][ix, :] = [
-                max(0, ann.x1), max(0, ann.y1),
+            record['boxes'][instance_idx, :] = [
+                max(0, ann.x1),
+                max(0, ann.y1),
                 min(ann.x2, ann_datum.datum.width - 1),
-                min(ann.y2, ann_datum.datum.height - 1)]
-            roidb['gt_classes'][ix] = self._class_to_ind[ann.name]
-            ix += 1
+                min(ann.y2, ann_datum.datum.height - 1),
+            ]
+            record['gt_classes'][instance_idx] = self._class_to_ind[ann.name]
+            instance_idx += 1

-        if flip: roidb['boxes'] = _flip_boxes(roidb['boxes'], roidb['width'])
+        # Flip the boxes if necessary
+        if flip:
+            record['boxes'] = _flip_boxes(
+                record['boxes'], record['width'])

-        roidb['boxes'] *= im_scale
+        # Scale the boxes to the detecting scale
+        record['boxes'] *= im_scale

+        # Apply the offsets from scale jitter
         if offsets is not None:
-            roidb['boxes'][:, 0::2] += offsets[0]
-            roidb['boxes'][:, 1::2] += offsets[1]
-            roidb['boxes'][:, :] = np.minimum(
-                np.maximum(roidb['boxes'][:, :], 0),
+            record['boxes'][:, 0::2] += offsets[0]
+            record['boxes'][:, 1::2] += offsets[1]
+            record['boxes'][:, :] = np.minimum(
+                np.maximum(record['boxes'][:, :], 0),
                 [offsets[2][1] - 1, offsets[2][0] - 1] * 2)

-        return roidb
+        return record
     @classmethod
     def get_image(cls, serialized):
@@ -121,7 +135,14 @@ class DataTransformer(Process):
         target_size = cfg.TRAIN.SCALES[scale_indices]
         im, im_scale, jitter = prep_im_for_blob(im, target_size, cfg.TRAIN.MAX_SIZE)
+        # Flip
+        flip = False
+        if self._use_flipped:
+            if npr.randint(0, 2) > 0:
+                im = im[:, ::-1, :]
+                flip = True
+
-        # Crop or Pad
+        # Random Crop or RandomPad
         offsets = None
         if cfg.TRAIN.MAX_SIZE > 0:
             if jitter != 1.0:
@@ -132,20 +153,13 @@ class DataTransformer(Process):
                 # To a square (target_size, target_size)
                 im, offsets = _get_image_with_target_size([target_size] * 2, im)

-        # Flip
-        flip = False
-        if self._use_flipped:
-            if npr.randint(0, 2) > 0:
-                im = im[:, ::-1, :]
-                flip = True
-
-        # Datum -> RoIDB
-        roidb = self.make_roidb(datum, im_scale, flip, offsets)
+        # Datum -> Record
+        rec = self.make_record(datum, im_scale, flip, offsets)

         # Post-Process for gt boxes
         # Shape like: [num_objects, {x1, y1, x2, y2, cls}]
-        gt_boxes = np.empty((len(roidb['gt_classes']), 5), dtype=np.float32)
-        gt_boxes[:, 0:4], gt_boxes[:, 4] = roidb['boxes'], roidb['gt_classes']
+        gt_boxes = np.empty((len(rec['gt_classes']), 5), dtype=np.float32)
+        gt_boxes[:, 0:4], gt_boxes[:, 4] = rec['boxes'], rec['gt_classes']

         return im, im_scale, gt_boxes
@@ -175,16 +189,16 @@ def _flip_boxes(boxes, width):

 def _get_image_with_target_size(target_size, im):
     im_shape = list(im.shape)
     width_diff = target_size[1] - im_shape[1]
-    offset_crop_width = max(-width_diff // 2, 0)
-    offset_pad_width = max(width_diff // 2, 0)
+    offset_crop_width = np.random.randint(0, max(-width_diff, 0) + 1)
+    offset_pad_width = np.random.randint(0, max(width_diff, 0) + 1)

     height_diff = target_size[0] - im_shape[0]
-    offset_crop_height = max(-height_diff // 2, 0)
-    offset_pad_height = max(height_diff // 2, 0)
+    offset_crop_height = np.random.randint(0, max(-height_diff, 0) + 1)
+    offset_pad_height = np.random.randint(0, max(height_diff, 0) + 1)

     im_shape[0 : 2] = target_size
     new_im = np.empty(im_shape, dtype=im.dtype)
-    new_im.fill(127)
+    new_im[:] = cfg.PIXEL_MEANS

     new_im[offset_pad_height:offset_pad_height + im.shape[0],
            offset_pad_width:offset_pad_width + im.shape[1]] = \
......
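The `_get_image_with_target_size` change above replaces the old centered offsets (`max(diff // 2, 0)`) with uniformly sampled ones; this is the RandomCrop/RandomPad feature from the release notes. A standalone sketch of just the sampling logic:

```python
import numpy as np

def sample_offsets(target_size, im_shape):
    """Sample crop/pad offsets uniformly instead of centering them.

    A positive diff means the image is smaller than the target (random pad);
    a negative diff means it is larger (random crop).
    """
    height_diff = target_size[0] - im_shape[0]
    width_diff = target_size[1] - im_shape[1]
    crop = (np.random.randint(0, max(-height_diff, 0) + 1),
            np.random.randint(0, max(-width_diff, 0) + 1))
    pad = (np.random.randint(0, max(height_diff, 0) + 1),
           np.random.randint(0, max(width_diff, 0) + 1))
    return crop, pad
```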
@@ -11,6 +11,9 @@

 # Import custom modules
 from lib.modeling.base import Bootstarp
+from lib.modeling.base import RPNDecoder
+from lib.modeling.base import RetinaNetDecoder
+from lib.modeling.base import conv1x1, conv3x3, bn, affine
 from lib.modeling.fpn import FPN
 from lib.modeling.rpn import RPN
 from lib.modeling.fast_rcnn import FastRCNN
......
@@ -15,16 +15,16 @@ from __future__ import print_function

 import dragon.vm.torch as torch

-from lib.modeling.base import conv1x1, conv3x3, bn
+from lib.modeling import conv1x1, conv3x3, bn, affine


 class WideResBlock(torch.nn.Module):
     def __init__(self, dim_in, dim_out, stride=1, downsample=None):
         super(WideResBlock, self).__init__()
         self.conv1 = conv3x3(dim_in, dim_out, stride)
-        self.bn1 = bn(dim_out, eps=1e-3)
+        self.bn1 = affine(dim_out)
         self.conv2 = conv3x3(dim_out, dim_out)
-        self.bn2 = bn(dim_out, eps=1e-3)
+        self.bn2 = affine(dim_out)
         self.downsample = downsample
         self.relu = torch.nn.ReLU(inplace=True)
@@ -50,15 +50,15 @@ class InceptionBlock(torch.nn.Module):
     def __init__(self, dim_in, dim_out):
         super(InceptionBlock, self).__init__()
         self.conv1 = conv1x1(dim_in, dim_out)
-        self.bn1 = bn(dim_out, eps=1e-3)
+        self.bn1 = affine(dim_out)
         self.conv2 = conv3x3(dim_out, dim_out // 2)
-        self.bn2 = bn(dim_out // 2, eps=1e-3)
+        self.bn2 = affine(dim_out // 2)
         self.conv3a = conv3x3(dim_out // 2, dim_out)
-        self.bn3a = bn(dim_out, eps=1e-3)
+        self.bn3a = affine(dim_out)
         self.conv3b = conv3x3(dim_out, dim_out)
-        self.bn3b = bn(dim_out, eps=1e-3)
+        self.bn3b = affine(dim_out)
         self.conv4 = conv3x3(dim_out * 3, dim_out)
-        self.bn4 = bn(dim_out, eps=1e-3)
+        self.bn4 = affine(dim_out)
         self.relu = torch.nn.ReLU(inplace=True)

     def forward(self, x):
@@ -93,7 +93,8 @@ class AirNet(torch.nn.Module):
     def __init__(self, blocks, num_stages):
         super(AirNet, self).__init__()
         self.dim_in, filters = 64, [64, 128, 256, 384]
-        self.feature_dims = filters[1:num_stages - 1]
+        self.feature_dims = [None, None] + \
+            filters[1:num_stages - 1]
         self.conv1 = torch.nn.Conv2d(
             3, 64,
             kernel_size=7,
@@ -101,7 +102,7 @@ class AirNet(torch.nn.Module):
             padding=3,
             bias=False,
         )
-        self.bn1 = bn(self.dim_in, eps=1e-3)
+        self.bn1 = affine(self.dim_in)
         self.relu = torch.nn.ReLU(inplace=True)
         self.maxpool = torch.nn.MaxPool2d(
             kernel_size=2,
@@ -128,7 +129,7 @@ class AirNet(torch.nn.Module):
     def make_blocks(self, dim_out, blocks, stride=1):
         downsample = torch.nn.Sequential(
             conv1x1(self.dim_in, dim_out, stride=stride),
-            bn(dim_out, eps=1e-3),
+            affine(dim_out),
         )
         layers = [WideResBlock(self.dim_in, dim_out, stride, downsample)]
         self.dim_in = dim_out
@@ -148,7 +149,7 @@ class AirNet(torch.nn.Module):
         x = self.maxpool(x)

         x = self.layer1(x)
-        outputs = [self.layer2(x)]
+        outputs = [None, None, self.layer2(x)]
         if hasattr(self, 'layer3'): outputs += [self.layer3(outputs[-1])]
         if hasattr(self, 'layer4'): outputs += [self.layer4(outputs[-1])]
@@ -164,7 +165,7 @@ def airnet(num_stages):
     )
     return AirNet(blocks, num_stages)

+def make_airnet_(): return airnet(5)
 def make_airnet_3b(): return airnet(3)
 def make_airnet_4b(): return airnet(4)
 def make_airnet_5b(): return airnet(5)
\ No newline at end of file
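The `[None, None]` padding in `feature_dims` and `outputs` above appears to keep list indices aligned with backbone stage levels (the first two stages are never emitted), so FPN-style consumers can index features by level while SSD simply filters the placeholders out, as the `detector.py` and `ssd.py` hunks later in this commit do. A minimal illustration of that reading:

```python
# Index i corresponds to backbone stage i + 1; stages 1-2 are placeholders.
features = [None, None, 'C3', 'C4', 'C5']
ssd_features = list(filter(None, features))  # SSD drops the placeholders
print(ssd_features)                          # ['C3', 'C4', 'C5']
```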
@@ -44,11 +44,11 @@ class Bootstarp(torch.nn.Module):
         return self.run(inputs, outputs)


-class ProposalCXX(torch.nn.Module):
-    """Extended operator to generate proposal regions."""
+class RPNDecoder(torch.nn.Module):
+    """Generate proposal regions from RPN."""

     def __init__(self):
-        super(ProposalCXX, self).__init__()
+        super(RPNDecoder, self).__init__()
         self.register_op()
         self.K = (cfg.FPN.ROI_MAX_LEVEL -
                   cfg.FPN.ROI_MIN_LEVEL + 1) \
@@ -58,6 +58,7 @@ class ProposalCXX(torch.nn.Module):
         self.op_meta = {
             'op_type': 'Proposal',
             'arguments': {
+                'det_type': 'RCNN',
                 'strides': cfg.RPN.STRIDES,
                 'ratios': [float(e) for e in cfg.RPN.ASPECT_RATIOS],
                 'scales': [float(e) for e in cfg.RPN.SCALES],
@@ -79,6 +80,38 @@ class ProposalCXX(torch.nn.Module):
         return outputs if isinstance(outputs, list) else [outputs]


+class RetinaNetDecoder(torch.nn.Module):
+    """Generate proposal regions from retinanet."""
+
+    def __init__(self):
+        super(RetinaNetDecoder, self).__init__()
+        k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
+        scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
+        self.strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
+        self.scales = [cfg.RETINANET.ANCHOR_SCALE *
+                       (2 ** (octave / float(scales_per_octave)))
+                       for octave in range(scales_per_octave)]
+        self.register_op()
+
+    def register_op(self):
+        self.op_meta = {
+            'op_type': 'Proposal',
+            'arguments': {
+                'det_type': 'RETINANET',
+                'strides': self.strides,
+                'scales': self.scales,
+                'ratios': [float(e) for e in cfg.RETINANET.ASPECT_RATIOS],
+                'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
+                'score_thresh': cfg.TEST.SCORE_THRESH,
+            }
+        }
+
+    def forward(self, features, cls_prob, bbox_pred, ims_info):
+        inputs = features + [cls_prob, bbox_pred, to_tensor(ims_info)]
+        outputs = [self.register_output()]
+        return self.run(inputs, outputs)
+
+
 def conv1x1(dim_in, dim_out, stride=1, bias=False):
     """1x1 convolution."""
     return torch.nn.Conv2d(
......
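`RetinaNetDecoder` precomputes its strides and per-octave anchor scales from the config. With `ANCHOR_SCALE = 4` (config.py above), FPN levels 3-7 (the VOC configs in this commit), and assuming `SCALES_PER_OCTAVE = 3` (the usual RetinaNet default; its value is not shown in this diff), the numbers come out as:

```python
# Reproduces the decoder's __init__ arithmetic with assumed defaults.
anchor_scale, scales_per_octave = 4, 3  # SCALES_PER_OCTAVE = 3 is assumed
k_min, k_max = 3, 7                     # FPN.RPN_MIN_LEVEL / RPN_MAX_LEVEL
strides = [int(2. ** lvl) for lvl in range(k_min, k_max + 1)]
scales = [anchor_scale * 2 ** (octave / float(scales_per_octave))
          for octave in range(scales_per_octave)]
print(strides)  # [8, 16, 32, 64, 128]
print(scales)   # [4.0, 5.039..., 6.349...] - three scales per pyramid level
```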
@@ -22,9 +22,12 @@ from lib.utils.logger import is_root
 from lib.modeling.factory import get_body_func

 from lib.modeling import (
-    Bootstarp, FPN, RPN,
+    Bootstarp,
+    FPN,
+    RPN,
     FastRCNN,
-    RetinaNet, SSD,
+    RetinaNet,
+    SSD,
 )
@@ -144,6 +147,7 @@ class Detector(torch.nn.Module):
         # 3.3 Feature -> SSD
         if hasattr(self, 'ssd'):
+            features = list(filter(None, features))
             outputs.update(
                 self.ssd(
                     features=features,
......
@@ -55,7 +55,7 @@ for D in [16, 19]:
         'lib.modeling.vgg.make_vgg_{}{}'.format(D, T)

 # AirNet
-for D in ['3b', '4b', '5b']:
+for D in ['', '3b', '4b', '5b']:
     _STORE['BODY']['airnet{}'.format(D)] = \
         'lib.modeling.airnet.make_airnet_{}'.format(D)
......
@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
 from collections import OrderedDict
 from lib.core.config import cfg
-from lib.modeling.base import ProposalCXX
+from lib.modeling import RPNDecoder
 class FastRCNN(torch.nn.Module):
@@ -43,7 +43,7 @@ class FastRCNN(torch.nn.Module):
         self.fc7 = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
         self.cls_score = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES)
         self.bbox_pred = torch.nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.MODEL.NUM_CLASSES * 4)
-        self.proposal_cxx = ProposalCXX()
+        self.rpn_decoder = RPNDecoder()
         self.proposal_layer = ProposalLayer()
         self.proposal_target_layer = ProposalTargetLayer()
         self.softmax = torch.nn.Softmax(dim=1)
@@ -80,7 +80,7 @@ class FastRCNN(torch.nn.Module):
         # Generate Proposals
         # Apply the CXX implementation during inference
         proposal_func = self.proposal_layer \
-            if self.training else self.proposal_cxx
+            if self.training else self.rpn_decoder
         self.rcnn_data = {
             'rois': proposal_func(
                 kwargs['features'],
......
@@ -16,7 +16,7 @@ from __future__ import print_function
 import dragon.vm.torch as torch

 from lib.core.config import cfg
-from lib.modeling.base import conv1x1, conv3x3
+from lib.modeling import conv1x1, conv3x3

 HIGHEST_BACKBONE_LVL = 5  # E.g., "conv5"-like level
......
@@ -20,12 +20,10 @@ from __future__ import print_function
 import dragon.vm.torch as torch

 from lib.core.config import cfg
-from lib.modeling.base import conv1x1, conv3x3, affine
+from lib.modeling import conv1x1, conv3x3, affine


 class BasicBlock(torch.nn.Module):
-    expansion = 1
-
     def __init__(self, dim_in, dim_out, stride=1,
                  downsample=None, dropblock=None):
         super(BasicBlock, self).__init__()
@@ -110,9 +108,9 @@ class Bottleneck(torch.nn.Module):

 class ResNet(torch.nn.Module):
-    def __init__(self, block, layers):
+    def __init__(self, block, layers, filters):
         super(ResNet, self).__init__()
-        self.dim_in, filters = 64, [256, 512, 1024, 2048]
+        self.dim_in, filters = filters[0], filters[1:]
         self.feature_dims = [self.dim_in] + filters
         self.conv1 = torch.nn.Conv2d(
             3, 64,
@@ -200,9 +198,13 @@ def resnet(depth):
     elif depth == 269: units = [3, 30, 48, 8]
     else: raise ValueError('Unsupported depth: %d' % depth)
     block = Bottleneck if depth >= 50 else BasicBlock
-    return ResNet(block, units)
+    filters = [64, 256, 512, 1024, 2048] \
+        if depth >= 50 else [64, 64, 128, 256, 512]
+    return ResNet(block, units, filters)

+def make_resnet_18(): return resnet(18)
+def make_resnet_34(): return resnet(34)
 def make_resnet_50(): return resnet(50)
 def make_resnet_101(): return resnet(101)
 def make_resnet_152(): return resnet(152)
\ No newline at end of file
@@ -18,8 +18,8 @@ import dragon.vm.torch as torch
 from collections import OrderedDict
 from lib.core.config import cfg
-from lib.modeling.base import conv3x3
-from lib.retinanet import AnchorTargetLayer, ProposalLayer
+from lib.modeling import conv3x3, RetinaNetDecoder
+from lib.retinanet import AnchorTargetLayer


 class RetinaNet(torch.nn.Module):
@@ -37,29 +37,20 @@ class RetinaNet(torch.nn.Module):
             conv3x3(dim_in, dim_in, bias=True)
             for _ in range(cfg.RETINANET.NUM_CONVS))
         # Packed as [C, A] not [A, C]
-        self.C = cfg.MODEL.NUM_CLASSES \
-            if cfg.RETINANET.SOFTMAX \
-            else cfg.MODEL.NUM_CLASSES - 1
+        self.C = cfg.MODEL.NUM_CLASSES - 1
         A = len(cfg.RETINANET.ASPECT_RATIOS) * \
             cfg.RETINANET.SCALES_PER_OCTAVE
         self.cls_score = conv3x3(dim_in, self.C * A, bias=True)
         self.bbox_pred = conv3x3(dim_in, 4 * A, bias=True)
-        self.cls_prob = torch.nn.Softmax(dim=1, inplace=True) \
-            if cfg.RETINANET.SOFTMAX else torch.nn.Sigmoid(inplace=True)
+        self.cls_prob = torch.nn.Sigmoid(inplace=True)
         self.relu = torch.nn.ELU(inplace=True)
-        self.proposal_layer = ProposalLayer()
+        self.decoder = RetinaNetDecoder()

         ########################################
         # RetinaNet losses                     #
         ########################################

         self.anchor_target_layer = AnchorTargetLayer()
-        if cfg.RETINANET.SOFTMAX:
-            self.cls_loss = torch.nn.SoftmaxFocalLoss(
-                ignore_index=-1,
-                alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
-                gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
-        else:
-            self.cls_loss = torch.nn.SigmoidFocalLoss(
-                alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
-                gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
+        self.cls_loss = torch.nn.SigmoidFocalLoss(
+            alpha=cfg.MODEL.FOCAL_LOSS_ALPHA,
+            gamma=cfg.MODEL.FOCAL_LOSS_GAMMA)
@@ -77,13 +68,6 @@ class RetinaNet(torch.nn.Module):
         # Bias prior initialization for Focal Loss
         # For details, See the official codes:
         # https://github.com/facebookresearch/Detectron
-        if cfg.RETINANET.SOFTMAX:
-            bias = self.cls_score.bias.numpy()
-            bias = bias.reshape((cfg.MODEL.NUM_CLASSES, -1))
-            bias[0, :] = math.log(
-                (cfg.MODEL.NUM_CLASSES - 1) *
-                (1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB)
-        else:
-            self.cls_score.bias.fill_(
-                -math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
+        self.cls_score.bias.fill_(
+            -math.log((1 - cfg.PRIOR_PROB) / cfg.PRIOR_PROB))
@@ -114,9 +98,12 @@ class RetinaNet(torch.nn.Module):
         return cls_score_wide[0], bbox_pred_wide[0]

     def compute_losses(
-        self, features,
-        cls_score, bbox_pred,
-        gt_boxes, ims_info,
+        self,
+        features,
+        cls_score,
+        bbox_pred,
+        gt_boxes,
+        ims_info,
     ):
"""Compute the RetinaNet classification loss and regression loss. """Compute the RetinaNet classification loss and regression loss.
...@@ -173,9 +160,10 @@ class RetinaNet(torch.nn.Module): ...@@ -173,9 +160,10 @@ class RetinaNet(torch.nn.Module):
) )
else: else:
outputs['detections'] = \ outputs['detections'] = \
self.proposal_layer( self.decoder(
kwargs['features'], kwargs['features'],
self.cls_prob(cls_score), self.cls_prob(cls_score)
.permute(0, 2, 1),
bbox_pred, bbox_pred,
kwargs['ims_info'], kwargs['ims_info'],
) )
......
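With the softmax branch deleted, the bias initialization above reduces to the sigmoid focal-loss case. For `PRIOR_PROB = 0.01` (config.py above) the fill value is `-log(0.99 / 0.01) ≈ -4.595`, so every anchor starts with roughly 1% foreground probability and the background term does not dominate the loss early in training:

```python
import math

p = 0.01                          # cfg.PRIOR_PROB
bias = -math.log((1 - p) / p)     # the fill value used in the diff above
print(bias)                       # -4.595...
print(1 / (1 + math.exp(-bias)))  # sigmoid(bias) == 0.01
```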
@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
 from collections import OrderedDict
 from lib.core.config import cfg
-from lib.modeling.base import conv1x1, conv3x3
+from lib.modeling import conv1x1, conv3x3


 class RPN(torch.nn.Module):
@@ -59,7 +59,6 @@ class RPN(torch.nn.Module):
         for m in self.modules():
             if isinstance(m, torch.nn.Conv2d):
                 torch.nn.init.normal_(m.weight, std=0.01)
-                torch.nn.init.constant_(m.bias, 0)

     def compute_outputs(self, features):
         """Compute the RPN logits.
@@ -91,9 +90,12 @@ class RPN(torch.nn.Module):
         return cls_score_wide[0], bbox_pred_wide[0]

     def compute_losses(
-        self, features,
-        cls_score, bbox_pred,
-        gt_boxes, ims_info,
+        self,
+        features,
+        cls_score,
+        bbox_pred,
+        gt_boxes,
+        ims_info,
     ):
         """Compute the RPN classification loss and regression loss.
......
@@ -17,11 +17,13 @@ import dragon.vm.torch as torch
 from collections import OrderedDict
 from lib.core.config import cfg
-from lib.modeling.base import conv3x3
+from lib.modeling import conv3x3

 from lib.ssd import (
-    PriorBoxLayer, MultiBoxMatchLayer,
-    HardMiningLayer, MultiBoxTargetLayer,
+    PriorBoxLayer,
+    MultiBoxMatchLayer,
+    HardMiningLayer,
+    MultiBoxTargetLayer,
 )
@@ -38,6 +40,8 @@ class SSD(torch.nn.Module):
         self.softmax = torch.nn.Softmax(dim=2)
         C = cfg.MODEL.NUM_CLASSES
+        feature_dims = list(filter(None, feature_dims))
         for i, dim_in in enumerate(feature_dims):
             A = len(cfg.SSD.MULTIBOX.ASPECT_RATIOS[i]) + 1
             self.cls_score.append(conv3x3(dim_in, A * C, bias=True))
@@ -89,8 +93,12 @@ class SSD(torch.nn.Module):
             torch.cat(bbox_pred_wide, dim=1).view(0, -1, 4)

     def compute_losses(
-        self, prior_boxes, gt_boxes,
-        cls_score, bbox_pred, cls_prob,
+        self,
+        prior_boxes,
+        gt_boxes,
+        cls_score,
+        bbox_pred,
+        cls_prob,
     ):
         """Compute the SSD classification loss and regression loss.
......
@@ -16,7 +16,7 @@ from __future__ import print_function
 import dragon.vm.torch as torch

 from lib.core.config import cfg
-from lib.modeling.base import conv1x1, conv3x3
+from lib.modeling import conv1x1, conv3x3


 class VGG(torch.nn.Module):
......
@@ -141,7 +141,7 @@ def test_net(net, server):
                 keep = soft_nms(cls_dets, cfg.TEST.NMS,
                                 method=cfg.TEST.SOFT_NMS_METHOD,
                                 sigma=cfg.TEST.SOFT_NMS_SIGMA)
-            else: keep = nms(cls_dets, cfg.TEST.NMS)
+            else: keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=True)
             cls_dets = cls_dets[keep, :]
             all_boxes[j][i] = cls_dets
             boxes_this_image.append(cls_dets)
......
@@ -43,7 +43,7 @@ class Expander(object):
         w_off = int(math.floor(npr.uniform(0.0, expand_w - im_w)))
         new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
-        new_im.fill(127)
+        new_im[:] = cfg.PIXEL_MEANS
         new_im[h_off: h_off + im_h, w_off: w_off + im_w, :] = im

         if gt_boxes is not None:
......
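Both here and in `_get_image_with_target_size` above, the gray fill (`127`) is replaced by the dataset pixel means, so padded or expanded regions become zero after mean subtraction instead of injecting a constant bias. A sketch (the mean values are hypothetical, borrowed from the common Detectron BGR means):

```python
import numpy as np

PIXEL_MEANS = np.array([102.98, 115.95, 122.77])  # hypothetical cfg.PIXEL_MEANS (BGR)
expand_h, expand_w = 600, 800

new_im = np.empty((expand_h, expand_w, 3), dtype=np.uint8)
new_im[:] = PIXEL_MEANS  # mean-valued canvas: zero after mean subtraction
```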