Commit 41b3932b by Ting PAN

Refactor the API of rotated boxes

1 parent c020594c
Showing with 1355 additions and 724 deletions
------------------------------------------------------------------------
The list of the most significant changes made over time in SeetaDet.
SeetaDet 0.2.3 (20191101)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Refactor the API of rotated boxes.
- Simplify the solver by adding LRScheduler.
- Change the ``ITER`` naming to ``STEP``.
Bugs fixed:
- None
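A quick sketch of the renamed solver options (old key on the left, new key on the right, as seen in the config diffs below):
  MAX_ITERS      -> MAX_STEPS
  SNAPSHOT_ITERS -> SNAPSHOT_EVERY
  STEPS          -> DECAY_STEPS
  WARM_UP_ITERS  -> WARM_UP_STEPS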
------------------------------------------------------------------------
SeetaDet 0.2.2 (20191021)
Dragon Minimum Required (Version 0.3.0.dev20191021)
Changes:
Preview Features:
- Add the dumping of detection results.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.2.1 (20191017)
Dragon Minimum Required (Version 0.3.0.dev20191017)
......
#!/bin/sh
# delete cache
# Delete cache
rm -r build install *.c *.cpp
# compile cython modules
# Compile cpp modules
python setup.py build_ext --inplace
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -fopenmp
g++ -o ../lib/utils/ctypes_rbox.so -shared -fPIC -O2 rbox.cc -std=c++11 -fopenmp
# compile cuda modules
# Compile cuda modules
cd build && cmake .. && make install && cd ..
# setup
# Copy to the library root
cp -r install/lib ../
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [60000, 80000]
MAX_ITERS: 90000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [100000, 140000]
MAX_STEPS: 140000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
......
......@@ -14,10 +14,9 @@ MODEL:
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [100000, 140000]
MAX_ITERS: 140000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [100000, 140000]
MAX_STEPS: 140000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn
RPN:
STRIDES: [16]
......
......@@ -22,11 +22,9 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [30000, 40000]
MAX_ITERS: 45000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [30000, 40000]
MAX_STEPS: 45000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
......
......@@ -22,12 +22,10 @@ MODEL:
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
WARM_UP_ITERS: 2000 # default: 500
LR_POLICY: steps_with_decay
STEPS: [120000, 160000]
MAX_ITERS: 180000
SNAPSHOT_ITERS: 5000
WARM_UP_STEPS: 2000 # default: 500
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_400
FPN:
RPN_MIN_LEVEL: 3
......@@ -41,9 +39,9 @@ TRAIN:
IMS_PER_BATCH: 8
SCALES: [400]
MAX_SIZE: 666
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_RANGE: [0.75, 1.33]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
SCALE_JITTER_RANGE: [0.75, 1.33]
TEST:
DATABASE: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.02
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -28,9 +26,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,12 +13,10 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -29,9 +27,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,12 +13,10 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.01
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [40000, 50000, 60000]
WARM_UP_ITERS: 2000
MAX_ITERS: 60000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [40000, 50000, 60000]
WARM_UP_STEPS: 2000
MAX_STEPS: 60000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_300
FPN:
RPN_MIN_LEVEL: 3
......@@ -29,9 +27,9 @@ TRAIN:
IMS_PER_BATCH: 32
SCALES: [300]
MAX_SIZE: 500
SCALE_RANGE: [0.5, 2.0]
SCALE_JITTERING: True
COLOR_JITTERING: True
SCALE_JITTER_RANGE: [0.5, 2.0]
USE_SCALE_JITTER: True
USE_COLOR_JITTER: True
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
......
......@@ -13,11 +13,9 @@ MODEL:
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.001
WEIGHT_DECAY: 0.0001
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
......
......@@ -13,13 +13,12 @@ MODEL:
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
SOLVER:
BASE_LR: 0.002
BASE_LR: 0.001
WARM_UP_FACTOR: 0.
WEIGHT_DECAY: 0.0005
LR_POLICY: steps_with_decay
STEPS: [80000, 100000, 120000]
MAX_ITERS: 120000
SNAPSHOT_ITERS: 5000
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_300
SSD:
RESIZE:
......
NUM_GPUS: 1
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: ssd
BACKBONE: resnet50.fpn
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
'cow', 'diningtable', 'dog', 'horse',
'motorbike', 'person', 'pottedplant',
'sheep', 'sofa', 'train', 'tvmonitor']
NUM_CLASSES: 21
FPN:
RPN_MIN_LEVEL: 3
RPN_MAX_LEVEL: 8
SOLVER:
BASE_LR: 0.001
DECAY_STEPS: [80000, 100000, 120000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_320
SSD:
NUM_CONVS: 2
RESIZE:
HEIGHT: 320
WIDTH: 320
MULTIBOX:
STRIDES: [8, 16, 32, 64, 100, 300]
MIN_SIZES: [30, 60, 110, 162, 213, 264]
MAX_SIZES: [60, 110, 162, 213, 264, 315]
ASPECT_RATIOS: [[1, 2, 0.5], [1, 2, 0.5, 3, 0.33], [1, 2, 0.5, 3, 0.33],
[1, 2, 0.5, 3, 0.33], [1, 2, 0.5], [1, 2, 0.5]]
TRAIN:
WEIGHTS: '/model/R-50.Affine.pth'
DATABASE: '/data/voc_0712_trainval'
IMS_PER_BATCH: 32
TEST:
DATABASE: '/data/voc_2007_test'
PROTOCOL: 'voc2007' # 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH: 8
NMS_TOP_K: 400
NMS: 0.45
SCORE_THRESH: 0.01
DETECTIONS_PER_IM: 200
......@@ -20,10 +20,10 @@ from __future__ import print_function
import os.path as osp
import numpy as np
from lib.utils.attrdict import AttrDict as edict
from lib.utils.attrdict import AttrDict
__C = edict()
cfg = __C
cfg = __C = AttrDict()
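# A minimal usage sketch (assuming AttrDict is a dict subclass exposing
# keys as attributes, as its use throughout this file suggests):
#   cfg.TRAIN.WEIGHTS is equivalent to cfg['TRAIN']['WEIGHTS']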
###########################################
......@@ -33,7 +33,7 @@ cfg = __C
###########################################
__C.TRAIN = edict()
__C.TRAIN = AttrDict()
# Initialize network with weights from this file
__C.TRAIN.WEIGHTS = ''
......@@ -82,17 +82,17 @@ __C.TRAIN.USE_DIFF = True
__C.TRAIN.BBOX_THRESH = 0.5
# If True, randomly scale the image by scale range
__C.TRAIN.SCALE_JITTERING = False
__C.TRAIN.SCALE_RANGE = [0.75, 1.0]
__C.TRAIN.USE_SCALE_JITTER = False
__C.TRAIN.SCALE_JITTER_RANGE = [0.75, 1.0]
# If True, randomly distort the image by brightness, contrast, and saturation
__C.TRAIN.COLOR_JITTERING = False
__C.TRAIN.USE_COLOR_JITTER = False
# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor statisfied by positive and negative conditions set to negative
# If an anchor satisfied by positive and negative conditions set to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
......@@ -118,7 +118,7 @@ __C.TRAIN.RPN_STRADDLE_THRESH = 0
###########################################
__C.TEST = edict()
__C.TEST = AttrDict()
# Database to test
__C.TEST.DATABASE = ''
......@@ -151,10 +151,10 @@ __C.TEST.SOFT_NMS_SIGMA = 0.5
# The top-k prior boxes before nms.
__C.TEST.NMS_TOP_K = 400
# The threshold for predicting boxes
__C.TEST.SCORE_THRESH = 0.05
# The threshold for predicting masks
__C.TEST.BINARY_THRESH = 0.5
# NMS threshold used on RPN proposals
......@@ -188,37 +188,32 @@ __C.TEST.DETECTIONS_PER_IM = 100
###########################################
__C.MODEL = edict()
__C.MODEL = AttrDict()
# The type of the model
# ('faster_rcnn',
# 'mask_rcnn',
# 'ssd',
# 'rssd',
# 'retinanet',
# )
__C.MODEL.TYPE = ''
# The float precision for training and inference
# (FLOAT32, FLOAT16,)
__C.MODEL.DATA_TYPE = 'FLOAT32'
__C.MODEL.PRECISION = 'FLOAT32'
# The backbone
__C.MODEL.BACKBONE = ''
# The number of classes in the dataset
__C.MODEL.NUM_CLASSES = -1
# Keep it for TaaS DataSet
# The name for each object class
__C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
# Stop gradients at convolution stage K so the bottom layers are frozen
# The value of ``K`` is usually set to 2
__C.MODEL.FREEZE_AT = 2
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
__C.MODEL.USE_FOCAL_LOSS = False
# Setting of focal loss
__C.MODEL.FOCAL_LOSS_ALPHA = 0.25
__C.MODEL.FOCAL_LOSS_GAMMA = 2.0
......@@ -234,7 +229,7 @@ __C.MODEL.COARSEST_STRIDE = 32
###########################################
__C.RPN = edict()
__C.RPN = AttrDict()
# Strides for multiple rpn heads
__C.RPN.STRIDES = [4, 8, 16, 32, 64]
......@@ -253,7 +248,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
###########################################
__C.RETINANET = edict()
__C.RETINANET = AttrDict()
# Anchor aspect ratios to use
__C.RETINANET.ASPECT_RATIOS = (0.5, 1.0, 2.0)
......@@ -291,7 +286,7 @@ __C.RETINANET.NEGATIVE_OVERLAP = 0.4
###########################################
__C.FPN = edict()
__C.FPN = AttrDict()
# Channel dimension of the FPN feature levels
__C.FPN.DIM = 256
......@@ -317,7 +312,7 @@ __C.FPN.ROI_MIN_LEVEL = 2
###########################################
__C.FRCNN = edict()
__C.FRCNN = AttrDict()
# RoI transformation function (e.g., RoIPool or RoIAlign)
__C.FRCNN.ROI_XFORM_METHOD = 'RoIPool'
......@@ -338,7 +333,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7
###########################################
__C.MRCNN = edict()
__C.MRCNN = AttrDict()
# Resolution of mask predictions
__C.MRCNN.RESOLUTION = 28
......@@ -357,10 +352,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14
###########################################
__C.SSD = edict()
# Whether to enable FPN enhancement?
__C.SSD.FPN_ON = False
__C.SSD = AttrDict()
# Convolutions to use in the cls and bbox tower
# NOTE: this doesn't include the last conv for logits
......@@ -369,7 +361,7 @@ __C.SSD.NUM_CONVS = 0
# Weight for bbox regression loss
__C.SSD.BBOX_REG_WEIGHT = 1.
__C.SSD.MULTIBOX = edict()
__C.SSD.MULTIBOX = AttrDict()
# MultiBox configs
__C.SSD.MULTIBOX.STRIDES = []
__C.SSD.MULTIBOX.MIN_SIZES = []
......@@ -377,25 +369,25 @@ __C.SSD.MULTIBOX.MAX_SIZES = []
__C.SSD.MULTIBOX.ASPECT_RATIOS = []
__C.SSD.MULTIBOX.ASPECT_ANGLES = []
__C.SSD.OHEM = edict()
__C.SSD.OHEM = AttrDict()
# The threshold for selecting negative bbox in hard example mining
__C.SSD.OHEM.NEG_OVERLAP = 0.5
# The ratio used in hard example mining
__C.SSD.OHEM.NEG_POS_RATIO = 3.0
# Distort the image?
__C.SSD.DISTORT = edict()
__C.SSD.DISTORT = AttrDict()
__C.SSD.DISTORT.BRIGHTNESS_PROB = 0.5
__C.SSD.DISTORT.CONTRAST_PROB = 0.5
__C.SSD.DISTORT.SATURATION_PROB = 0.5
# Expand the image?
__C.SSD.EXPAND = edict()
__C.SSD.EXPAND = AttrDict()
__C.SSD.EXPAND.PROB = 0.5
__C.SSD.EXPAND.MAX_RATIO = 4.0
# Resize the image?
__C.SSD.RESIZE = edict()
__C.SSD.RESIZE = AttrDict()
__C.SSD.RESIZE.HEIGHT = 300
__C.SSD.RESIZE.WIDTH = 300
__C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
......@@ -403,7 +395,7 @@ __C.SSD.RESIZE.INTERP_MODE = ['LINEAR', 'AREA', 'NEAREST', 'CUBIC', 'LANCZOS4']
# Samplers
# Format as (min_scale, max_scale,
# min_aspect_ratio, max_aspect_ratio,
# min_jaccard_overlap, max_jaccard_overlap,
# min_overlap, max_overlap,
# max_trials, max_sample)
__C.SSD.SAMPLERS = [
(1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1, 1), # Entire image
......@@ -423,7 +415,7 @@ __C.SSD.SAMPLERS = [
###########################################
__C.RESNET = edict()
__C.RESNET = AttrDict()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
__C.RESNET.NUM_GROUPS = 1
......@@ -439,7 +431,7 @@ __C.RESNET.GROUP_WIDTH = 64
###########################################
__C.DROPBLOCK = edict()
__C.DROPBLOCK = AttrDict()
# Whether to use drop block for more regularization
__C.DROPBLOCK.DROP_ON = False
......@@ -455,59 +447,46 @@ __C.DROPBLOCK.DECREMENT = 1e-6
###########################################
__C.SOLVER = edict()
__C.SOLVER = AttrDict()
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_EVERY = 5000
# Prefix to yield the path: <prefix>_iters_XYZ.pth
__C.SOLVER.SNAPSHOT_PREFIX = ''
# Optional scaling factor for total loss
# This option is helpful to scale the magnitude
# of gradients during FP16 training
__C.SOLVER.LOSS_SCALING = 1.
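# E.g., a hypothetical LOSS_SCALING of 128. multiplies the total loss
# before backward, and the solver undoes it on the gradients via
# 'scale_gradient': 1. / (LOSS_SCALING * ITER_SIZE) in lib.core.solver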
# Schedule type (see functions in utils.lr_policy for options)
# E.g., 'step', 'steps_with_decay', ...
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Hyperparameter used by the specified policy
# For 'step', the current LR is multiplied by SOLVER.GAMMA at each step
__C.SOLVER.GAMMA = 0.1
# Uniform step size for 'steps' policy
__C.SOLVER.STEP_SIZE = 30000
__C.SOLVER.STEPS = []
# Maximum number of SGD iterations
__C.SOLVER.MAX_ITERS = 40000
__C.SOLVER.MAX_STEPS = 40000
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The uniform interval for LRScheduler
__C.SOLVER.DECAY_STEP = 1
# The custom intervals for LRScheduler
__C.SOLVER.DECAY_STEPS = []
# The decay factor for exponential LRScheduler
__C.SOLVER.DECAY_GAMMA = 0.1
# Warm up to ``BASE_LR`` over this number of steps
__C.SOLVER.WARM_UP_STEPS = 500
# Start the warm up from ``BASE_LR`` * ``FACTOR``
__C.SOLVER.WARM_UP_FACTOR = 0.333
# The type of LRScheduler
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization hyperparameter
__C.SOLVER.WEIGHT_DECAY = 0.0005
__C.SOLVER.WEIGHT_DECAY = 0.0001
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = -1.0
# Warm up to SOLVER.BASE_LR over this number of SGD iterations
__C.SOLVER.WARM_UP_ITERS = 500
# Start the warm up from SOLVER.BASE_LR * SOLVER.WARM_UP_FACTOR
__C.SOLVER.WARM_UP_FACTOR = 1.0 / 3.0
# The steps for accumulating gradients
__C.SOLVER.ITER_SIZE = 1
# The interval to display logs
__C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_ITERS = 5000
# prefix to yield the path: <prefix>_iters_XYZ.caffemodel
__C.SOLVER.SNAPSHOT_PREFIX = ''
###########################################
# #
......@@ -532,9 +511,6 @@ __C.PIXEL_MEANS = [102., 115., 122.]
# These are empirically chosen to approximately lead to unit variance targets
__C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C.RBOX_REG_WEIGHTS = (10.0, 10.0, 5., 5., 10.)
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C.PRIOR_PROB = 0.01
......@@ -581,7 +557,7 @@ def _merge_a_into_b(a, b):
# the types must match, too
v = _check_and_coerce_cfg_value_type(v, b[k], k)
# recursively merge dicts
if type(v) is edict:
if type(v) is AttrDict:
try:
_merge_a_into_b(a[k], b[k])
except:
......@@ -595,7 +571,7 @@ def cfg_from_file(filename):
"""Load a config file and merge it into the default options."""
import yaml
with open(filename, 'r') as f:
yaml_cfg = edict(yaml.load(f))
yaml_cfg = AttrDict(yaml.load(f))
global __C
_merge_a_into_b(yaml_cfg, __C)
......@@ -643,8 +619,8 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
value_a = list(value_a)
elif isinstance(value_a, list) and isinstance(value_b, tuple):
value_a = tuple(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, edict):
value_a = edict(value_a)
elif isinstance(value_a, dict) and isinstance(value_b, AttrDict):
value_a = AttrDict(value_a)
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
......
......@@ -23,10 +23,8 @@ from lib.core.config import cfg_from_file
class Coordinator(object):
"""Coordinator is a simple tool to manage the
unique experiments from the YAML configurations.
"""Manage the unique experiments."""
"""
def __init__(self, cfg_file, exp_dir=None):
# Override the default configs
cfg_from_file(cfg_file)
......@@ -44,9 +42,14 @@ class Coordinator(object):
self.experiment_dir = exp_dir
def _path_at(self, file, auto_create=True):
try:
path = os.path.abspath(os.path.join(self.experiment_dir, file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
except OSError:
path = os.path.abspath(os.path.join('/tmp', file))
if auto_create and not os.path.exists(path):
os.makedirs(path)
return path
def checkpoints_dir(self):
......@@ -55,7 +58,9 @@ class Coordinator(object):
def exports_dir(self):
return self._path_at('exports')
def results_dir(self, checkpoint=None):
def results_dir(self, checkpoint=None, output_dir=None):
if output_dir is not None:
return output_dir
sub_dir = os.path.splitext(os.path.basename(checkpoint))[0] if checkpoint else ''
return self._path_at(os.path.join('results', sub_dir))
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.utils import logger
class Solver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = None
self.opt_arguments = {
'scale_gradient': 1. / (
cfg.SOLVER.LOSS_SCALING *
cfg.SOLVER.ITER_SIZE
),
'clip_gradient': float(cfg.SOLVER.CLIP_NORM),
'weight_decay': cfg.SOLVER.WEIGHT_DECAY,
}
# Define the global step
self.iter = 0
# Define the decay step
self._current_step = 0
def _get_param_groups(self):
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
# Special treatment for biases (mainly to match historical impl.
# details):
# (1) Do not apply weight decay
# (2) Use a 2x higher learning rate
{
'params': [],
'lr_mult': 2.,
'decay_mult': 0.,
}
]
for name, param in self.detector.named_parameters():
if 'bias' in name:
param_groups[1]['params'].append(param)
else:
param_groups[0]['params'].append(param)
return param_groups
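# A usage note (hedged): these two groups feed the optimizers below, and
# 'lr_mult' / 'decay_mult' are assumed to scale the base lr and weight
# decay per group, so biases get a 2x learning rate and no L2 decay.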
def set_learning_rate(self):
policy = cfg.SOLVER.LR_POLICY
if policy == 'steps_with_decay':
if self._current_step < len(cfg.SOLVER.STEPS) \
and self.iter >= cfg.SOLVER.STEPS[self._current_step]:
self._current_step = self._current_step + 1
logger.info(
'MultiStep Status: Iteration {}, step = {}'
.format(self.iter, self._current_step)
)
new_lr = cfg.SOLVER.BASE_LR * (
cfg.SOLVER.GAMMA ** self._current_step)
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = new_lr
else:
raise ValueError('Unknown lr policy: ' + policy)
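# A worked example with the COCO config above (BASE_LR=0.02, GAMMA=0.1,
# STEPS=[60000, 80000]): lr stays at 0.02 before step 60000, drops to
# 0.002 until step 80000, then to 0.0002 for the rest of training.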
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
# Forward & Backward & Compute_loss
iter_size = cfg.SOLVER.ITER_SIZE
loss_scaling = cfg.SOLVER.LOSS_SCALING
stats = {'loss': {'total': 0.}, 'iter': self.iter}
run_time, tic = 0., time.time()
if iter_size > 1:
# Dragon is designed for manual gradients accumulating
# ``zero_grad`` is only required if calling ``accumulate_grad``
self.optimizer.zero_grad()
for i in range(iter_size):
outputs, total_loss = self.detector(), None
# Sum the partial losses
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
if iter_size > 1:
self.optimizer.accumulate_grad()
run_time += (time.time() - tic)
# Apply Update
self.set_learning_rate()
tic = time.time()
self.optimizer.step()
run_time += (time.time() - tic)
self.iter += 1
# Average loss by the iter size
for k in stats['loss'].keys():
stats['loss'][k] /= cfg.SOLVER.ITER_SIZE
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = run_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
self.optimizer.param_groups[0]['lr'] = \
self.optimizer.param_groups[1]['lr'] = value
class SGDSolver(Solver):
def __init__(self):
super(SGDSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class NesterovSolver(Solver):
def __init__(self):
super(NesterovSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'momentum': cfg.SOLVER.MOMENTUM,
'nesterov': True,
})
self.optimizer = torch.optim.SGD(
self._get_param_groups(), **self.opt_arguments)
class RMSPropSolver(Solver):
def __init__(self):
super(RMSPropSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'alpha': 0.9,
'eps': 1e-5,
})
self.optimizer = torch.optim.RMSprop(
self._get_param_groups(), **self.opt_arguments)
class AdamSolver(Solver):
def __init__(self):
super(AdamSolver, self).__init__()
self.opt_arguments.update(**{
'lr': cfg.SOLVER.BASE_LR,
'beta1': 0.9,
'beta2': 0.999,
'eps': 1e-5,
})
self.optimizer = torch.optim.Adam(
self._get_param_groups(), **self.opt_arguments)
def get_solver_func(type):
if type == 'MomentumSGD':
return SGDSolver
elif type == 'Nesterov':
return NesterovSolver
elif type == 'RMSProp':
return RMSPropSolver
elif type == 'Adam':
return AdamSolver
else:
raise ValueError(
'Unsupported solver type: {}.\n'
'Expected one of (MomentumSGD, Nesterov, RMSProp, Adam).'
.format(type)
)
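# A minimal usage sketch, matching the call made by the training wrapper:
#   solver = get_solver_func('MomentumSGD')()
#   stats = solver.one_step()  # {'loss': {...}, 'iter': ..., 'lr': ..., 'time': ...}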
......@@ -34,7 +34,7 @@ class TestServer(object):
self.data_reader = dragon.io.DataReader(
dataset=lambda: dragon.io.SeetaRecordDataset(self.imdb.source))
self.data_transformer = DataTransformer()
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH)
self.data_reader.q_out = mp.Queue(cfg.TEST.IMS_PER_BATCH * 5)
self.data_reader.start()
self.gt_recs = collections.OrderedDict()
self.output_dir = output_dir
......@@ -70,6 +70,9 @@ class TestServer(object):
return self.gt_recs
def evaluate_detections(self, all_boxes):
if cfg.TEST.PROTOCOL == 'null':
self.imdb.dump_detections(all_boxes, self.output_dir)
else:
self.imdb.evaluate_detections(
all_boxes,
self.get_records(),
......
......@@ -18,53 +18,48 @@ from __future__ import division
from __future__ import print_function
import collections
import datetime
import os
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.core.solver import get_solver_func
from lib.solver.sgd import SGDSolver
from lib.utils import logger
from lib.utils import time_util
from lib.utils.stats import SmoothedValue
from lib.utils.timer import Timer
class SolverWrapper(object):
def __init__(self, coordinator):
self.solver = SGDSolver()
self.detector = self.solver.detector
self.output_dir = coordinator.checkpoints_dir()
self.solver = get_solver_func('MomentumSGD')()
# Load the pre-trained weights
init_weights = cfg.TRAIN.WEIGHTS
if init_weights != '':
if os.path.exists(init_weights):
logger.info('Loading weights from {}.'.format(init_weights))
self.solver.detector.load_weights(init_weights)
else:
raise ValueError('Invalid path of weights: {}'.format(init_weights))
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
self.solver.detector.half() # Powerful FP16 Support
self.solver.detector.cuda(cfg.GPU_ID)
# Setup the detector
self.detector.load_weights(cfg.TRAIN.WEIGHTS)
if cfg.MODEL.PRECISION.lower() == 'float16':
# Mixed precision training
self.detector.half()
self.detector.cuda(cfg.GPU_ID)
# Plan the metrics
self.board = None
self.metrics = collections.OrderedDict()
if cfg.ENABLE_TENSOR_BOARD and logger.is_root():
try:
from dragon.tools.tensorboard import TensorBoard
self.board = TensorBoard(log_dir=coordinator.experiment_dir + '/logs')
log_dir = coordinator.experiment_dir + '/logs'
self.board = TensorBoard(log_dir=log_dir)
except ImportError:
pass
def snapshot(self):
if not logger.is_root():
return None
filename = (cfg.SOLVER.SNAPSHOT_PREFIX + '_iter_{:d}'
.format(self.solver.iter) + '.pth')
filename = cfg.SOLVER.SNAPSHOT_PREFIX + \
'_iter_{}.pth'.format(self.solver.iter)
filename = os.path.join(self.output_dir, filename)
torch.save(self.solver.detector.state_dict(), filename)
if logger.is_root() and not os.path.exists(filename):
torch.save(self.detector.state_dict(), filename)
logger.info('Wrote snapshot to: {:s}'.format(filename))
return filename
def add_metrics(self, stats):
for k, v in stats['loss'].items():
......@@ -73,7 +68,7 @@ class SolverWrapper(object):
self.metrics[k].AddValue(v)
def send_metrics(self, stats):
if hasattr(self, 'board'):
if self.board is not None:
self.board.scalar_summary('lr', stats['lr'], stats['iter'])
self.board.scalar_summary('time', stats['time'], stats['iter'])
for k, v in self.metrics.items():
......@@ -90,10 +85,12 @@ class SolverWrapper(object):
stats['iter'],
)
def step(self, display=False):
def step(self):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
stats = self.solver.one_step()
self.add_metrics(stats)
self.send_metrics(stats)
if display:
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
......@@ -110,43 +107,28 @@ class SolverWrapper(object):
def train_model(self):
"""Network training loop."""
last_snapshot_iter = -1
timer = Timer()
model_paths = []
start_lr = self.solver.base_lr
while self.solver.iter < cfg.SOLVER.MAX_ITERS:
if self.solver.iter < cfg.SOLVER.WARM_UP_ITERS:
alpha = (self.solver.iter + 1.0) / cfg.SOLVER.WARM_UP_ITERS
self.solver.base_lr = \
start_lr * (cfg.SOLVER.WARM_UP_FACTOR * (1 - alpha) + alpha)
timer = time_util.Timer()
max_steps = cfg.SOLVER.MAX_STEPS
while self.solver.iter < max_steps:
# Apply 1-step SGD update
with timer.tic_and_toc():
self.step(display=self.solver.iter % cfg.SOLVER.DISPLAY == 0)
if self.solver.iter % (10 * cfg.SOLVER.DISPLAY) == 0:
average_time = timer.average_time
eta_seconds = average_time * (
cfg.SOLVER.MAX_ITERS - self.solver.iter)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = float(self.solver.iter + 1) / cfg.SOLVER.MAX_ITERS
_, global_step = self.step(), self.solver.iter
if global_step % (10 * cfg.SOLVER.DISPLAY) == 0:
logger.info(
'< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >'
.format(progress, timer.average_time, eta)
time_util.get_progress_info(
timer, global_step, max_steps
)
)
if self.solver.iter % cfg.SOLVER.SNAPSHOT_ITERS == 0:
last_snapshot_iter = self.solver.iter
model_paths.append(self.snapshot())
if last_snapshot_iter != self.solver.iter:
model_paths.append(self.snapshot())
return model_paths
if global_step % cfg.SOLVER.SNAPSHOT_EVERY == 0:
self.snapshot()
def train_net(coordinator, start_iter=0):
sw = SolverWrapper(coordinator)
sw.solver.iter = start_iter
logger.info('Solving...')
model_paths = sw.train_model()
return model_paths
sw.train_model()
sw.snapshot()
......@@ -14,6 +14,7 @@
# ------------------------------------------------------------
import os
import shutil
import dragon
from lib.core.config import cfg
......@@ -59,6 +60,35 @@ class imdb(object):
def num_images(self):
return dragon.io.SeetaRecordDataset(self.source).size
def dump_detections(self, all_boxes, output_dir):
dataset = dragon.io.SeetaRecordDataset(self.source)
for file in ('data.data', 'data.index', 'data.meta'):
file = os.path.join(output_dir, file)
if os.path.exists(file):
os.remove(file)
writer = dragon.io.SeetaRecordWriter(output_dir, dataset.protocol)
for i in range(len(dataset)):
example = dataset.get()
example['object'] = []
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][i]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
example['object'].append({
'name': cls,
'xmin': float(detections[k][0]),
'ymin': float(detections[k][1]),
'xmax': float(detections[k][2]),
'ymax': float(detections[k][3]),
'difficult': 0,
})
writer.write(example)
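# A usage note (hedged): when TEST.PROTOCOL is 'null', the test server
# calls imdb.dump_detections(all_boxes, output_dir) instead of running an
# evaluation, regenerating the record files removed above.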
def evaluate_detections(self, all_boxes, gt_recs, output_dir):
pass
......
......@@ -109,36 +109,6 @@ class TaaS(imdb):
# #
##############################################
def _write_xml_bbox_results(self, all_boxes, gt_recs, output_dir):
from xml.dom import minidom
import xml.etree.ElementTree as ET
ix = 0
for image_id, rec in gt_recs.items():
root = ET.Element('annotation')
ET.SubElement(root, 'filename').text = str(image_id)
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
continue
detections = all_boxes[cls_ind][ix]
if len(detections) == 0:
continue
for k in range(detections.shape[0]):
if detections[k, -1] < cfg.VIS_TH:
continue
object = ET.SubElement(root, 'object')
ET.SubElement(object, 'name').text = cls
ET.SubElement(object, 'difficult').text = '0'
bnd_box = ET.SubElement(object, 'bndbox')
ET.SubElement(bnd_box, 'xmin').text = str(detections[k][0])
ET.SubElement(bnd_box, 'ymin').text = str(detections[k][1])
ET.SubElement(bnd_box, 'xmax').text = str(detections[k][2])
ET.SubElement(bnd_box, 'ymax').text = str(detections[k][3])
ix += 1
rawText = ET.tostring(root)
dom = minidom.parseString(rawText)
with open('{}/{}.xml'.format(output_dir, image_id), 'w') as f:
dom.writexml(f, "", "\t", "\n", "utf-8")
def _write_voc_bbox_results(self, all_boxes, gt_recs, output_dir):
for cls_ind, cls in enumerate(self.classes):
if cls == '__background__':
......@@ -486,10 +456,6 @@ class TaaS(imdb):
self._do_voc_bbox_eval(
gt_recs, output_dir, IoU=0.7,
use_07_metric='2007' in protocol)
elif 'xml' in protocol:
if cfg.EXP_DIR != '':
output_dir = cfg.EXP_DIR
self._write_xml_bbox_results(all_boxes, gt_recs, output_dir)
elif 'coco' in protocol:
from lib.pycocotools.coco import COCO
if os.path.exists(cfg.TEST.JSON_FILE):
......
......@@ -20,7 +20,7 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -194,8 +194,8 @@ class AnchorTargetLayer(torch.nn.Module):
.transpose(0, 3, 1, 2)
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -92,7 +92,7 @@ class DataBatch(mp.Process):
if self._num_transformers == -1:
self._num_transformers = 2
# Add 1 transformer for color augmentation
if cfg.TRAIN.COLOR_JITTERING:
if cfg.TRAIN.USE_COLOR_JITTER:
self._num_transformers += 1
self._num_transformers = min(
self._num_transformers, self._max_transformers)
......
......@@ -19,8 +19,10 @@ import cv2
import numpy as np
from lib.core.config import cfg
from lib.utils import rotated_boxes
from lib.utils.blob import prep_im_for_blob
from lib.utils.boxes import flip_boxes
from lib.utils.image import get_image_with_target_size
class DataTransformer(multiprocessing.Process):
......@@ -101,23 +103,29 @@ class DataTransformer(multiprocessing.Process):
def get_annotations(cls, example):
objects = []
for ix, obj in enumerate(example['object']):
if 'xmin' in obj:
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']],
})
if 'x3' in obj:
bbox = rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
obj['x4'], obj['y4']]
)
elif 'x2' in obj:
bbox = [obj['x1'], obj['y1'], obj['x2'], obj['y2']]
elif 'xmin' in obj:
bbox = [obj['xmin'], obj['ymin'], obj['xmax'], obj['ymax']]
else:
bbox = obj['bbox']
objects.append({
'name': obj['name'],
'difficult': obj.get('difficult', 0),
'bbox': obj['bbox'],
'bbox': bbox,
})
return example['id'], objects
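# The three bbox encodings handled above, sketched (field names as in
# this diff; the exact vertices2box signature is taken on trust):
#   {'x1'...'y4'} (four vertices)    -> rotated box via rotated_boxes.vertices2box
#   {'x1', 'y1', 'x2', 'y2'}         -> axis-aligned [x1, y1, x2, y2]
#   {'xmin', 'ymin', 'xmax', 'ymax'} -> axis-aligned [xmin, ymin, xmax, ymax]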
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
img = cv2.imdecode(img, 1)
# Scale
scale_indices = np.random.randint(len(cfg.TRAIN.SCALES))
......@@ -137,10 +145,10 @@ class DataTransformer(multiprocessing.Process):
if jitter != 1.0:
# To a rectangle (scale, max_size)
target_size = (np.array(im.shape[0:2]) / jitter).astype(np.int)
im, offsets = _get_image_with_target_size(target_size, im)
im, offsets = get_image_with_target_size(target_size, im)
else:
# To a square (target_size, target_size)
im, offsets = _get_image_with_target_size([target_size] * 2, im)
im, offsets = get_image_with_target_size([target_size] * 2, im)
# Example -> RoIDict
roi_dict = self.make_roi_dict(example, im_scale, apply_flip, offsets)
......@@ -166,29 +174,3 @@ class DataTransformer(multiprocessing.Process):
self.q1_out.put(outputs)
else:
self.q2_out.put(outputs)
def _get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
......@@ -18,19 +18,15 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.utils.blob import blob_to_tensor
from lib.nms import nms_wrapper
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
"""Compute proposals by applying transformations to anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
......@@ -48,8 +44,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
......@@ -86,14 +82,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix].reshape((-1, 4))
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
......@@ -111,11 +108,11 @@ class ProposalLayer(torch.nn.Module):
scores = scores[keep]
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 7. Take after_nms_top_n (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
......@@ -129,4 +126,4 @@ class ProposalLayer(torch.nn.Module):
if cfg_key == 'TRAIN':
return rpn_rois
else:
return [blob_to_tensor(rpn_rois)]
return [array2tensor(rpn_rois)]
......@@ -18,7 +18,7 @@ import numpy as np
import numpy.random as npr
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -73,11 +73,11 @@ class ProposalTargetLayer(torch.nn.Module):
batch_outputs[k] = np.concatenate(batch_outputs[k], axis=0)
return {
'rois': [blob_to_tensor(batch_outputs['rois'])],
'labels': blob_to_tensor(batch_outputs['labels']),
'bbox_targets': blob_to_tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': blob_to_tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': blob_to_tensor(batch_outputs['bbox_outside_weights']),
'rois': [array2tensor(batch_outputs['rois'])],
'labels': array2tensor(batch_outputs['labels']),
'bbox_targets': array2tensor(batch_outputs['bbox_targets']),
'bbox_inside_weights': array2tensor(batch_outputs['bbox_inside_weights']),
'bbox_outside_weights': array2tensor(batch_outputs['bbox_outside_weights']),
}
......
......@@ -17,14 +17,13 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image
......@@ -48,7 +47,8 @@ def im_detect(detector, raw_image):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'rois': outputs['rois'],
......@@ -88,14 +88,13 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for i in range(num_images):
image_id, raw_image = server.get_image()
_t['im_detect'].tic()
with _t['im_detect'].tic_and_toc():
scores, boxes = im_detect(detector, raw_image)
_t['im_detect'].toc()
_t['misc'].tic()
boxes_this_image = [[]]
......@@ -107,21 +106,30 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis])
).astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
cls_detections, cfg.TEST.NMS,
keep = nms_wrapper.soft_nms(
cls_detections,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(cls_detections, cfg.TEST.NMS, force_cpu=True)
keep = nms_wrapper.nms(
cls_detections,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
all_boxes[j][i] = cls_detections
boxes_this_image.append(cls_detections)
if cfg.VIS or cfg.VIS_ON_FILE:
vis_one_image(
raw_image, classes, boxes_this_image,
thresh=cfg.VIS_TH, box_alpha=1.0, show_class=True,
raw_image,
classes,
boxes_this_image,
thresh=cfg.VIS_TH,
box_alpha=1.,
show_class=True,
filename=server.get_save_filename(image_id),
)
......@@ -129,7 +137,8 @@ def test_net(detector, server):
if cfg.TEST.DETECTIONS_PER_IM > 0:
image_scores = []
for j in range(1, num_classes):
if len(all_boxes[j][i]) < 1: continue
if len(all_boxes[j][i]) < 1:
continue
image_scores.append(all_boxes[j][i][:, -1])
if len(image_scores) > 0:
image_scores = np.hstack(image_scores)
......
......@@ -14,6 +14,7 @@ from __future__ import division
from __future__ import print_function
import collections
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
......@@ -21,7 +22,7 @@ import numpy.random as npr
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -180,8 +181,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -19,20 +19,16 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors
from lib.nms.nms_wrapper import nms
from lib.nms import nms_wrapper
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_tiled_boxes
from lib.utils.boxes import filter_boxes
class ProposalLayer(torch.nn.Module):
"""
Compute proposals by applying estimated bounding-box
transformations to a set of regular boxes (called "anchors").
"""
"""Compute proposals by applying transformations anchors."""
def __init__(self):
super(ProposalLayer, self).__init__()
......@@ -86,8 +82,8 @@ class ProposalLayer(torch.nn.Module):
def forward(self, features, cls_prob, bbox_pred, ims_info):
cfg_key = 'TRAIN' if self.training else 'TEST'
pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
pre_nms_top_n = cfg[cfg_key].RPN_PRE_NMS_TOP_N
post_nms_top_n = cfg[cfg_key].RPN_POST_NMS_TOP_N
nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
min_size = cfg[cfg_key].RPN_MIN_SIZE
......@@ -110,14 +106,15 @@ class ProposalLayer(torch.nn.Module):
scores = batch_scores[ix].reshape((-1, 1)) # [1, n] -> [n, 1]
deltas = batch_deltas[ix] # [n, 4]
if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
if pre_nms_top_n <= 0 or pre_nms_top_n >= len(scores):
order = np.argsort(-scores.squeeze())
else:
# Avoid sorting possibly large arrays; First partition to get top K
# unsorted and then sort just those (~20x faster for 200k scores)
inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
inds = np.argpartition(-scores.squeeze(), pre_nms_top_n)[:pre_nms_top_n]
order = np.argsort(-scores[inds].squeeze())
order = inds[order]
deltas = deltas[order]
anchors = all_anchors[order]
scores = scores[order]
......@@ -136,9 +133,9 @@ class ProposalLayer(torch.nn.Module):
# 6. Apply nms (e.g. threshold = 0.7)
# 7. Take after_nms_topN (e.g. 300)
# 8. Return the top proposals (-> RoIs top)
keep = nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_topN > 0:
keep = keep[:post_nms_topN]
keep = nms_wrapper.nms(np.hstack((proposals, scores)), nms_thresh)
if post_nms_top_n > 0:
keep = keep[:post_nms_top_n]
proposals = proposals[keep, :]
# Output rois blob
......@@ -156,16 +153,16 @@ class ProposalLayer(torch.nn.Module):
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(rpn_rois, min_level, max_level)
all_rois = []
for i in range(K):
for i in range(k):
lv_indices = np.where(fpn_levels == (i + min_level))[0]
if len(lv_indices) == 0:
# Fake a tiny roi to avoid empty roi pooling
all_rois.append(blob_to_tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
all_rois.append(array2tensor(np.array([[-1, 0, 0, 1, 1]], dtype=np.float32)))
else:
all_rois.append(blob_to_tensor(rpn_rois[lv_indices]))
all_rois.append(array2tensor(rpn_rois[lv_indices]))
return all_rois
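# For reference, _map_rois_to_fpn_levels is assumed (its body is not shown
# in this diff) to follow the FPN paper heuristic:
#   level = floor(4 + log2(sqrt(w * h) / 224)),
# clipped to [FPN.ROI_MIN_LEVEL, FPN.ROI_MAX_LEVEL].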
......
......@@ -13,12 +13,12 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
import numpy as np
import numpy.random as npr
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -87,9 +87,9 @@ class ProposalTargetLayer(torch.nn.Module):
# Distribute rois into K levels
min_level = cfg.FPN.ROI_MIN_LEVEL
max_level = cfg.FPN.ROI_MAX_LEVEL
K = max_level - min_level + 1
k = max_level - min_level + 1
fpn_levels = _map_rois_to_fpn_levels(batch_outputs['rois'], min_level, max_level)
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(K)]
lvs_indices = [np.where(fpn_levels == (i + min_level))[0] for i in range(k)]
_fmap_rois(
inputs=[batch_outputs[key] for key in keys],
fake_outputs=self.fake_outputs,
......@@ -99,11 +99,11 @@ class ProposalTargetLayer(torch.nn.Module):
)
return {
'rois': [blob_to_tensor(outputs['rois'][i]) for i in range(K)],
'labels': blob_to_tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': blob_to_tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': blob_to_tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': blob_to_tensor(np.vstack(outputs['bbox_outside_weights'])),
'rois': [array2tensor(outputs['rois'][i]) for i in range(k)],
'labels': array2tensor(np.concatenate(outputs['labels'], axis=0)),
'bbox_targets': array2tensor(np.vstack(outputs['bbox_targets'])),
'bbox_inside_weights': array2tensor(np.vstack(outputs['bbox_inside_weights'])),
'bbox_outside_weights': array2tensor(np.vstack(outputs['bbox_outside_weights'])),
}
......
......@@ -29,7 +29,7 @@ from lib.utils.logger import is_root
class Detector(torch.nn.Module):
"""The "Detector" organizes the detection pipelines.
"""Organize the detection pipelines.
A bunch of classic algorithms are integrated; see ``lib.core.config``
for their hyper-parameters.
......@@ -112,6 +112,7 @@ class Detector(torch.nn.Module):
# 1. Extract features
# Process the data:
# 0) CPU => CUDA
# 1) NHWC => NCHW
# 2) uint8 => float32 or float16
# 3) Mean subtraction
......
......@@ -30,17 +30,18 @@ class FPN(torch.nn.Module):
super(FPN, self).__init__()
self.C = torch.nn.ModuleList()
self.P = torch.nn.ModuleList()
self.apply_func = self.apply_on_rcnn
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, HIGHEST_BACKBONE_LVL + 1):
self.C.append(conv1x1(feature_dims[lvl - 1], cfg.FPN.DIM, bias=True))
self.P.append(conv3x3(cfg.FPN.DIM, cfg.FPN.DIM, bias=True))
if 'retinanet' in cfg.MODEL.TYPE or 'ssd' in cfg.MODEL.TYPE:
if 'rcnn' in cfg.MODEL.TYPE:
self.apply_func = self.apply_on_rcnn
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
else:
self.apply_func = self.apply_on_generic
self.relu = torch.nn.ReLU(inplace=False)
for lvl in range(HIGHEST_BACKBONE_LVL + 1, cfg.FPN.RPN_MAX_LEVEL + 1):
dim_in = feature_dims[-1] if lvl == HIGHEST_BACKBONE_LVL + 1 else cfg.FPN.DIM
self.P.append(conv3x3(dim_in, cfg.FPN.DIM, stride=2, bias=True))
self.apply_func = self.apply_on_retinanet
self.relu = torch.nn.ReLU(inplace=False)
self.maxpool = torch.nn.MaxPool2d(1, 2, ceil_mode=True)
self.reset_parameters()
self.feature_dims = [cfg.FPN.DIM]
......@@ -69,7 +70,7 @@ class FPN(torch.nn.Module):
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
def apply_on_retinanet(self, features):
def apply_on_generic(self, features):
fpn_input = self.C[-1](features[-1])
min_lvl, max_lvl = cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
outputs = [self.P[HIGHEST_BACKBONE_LVL - min_lvl](fpn_input)]
......
......@@ -37,7 +37,7 @@ def nms(detections, thresh, force_cpu=False):
if detections.shape[0] == 0:
return []
if detections.shape[1] == 6:
return rotated_boxes.nms(detections, thresh)
return rotated_boxes.cpu_nms(detections, thresh)
if cfg.USE_GPU_NMS and not force_cpu:
return gpu_nms(detections, thresh, device_id=cfg.GPU_ID)
else:
......
......@@ -17,7 +17,6 @@ import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.ops import functional as F
from lib.utils.blob import blob_to_tensor
class Bootstrap(torch.nn.Module):
......@@ -25,7 +24,7 @@ class Bootstrap(torch.nn.Module):
def __init__(self):
super(Bootstrap, self).__init__()
self.dtype = cfg.MODEL.DATA_TYPE.lower()
self.dtype = cfg.MODEL.PRECISION.lower()
self.mean_values = cfg.PIXEL_MEANS
self.dummy_buffer = torch.ones(1)
......
......@@ -19,7 +19,7 @@ import numpy as np
from lib.core.config import cfg
from lib.faster_rcnn.generate_anchors import generate_anchors_v2
from lib.utils import logger
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -145,8 +145,8 @@ class AnchorTargetLayer(torch.nn.Module):
bbox_outside_weights = bbox_outside_weights_wide.transpose((0, 2, 1))
return {
'labels': blob_to_tensor(labels),
'bbox_targets': blob_to_tensor(bbox_targets),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights),
'labels': array2tensor(labels),
'bbox_targets': array2tensor(bbox_targets),
'bbox_inside_weights': array2tensor(bbox_inside_weights),
'bbox_outside_weights': array2tensor(bbox_outside_weights),
}
......@@ -17,44 +17,14 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.blob import im_list_to_blob
from lib.utils.graph import FrozenGraph
from lib.utils.image import scale_image
from lib.utils.timer import Timer
from lib.utils.vis import vis_one_image
def im_detect(detector, raw_image):
"""Detect a image, with single or multiple scales."""
ims, ims_scale = scale_image(raw_image)
# Prepare blobs
blobs = {'data': im_list_to_blob(ims)}
blobs['ims_info'] = np.array([
list(blobs['data'].shape[1:3]) + [im_scale]
for im_scale in ims_scale
], dtype=np.float32)
# Do Forward
if not hasattr(detector, 'frozen_graph'):
inputs = {
'data': torch.from_numpy(blobs['data']),
'ims_info': torch.from_numpy(blobs['ims_info']),
}
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
)
outputs = detector.frozen_graph(**blobs)
return outputs['detections'][:, 1:]
def ims_detect(detector, raw_images):
"""Detect images, with single or multiple scales."""
ims, ims_scale = scale_image(raw_images[0])
......@@ -81,7 +51,8 @@ def ims_detect(detector, raw_images):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs)
detector.frozen_graph = FrozenGraph(
detector.frozen_graph = \
framework.FrozenGraph(
{'data': inputs['data'],
'ims_info': inputs['ims_info']},
{'detections': outputs['detections']},
......@@ -111,24 +82,21 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
# Run detecting on specific scales
_t['im_detect'].tic()
if cfg.TEST.IMS_PER_BATCH > 1:
with _t['im_detect'].tic_and_toc():
results = ims_detect(detector, raw_images)
else:
results = [im_detect(detector, raw_images[0])]
_t['im_detect'].toc()
# Post-Processing
_t['misc'].tic()
......@@ -139,22 +107,22 @@ def test_net(detector, server):
detections = np.array(detections)
for j in range(1, num_classes):
cls_indices = np.where(detections[:, 5].astype(np.int32) == j)[0]
cls_boxes = detections[cls_indices, 0:4]
cls_boxes = detections[cls_indices, :4]
cls_scores = detections[cls_indices, 4]
cls_detections = np.hstack((
cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
keep = nms_wrapper.soft_nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
keep = nms_wrapper.nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
......
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from lib.core.config import cfg
class _LRScheduler(object):
def __init__(
self,
lr_max,
lr_min=0.,
warmup_steps=0,
warmup_factor=0.,
):
self._step_count = 0
self._lr_max, self._lr_min = lr_max, lr_min
self._warmup_steps = warmup_steps
self._warmup_factor = warmup_factor
self._last_lr = self._lr_max
self._last_steps = self._warmup_steps
def step(self):
self._step_count += 1
def get_lr(self):
if self._step_count < self._warmup_steps:
alpha = (self._step_count + 1.) / self._warmup_steps
decay_factor = self._warmup_factor * (1 - alpha) + alpha
self._last_lr = self._lr_max * decay_factor
return self._last_lr
return self.schedule_impl()
def schedule_impl(self):
raise NotImplementedError
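# A worked example of the warm-up branch in get_lr() above: with
# lr_max=0.4, warmup_steps=5 and warmup_factor=0. (the demo values at the
# bottom of this file), the first five steps yield 0.08, 0.16, 0.24,
# 0.32, 0.40 before schedule_impl() takes over.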
class StepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(StepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._decay_gamma = decay_gamma
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = step_count // self._decay_step
self._last_lr = self._lr_max * (
self._decay_gamma ** decay_factor)
return self._last_lr
class MultiStepLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_steps,
decay_gamma,
warmup_steps=0,
warmup_factor=0.,
):
super(MultiStepLR, self).__init__(
lr_max=lr_max,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_steps = decay_steps
self._decay_gamma = decay_gamma
self._stage_count, self._num_stages = 0, len(self._decay_steps)
def schedule_impl(self):
if self._stage_count < self._num_stages:
k = self._decay_steps[self._stage_count]
while self._step_count >= k:
self._stage_count += 1
if self._stage_count >= self._num_stages:
break
k = self._decay_steps[self._stage_count]
self._last_lr = self._lr_max * (
self._decay_gamma ** self._stage_count)
return self._last_lr
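A minimal sketch of the multi-step policy, scaled down to 100 steps (no warmup; values follow lr_max * decay_gamma ** stage):

from lib.solver.lr_scheduler import MultiStepLR

sched = MultiStepLR(lr_max=0.02, decay_steps=[60, 80], decay_gamma=0.1)
lrs = []
for _ in range(100):
    lrs.append(sched.get_lr())
    sched.step()
print(lrs[59], lrs[60], lrs[80])  # 0.02, 0.002, 0.0002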
class LinearLR(_LRScheduler):
def __init__(
self,
lr_max,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(LinearLR, self).__init__(
lr_max=lr_max,
lr_min=0.,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 1. - float(step_count) / self._max_steps
self._last_lr = self._lr_max * decay_factor
return self._last_lr
class CosineLR(_LRScheduler):
def __init__(
self,
lr_max,
lr_min,
decay_step,
max_steps,
warmup_steps=0,
warmup_factor=0.,
):
super(CosineLR, self).__init__(
lr_max=lr_max,
lr_min=lr_min,
warmup_steps=warmup_steps,
warmup_factor=warmup_factor,
)
self._decay_step = decay_step
self._max_steps = max_steps - warmup_steps
def schedule_impl(self):
step_count = self._step_count - self._last_steps
if step_count % self._decay_step == 0:
decay_factor = 0.5 * (1. + math.cos(
math.pi * step_count / self._max_steps))
self._last_lr = self._lr_min + (
self._lr_max - self._lr_min
) * decay_factor
return self._last_lr
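The cosine policy anneals from lr_max to lr_min following lr = lr_min + (lr_max - lr_min) * 0.5 * (1 + cos(pi * t / T)); a quick endpoint check (a sketch, no warmup):

from lib.solver.lr_scheduler import CosineLR

sched = CosineLR(lr_max=0.4, lr_min=0., decay_step=1, max_steps=200)
vals = []
for _ in range(200):
    vals.append(sched.get_lr())
    sched.step()
print(vals[0], vals[100], vals[199])  # 0.4, 0.2, ~0.0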
def get_scheduler():
lr_policy = cfg.SOLVER.LR_POLICY
if lr_policy == 'step':
return StepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_step=cfg.SOLVER.DECAY_STEP,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'steps_with_decay':
return MultiStepLR(
lr_max=cfg.SOLVER.BASE_LR,
decay_steps=cfg.SOLVER.DECAY_STEPS,
decay_gamma=cfg.SOLVER.DECAY_GAMMA,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
elif lr_policy == 'cosine_decay':
return CosineLR(
lr_max=cfg.SOLVER.BASE_LR,
lr_min=0.,
decay_step=cfg.SOLVER.DECAY_STEP,
max_steps=cfg.SOLVER.MAX_STEPS,
warmup_steps=cfg.SOLVER.WARM_UP_STEPS,
warmup_factor=cfg.SOLVER.WARM_UP_FACTOR,
)
else:
raise ValueError('Unknown lr policy: ' + lr_policy)
if __name__ == '__main__':
def extract_label(scheduler):
class_name = scheduler.__class__.__name__
label = class_name + '('
if class_name == 'StepLR':
label += 'α=' + str(scheduler._decay_step) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'MultiStepLR':
label += 'α=' + str(scheduler._decay_steps) + ', '
label += 'γ=' + str(scheduler._decay_gamma)
elif class_name == 'CosineLR':
label += 'α=' + str(scheduler._decay_step)
label += ')'
return label
vis = True
max_steps = 240
shared_args = {
'lr_max': 0.4,
'warmup_steps': 5,
'warmup_factor': 0.,
}
schedulers = [
StepLR(decay_step=1, decay_gamma=0.97, **shared_args),
MultiStepLR(decay_steps=[60, 120, 180], decay_gamma=0.1, **shared_args),
CosineLR(lr_min=0., decay_step=1, max_steps=max_steps, **shared_args),
LinearLR(decay_step=1, max_steps=max_steps, **shared_args),
]
for i in range(max_steps):
info = 'Step = %d\n' % i
for scheduler in schedulers:
if i == 0:
scheduler.lr_seq = []
info += ' * {}: {}\n'.format(
extract_label(scheduler),
scheduler.get_lr())
scheduler.lr_seq.append(scheduler.get_lr())
scheduler.step()
if not vis:
print(info)
if vis:
import matplotlib.pyplot as plt
plt.figure(1)
plt.title('Visualization of different LR Schedulers')
plt.xlabel('Step')
plt.ylabel('Learning Rate')
line = '--'
colors = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
for i, scheduler in enumerate(schedulers):
plt.plot(
range(max_steps),
scheduler.lr_seq,
colors[i] + line,
linewidth=1.,
label=extract_label(scheduler),
)
plt.legend()
plt.show()
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.modeling.detector import Detector
from lib.solver import lr_scheduler
from lib.utils import framework
from lib.utils import time_util
class SGDSolver(object):
def __init__(self):
# Define the generic detector
self.detector = Detector()
# Define the optimizer and its arguments
self.optimizer = torch.optim.SGD(
framework.get_param_groups(self.detector),
lr=cfg.SOLVER.BASE_LR,
momentum=cfg.SOLVER.MOMENTUM,
weight_decay=cfg.SOLVER.WEIGHT_DECAY,
clip_gradient=float(cfg.SOLVER.CLIP_NORM),
scale_gradient=1. / cfg.SOLVER.LOSS_SCALING,
)
self.lr_scheduler = lr_scheduler.get_scheduler()
def one_step(self):
def add_loss(x, y):
return y if x is None else x + y
stats = {
'iter': self.iter,
'loss': {'total': 0.},
'time': time_util.Timer(),
}
with stats['time'].tic_and_toc():
# Forward pass
outputs = self.detector()
# Backward pass
total_loss = None
loss_scaling = cfg.SOLVER.LOSS_SCALING
for k, v in outputs.items():
if 'loss' in k:
if k not in stats['loss']:
stats['loss'][k] = 0.
total_loss = add_loss(total_loss, v)
stats['loss'][k] += float(v) * loss_scaling
if loss_scaling != 1.:
total_loss *= loss_scaling
stats['loss']['total'] += float(total_loss)
total_loss.backward()
# Apply Update
self.base_lr = self.lr_scheduler.get_lr()
self.optimizer.step()
self.lr_scheduler.step()
# Misc stats
stats['lr'] = self.base_lr
stats['time'] = stats['time'].total_time
return stats
@property
def base_lr(self):
return self.optimizer.param_groups[0]['lr']
@base_lr.setter
def base_lr(self, value):
for group in self.optimizer.param_groups:
group['lr'] = value
@property
def iter(self):
return self.lr_scheduler._step_count
@iter.setter
def iter(self, value):
self.lr_scheduler._step_count = value
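A hedged sketch of the loop this solver drives; save_checkpoint is a hypothetical helper, and the cfg keys are assumed to be set as in the solver configs:

from lib.core.config import cfg

solver = SGDSolver()
while solver.iter < cfg.SOLVER.MAX_STEPS:
    stats = solver.one_step()  # forward + backward + update
    if solver.iter % cfg.SOLVER.SNAPSHOT_EVERY == 0:
        save_checkpoint(solver.detector, solver.iter)  # hypothetical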
......@@ -83,7 +83,7 @@ class DataTransformer(multiprocessing.Process):
]
else:
roi_dict['boxes'][object_idx, :] = \
rotated_boxes.canonicalize(
rotated_boxes.vertices2box(
[obj['x1'], obj['y1'],
obj['x2'], obj['y2'],
obj['x3'], obj['y3'],
......@@ -108,7 +108,7 @@ class DataTransformer(multiprocessing.Process):
def get(self, example):
img = np.frombuffer(example['content'], np.uint8)
img = cv2.imdecode(img, -1)
img = cv2.imdecode(img, 1)
# Flip
flip = False
......
......@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
class HardMiningLayer(torch.nn.Module):
......@@ -63,4 +63,4 @@ class HardMiningLayer(torch.nn.Module):
labels_wide[ix][bg_inds] = 0 # Use hard negatives as bg indices
# Feed labels to compute cls loss
return {'labels': blob_to_tensor(labels_wide)}
return {'labels': array2tensor(labels_wide)}
......@@ -17,7 +17,7 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.blob import blob_to_tensor
from lib.utils.blob import array2tensor
from lib.utils.boxes import bbox_overlaps
from lib.utils.boxes import bbox_transform
from lib.utils.boxes import dismantle_gt_boxes
......@@ -121,7 +121,7 @@ class MultiBoxTargetLayer(torch.nn.Module):
bbox_outside_weights_wide[ix][ex_inds] = bbox_reg_weight
return {
'bbox_targets': blob_to_tensor(bbox_targets_wide),
'bbox_inside_weights': blob_to_tensor(bbox_inside_weights_wide),
'bbox_outside_weights': blob_to_tensor(bbox_outside_weights_wide),
'bbox_targets': array2tensor(bbox_targets_wide),
'bbox_inside_weights': array2tensor(bbox_inside_weights_wide),
'bbox_outside_weights': array2tensor(bbox_outside_weights_wide),
}
......@@ -18,12 +18,11 @@ import dragon.vm.torch as torch
import numpy as np
from lib.core.config import cfg
from lib.nms.nms_wrapper import nms
from lib.nms.nms_wrapper import soft_nms
from lib.nms import nms_wrapper
from lib.utils import framework
from lib.utils import time_util
from lib.utils.boxes import bbox_transform_inv
from lib.utils.boxes import clip_boxes
from lib.utils.timer import Timer
from lib.utils.graph import FrozenGraph
from lib.utils.vis import vis_one_image
......@@ -49,7 +48,8 @@ def ims_detect(detector, ims):
with torch.no_grad():
with torch.jit.Recorder(retain_ops=True):
outputs = detector.forward(inputs={'data': image})
detector.frozen_graph = FrozenGraph(
detector.frozen_graph = \
framework.FrozenGraph(
{'data': image},
{'cls_prob': outputs['cls_prob'],
'bbox_pred': outputs['bbox_pred']},
......@@ -81,21 +81,21 @@ def test_net(detector, server):
num_classes = server.num_classes
all_boxes = [[[] for _ in range(num_images)] for _ in range(num_classes)]
_t = {'im_detect': Timer(), 'misc': Timer()}
_t = {'im_detect': time_util.Timer(), 'misc': time_util.Timer()}
for batch_idx in range(0, num_images, cfg.TEST.IMS_PER_BATCH):
# Collect raw images and ground-truths
image_ids, raw_images = [], []
for item_idx in range(cfg.TEST.IMS_PER_BATCH):
if batch_idx + item_idx >= num_images: continue
if batch_idx + item_idx >= num_images:
continue
image_id, raw_image = server.get_image()
image_ids.append(image_id)
raw_images.append(raw_image)
_t['im_detect'].tic()
with _t['im_detect'].tic_and_toc():
batch_scores, batch_boxes = ims_detect(detector, raw_images)
_t['im_detect'].toc()
_t['misc'].tic()
for item_idx in range(len(batch_scores)):
......@@ -114,16 +114,16 @@ def test_net(detector, server):
(cls_boxes, cls_scores[:, np.newaxis])) \
.astype(np.float32, copy=False)
if cfg.TEST.USE_SOFT_NMS:
keep = soft_nms(
keep = nms_wrapper.soft_nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
method=cfg.TEST.SOFT_NMS_METHOD,
sigma=cfg.TEST.SOFT_NMS_SIGMA,
)
else:
keep = nms(
keep = nms_wrapper.nms(
cls_detections,
cfg.TEST.NMS,
thresh=cfg.TEST.NMS,
force_cpu=True,
)
cls_detections = cls_detections[keep, :]
......
......@@ -47,18 +47,16 @@ class Distort(object):
def apply(self, img, boxes=None):
img = PIL.Image.fromarray(img)
if npr.uniform() < self._brightness_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Brightness(img)
img = img.enhance(delta)
if npr.uniform() < self._contrast_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Contrast(img)
img = img.enhance(delta)
if npr.uniform() < self._saturation_prob:
delta = npr.uniform(-0.3, 0.3) + 1.
img = PIL.ImageEnhance.Color(img)
img = img.enhance(delta)
transforms = [
(PIL.ImageEnhance.Brightness, self._brightness_prob),
(PIL.ImageEnhance.Contrast, self._contrast_prob),
(PIL.ImageEnhance.Color, self._saturation_prob),
]
npr.shuffle(transforms)
for transform_fn, prob in transforms:
if npr.uniform() < prob:
img = transform_fn(img)
img = img.enhance(1. + npr.uniform(-.4, .4))
return np.array(img), boxes
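A minimal usage sketch of the refactored transform (the constructor defaults and import path are assumptions; apply shuffles the three PIL enhancers and applies each with its own probability):

import numpy as np
import numpy.random as npr

distort = Distort()  # assuming default per-op probabilities
img = npr.randint(0, 256, (480, 640, 3)).astype(np.uint8)
img, boxes = distort.apply(img, boxes=None)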
......
......@@ -21,7 +21,8 @@ import numpy as np
import dragon.vm.torch as torch
from lib.core.config import cfg
from lib.utils.image import resize_image, distort_image
from lib.utils.image import distort_image
from lib.utils.image import resize_image
def im_list_to_blob(ims):
......@@ -60,17 +61,17 @@ def mask_list_to_blob(masks):
return blob
def prep_im_for_blob(im, target_size, max_size):
def prep_im_for_blob(img, target_size, max_size):
"""Scale an image for use in a blob."""
im_shape, jitter = im.shape, 1.
im_shape, jitter = img.shape, 1.
if cfg.TRAIN.COLOR_JITTERING:
im = distort_image(im)
if cfg.TRAIN.USE_COLOR_JITTER:
img = distort_image(img)
if max_size > 0:
# Scale image along the shortest side
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_size_min = np.min(im_shape[:2])
im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
......@@ -78,31 +79,31 @@ def prep_im_for_blob(im, target_size, max_size):
im_scale = float(max_size) / float(im_size_max)
else:
# Scale image along the longest side
im_size_max = np.max(im_shape[0:2])
im_size_max = np.max(im_shape[:2])
im_scale = float(target_size) / float(im_size_max)
if cfg.TRAIN.SCALE_JITTERING:
r = cfg.TRAIN.SCALE_RANGE
if cfg.TRAIN.USE_SCALE_JITTER:
r = cfg.TRAIN.SCALE_JITTER_RANGE
jitter = r[0] + np.random.rand() * (r[1] - r[0])
im_scale *= jitter
return resize_image(im, im_scale, im_scale), im_scale, jitter
return resize_image(img, im_scale, im_scale), im_scale, jitter
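For instance (a sketch; assumes color/scale jitter are disabled in cfg):

import numpy as np
from lib.utils.blob import prep_im_for_blob

img = np.zeros((375, 500, 3), dtype=np.uint8)
# im_scale = 600 / 375 = 1.6; the long side becomes 500 * 1.6 = 800 <= 1000, so no clamping
img, im_scale, jitter = prep_im_for_blob(img, target_size=600, max_size=1000)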
def blob_to_tensor(blob, enforce_cpu=False):
if isinstance(blob, np.ndarray):
def array2tensor(array, enforce_cpu=False):
if isinstance(array, np.ndarray):
# Zero-Copy from numpy
cpu_tensor = torch.from_numpy(blob)
cpu_tensor = torch.from_numpy(array)
else:
cpu_tensor = blob
cpu_tensor = array
return cpu_tensor if enforce_cpu else \
cpu_tensor.cuda(cfg.GPU_ID)
def tensor_to_blob(blob, copy=False):
if isinstance(blob, torch.Tensor):
def tensor2array(tensor, copy=False):
if isinstance(tensor, torch.Tensor):
# Zero-copy to numpy
array = blob.numpy(True)
array = tensor.numpy(True)
else:
array = blob
array = tensor
return array.copy() if copy else array
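A round-trip sketch for the renamed helpers (zero-copy in both directions unless copy=True is requested):

import numpy as np
from lib.utils.blob import array2tensor, tensor2array

arr = np.zeros((2, 3), dtype=np.float32)
t = array2tensor(arr, enforce_cpu=True)  # wraps arr without copying
back = tensor2array(t)                   # view of the same memory
assert back.shape == (2, 3)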
......@@ -16,8 +16,100 @@ from __future__ import print_function
import collections
import dragon
import dragon.vm.torch as torch
from dragon.core.framework import tensor_util
from dragon.vm.torch.jit.recorder import get_default_recorder
from dragon.core.util import six
def get_param_groups(module, bias_lr=1., bias_decay=0.):
"""Separate weight and bias into parameters groups.
Parameters
----------
module : dragon.vm.torch.nn.Module
The module to collect parameters from.
bias_lr : float, optional, default=1.
The lr multiplier of bias.
bias_decay : float, optional, default=0.
The decay multiplier of bias.
Returns
-------
Sequence[ParamGroup]
The parameter groups.
"""
param_groups = [
{
'params': [],
'lr_mult': 1.,
'decay_mult': 1.,
},
{
'params': [],
'lr_mult': bias_lr,
'decay_mult': bias_decay,
}
]
for name, param in module.named_parameters():
gi = 1 if 'bias' in name else 0
param_groups[gi]['params'].append(param)
if len(param_groups[1]['params']) == 0:
param_groups.pop() # Remove empty group
return param_groups
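The groups feed straight into the optimizer, as SGDSolver does above; a sketch (whether lr_mult/decay_mult are honored per group is up to the Dragon optimizer):

import dragon.vm.torch as torch
from lib.modeling.detector import Detector
from lib.utils import framework

detector = Detector()
groups = framework.get_param_groups(detector, bias_lr=2., bias_decay=0.)
optimizer = torch.optim.SGD(groups, lr=0.02, momentum=0.9, weight_decay=0.0001)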
def get_workspace():
"""Return the current default workspace.
Returns
-------
dragon.Workspace
The default workspace.
"""
return dragon.workspace.get_default()
def new_workspace(merge_default=True):
"""Create a new workspace.
Parameters
----------
merge_default : bool, optional, default=True
**True** to merge tensors from the default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
workspace = dragon.Workspace()
if merge_default:
workspace.merge_from(get_workspace())
return workspace
def reset_workspace(workspace=None, merge_default=True):
"""Reset a workspace and return a new one.
Parameters
----------
workspace : dragon.Workspace, optional
The workspace to reset.
merge_default : bool, optional, default=True
**True** to merge tensors from the default workspace.
Returns
-------
dragon.Workspace
The new workspace.
"""
if workspace is not None:
workspace.Clear() # Block the GIL
return new_workspace(merge_default)
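A hedged sketch of cycling workspaces between runs (Workspace.as_default is assumed from its use in FrozenGraph below):

from lib.utils import framework

ws = framework.new_workspace()      # fresh workspace seeded from the default one
with ws.as_default():
    pass                            # run graphs against ws here
ws = framework.reset_workspace(ws)  # clear it and get a replacement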
class FrozenGraph(object):
......@@ -41,9 +133,8 @@ class FrozenGraph(object):
self._inputs = canonicalize(inputs)
self._outputs = canonicalize(outputs)
self._constants = canonicalize(constants)
self._graph = dragon.Workspace() \
.merge_from(dragon.workspace.get_default())
self._tape = get_default_recorder()
self._graph = new_workspace()
self._tape = torch.jit.get_default_recorder()
def forward(self, **kwargs):
# Assign inputs
......@@ -70,3 +161,7 @@ class FrozenGraph(object):
def __call__(self, **kwargs):
with self._graph.as_default():
return self.forward(**kwargs)
# Aliases
pickle = six.moves.pickle
......@@ -21,9 +21,50 @@ import PIL.ImageEnhance
from lib.core.config import cfg
def resize_image(im, fx, fy):
def distort_image(img):
img = PIL.Image.fromarray(img)
transforms = [
PIL.ImageEnhance.Brightness,
PIL.ImageEnhance.Contrast,
PIL.ImageEnhance.Color,
]
np.random.shuffle(transforms)
for transform in transforms:
if np.random.uniform() < .5:
img = transform(img)
img = img.enhance(1. + np.random.uniform(-.4, .4))
return np.array(img)
def get_image_with_target_size(target_size, img):
im_shape = list(img.shape)
height_diff = target_size[0] - im_shape[0]
width_diff = target_size[1] - im_shape[1]
ofs_crop_width = np.random.randint(max(-width_diff, 0) + 1)
ofs_pad_width = np.random.randint(max(width_diff, 0) + 1)
ofs_crop_height = np.random.randint(max(-height_diff, 0) + 1)
ofs_pad_height = np.random.randint(max(height_diff, 0) + 1)
im_shape[:2] = target_size
new_img = np.empty(im_shape, dtype=img.dtype)
new_img[:] = cfg.PIXEL_MEANS
new_img[ofs_pad_height:ofs_pad_height + img.shape[0],
ofs_pad_width:ofs_pad_width + img.shape[1]] = \
img[ofs_crop_height:ofs_crop_height + target_size[0],
ofs_crop_width:ofs_crop_width + target_size[1]]
return new_img, (
ofs_pad_width - ofs_crop_width,
ofs_pad_height - ofs_crop_height,
target_size,
)
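A worked example of the crop-or-pad behavior (a sketch; the offsets are drawn randomly, so dx/dy vary run to run):

import numpy as np
from lib.utils.image import get_image_with_target_size

img = np.zeros((480, 640, 3), dtype=np.uint8)
# height_diff = 512 - 480 = 32 (pad rows); width_diff = 512 - 640 = -128 (crop columns)
new_img, (dx, dy, size) = get_image_with_target_size((512, 512), img)
# dx = pad_w - crop_w and dy = pad_h - crop_h; shift box coordinates by (dx, dy)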
def resize_image(img, fx, fy):
return cv2.resize(
im,
img,
dsize=None,
fx=fx, fy=fy,
interpolation=cv2.INTER_LINEAR,
......@@ -36,29 +77,12 @@ def resize_mask(mask, size):
return np.array(mask.resize(size, PIL.Image.NEAREST))
def distort_image(im):
im = PIL.Image.fromarray(im)
if np.random.uniform() < 0.5:
delta_brightness = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Brightness(im)
im = im.enhance(delta_brightness)
if np.random.uniform() < 0.5:
delta_contrast = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Contrast(im)
im = im.enhance(delta_contrast)
if np.random.uniform() < 0.5:
delta_saturation = np.random.uniform(-0.3, 0.3) + 1.
im = PIL.ImageEnhance.Color(im)
im = im.enhance(delta_saturation)
return np.array(im)
def scale_image(im):
def scale_image(img):
processed_ims, ims_scales = [], []
if cfg.TEST.MAX_SIZE > 0:
im_size_min = np.min(im.shape[:2])
im_size_max = np.max(im.shape[:2])
im_size_min = np.min(img.shape[:2])
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_min)
# Prevent the biggest axis from being more than MAX_SIZE
......@@ -66,7 +90,7 @@ def scale_image(im):
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
cv2.resize(
im,
img,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
......@@ -74,12 +98,12 @@ def scale_image(im):
ims_scales.append(im_scale)
else:
# Scale image along the longest side
im_size_max = np.max(im.shape[0:2])
im_size_max = np.max(img.shape[:2])
for target_size in cfg.TEST.SCALES:
im_scale = float(target_size) / float(im_size_max)
processed_ims.append(
cv2.resize(
im,
img,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
......
......@@ -18,6 +18,7 @@ from __future__ import division
from __future__ import print_function
import contextlib
import datetime
import time
......@@ -30,9 +31,16 @@ class Timer(object):
self.diff = 0.
self.average_time = 0.
@contextlib.contextmanager
def tic_and_toc(self):
try:
yield self.tic()
finally:
self.toc()
def tic(self):
# Using time.time instead of time.clock because time.clock
# does not normalize for multi-threading
# does not normalize for multithreading
self.start_time = time.time()
def toc(self, average=True):
......@@ -45,9 +53,28 @@ class Timer(object):
else:
return self.diff
@contextlib.contextmanager
def tic_and_toc(self):
try:
yield self.tic()
finally:
self.toc()
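A minimal usage sketch of the context-manager form added above:

from lib.utils import time_util

timer = time_util.Timer()
with timer.tic_and_toc():
    run_inference()  # hypothetical workload
print('%.3fs / call' % timer.average_time)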
def get_progress_info(timer, curr_step, max_steps):
"""Return a info of current progress.
Parameters
----------
timer : Timer
The timer that tracks the average step time.
curr_step : int
The current step.
max_steps : int
The total number of steps.
Returns
-------
str
The progress info.
"""
average_time = timer.average_time
eta_seconds = average_time * (max_steps - curr_step)
eta = str(datetime.timedelta(seconds=int(eta_seconds)))
progress = (curr_step + 1.) / max_steps
return '< PROGRESS: {:.2%} | SPEED: {:.3f}s / iter | ETA: {} >' \
.format(progress, timer.average_time, eta)
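For instance, at step 4999 of 90000 with a 0.125s average step time, ETA = 0.125 * 85001 s = 10625 s, so the string reads:

# '< PROGRESS: 5.56% | SPEED: 0.125s / iter | ETA: 2:57:05 >'
info = get_progress_info(timer, curr_step=4999, max_steps=90000)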
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'x1': float(bbox.find('x1').text),
'y1': float(bbox.find('y1').text),
'x2': float(bbox.find('x2').text),
'y2': float(bbox.find('y2').text),
'x3': float(bbox.find('x3').text),
'y3': float(bbox.find('y3').text),
'x4': float(bbox.find('x4').text),
'y4': float(bbox.find('y4').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'x1': 'float64',
'y1': 'float64',
'x2': 'float64',
'y2': 'float64',
'x3': 'float64',
'y3': 'float64',
'x4': 'float64',
'y4': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from os import path as osp
from maker import make_record
if __name__ == '__main__':
voc_root = '/data/VOC'
make_record(
record_file=osp.join(voc_root, 'voc_0712_trainval'),
images_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/JPEGImages')],
annotations_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/Annotations')],
imagesets_path=[osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
osp.join(voc_root, 'VOCdevkit2012/VOC2012/ImageSets/Main')],
splits=['trainval', 'trainval']
)
make_record(
record_file=osp.join(voc_root, 'voc_2007_test'),
images_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/JPEGImages'),
annotations_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/Annotations'),
imagesets_path=osp.join(voc_root, 'VOCdevkit2007/VOC2007/ImageSets/Main'),
splits=['test']
)
# ------------------------------------------------------------
# Copyright (c) 2017-present, SeetaTech, Co.,Ltd.
#
# Licensed under the BSD 2-Clause License.
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# ------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import cv2
import dragon
import numpy as np
import xml.etree.ElementTree as ET
def make_example(image_file, xml_file):
tree = ET.parse(xml_file)
filename = os.path.split(xml_file)[-1]
objs = tree.findall('object')
example = {'id': filename.split('.')[0], 'object': []}
with open(image_file, 'rb') as f:
img_bytes = bytes(f.read())
img = cv2.imdecode(np.frombuffer(img_bytes, 'uint8'), 1)
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(objs):
bbox = obj.find('bndbox')
is_diff = 0
if obj.find('difficult') is not None:
is_diff = int(obj.find('difficult').text) == 1
example['object'].append({
'name': obj.find('name').text.strip(),
'xmin': float(bbox.find('xmin').text),
'ymin': float(bbox.find('ymin').text),
'xmax': float(bbox.find('xmax').text),
'ymax': float(bbox.find('ymax').text),
'difficult': is_diff,
})
return example
def make_record(
record_file,
images_path,
annotations_path,
imagesets_path,
splits
):
if os.path.exists(record_file):
raise ValueError('The record file already exists.')
os.makedirs(record_file)
if not isinstance(images_path, list):
images_path = [images_path]
if not isinstance(annotations_path, list):
annotations_path = [annotations_path]
if not isinstance(imagesets_path, list):
imagesets_path = [imagesets_path]
assert len(splits) == len(imagesets_path)
assert len(splits) == len(images_path)
assert len(splits) == len(annotations_path)
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
writer = dragon.io.SeetaRecordWriter(
path=record_file,
protocol={
'id': 'string',
'content': 'bytes',
'height': 'int64',
'width': 'int64',
'depth': 'int64',
'object': [{
'name': 'string',
'xmin': 'float64',
'ymin': 'float64',
'xmax': 'float64',
'ymax': 'float64',
'difficult': 'int64',
}]
}
)
count, total_line = 0, 0
start_time = time.time()
for db_idx, split in enumerate(splits):
split_file = os.path.join(imagesets_path[db_idx], split + '.txt')
assert os.path.exists(split_file)
with open(split_file, 'r') as f:
lines = f.readlines()
total_line += len(lines)
for line in lines:
count += 1
if count % 2000 == 0:
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + '.jpg')
xml_file = os.path.join(annotations_path[db_idx], filename + '.xml')
writer.write(make_example(image_file, xml_file))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
writer.close()
end_time = time.time()
data_size = os.path.getsize(record_file + '/data.data') * 1e-6
print('{} images take {:.2f} MB in {:.2f} sec.'
.format(total_line, data_size, end_time - start_time))
......@@ -74,7 +74,7 @@ if __name__ == '__main__':
detector.optimize_for_inference()
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
data = torch.zeros(*args.input_shape).byte()
......
......@@ -37,8 +37,14 @@ def parse_args():
parser.add_argument('--exp_dir', dest='exp_dir',
help='experiment dir',
default=None, type=str)
parser.add_argument('--output_dir', dest='output_dir',
help='output dir',
default=None, type=str)
parser.add_argument('--iter', dest='iter', help='global step',
default=0, type=int)
default=None, type=int)
parser.add_argument('--dump', dest='dump',
help='dump the result back to record?',
action='store_true')
parser.add_argument('--wait', dest='wait',
help='wait the checkpoint?',
action='store_true')
......@@ -75,19 +81,19 @@ if __name__ == '__main__':
# Inspect the database
database = get_imdb(cfg.TEST.DATABASE)
cfg.TEST.PROTOCOL = 'null' if args.dump else cfg.TEST.PROTOCOL
logger.info('Database({}): {} images will be used to test.'
.format(cfg.TEST.DATABASE, database.num_images))
# Ready to test the network
logger.info('Results will be saved to `{:s}`'
.format(coordinator.results_dir(checkpoint)))
output_dir = coordinator.results_dir(checkpoint, args.output_dir)
logger.info('Results will be saved to `{:s}`'.format(output_dir))
detector = Detector().eval().cuda(cfg.GPU_ID)
detector.load_weights(checkpoint)
detector.optimize_for_inference()
# Mixed precision training?
if cfg.MODEL.DATA_TYPE.lower() == 'float16':
if cfg.MODEL.PRECISION.lower() == 'float16':
detector.half() # Powerful FP16 Support
server = TestServer(coordinator.results_dir(checkpoint))
test_engine.test_net(detector, server)
test_engine.test_net(detector, TestServer(output_dir))