Commit d240a4fd by Ting PAN

Adapt to the latest dragon preview version

Summary:
This commit updates the repository to match dragon 0.3.0.dev20200723.
1 parent 169218d4
Showing with 231 additions and 185 deletions
------------------------------------------------------------------------
A list of the most significant changes made to SeetaDet over time.
SeetaDet 0.4.3 (20200724)
Dragon Minimum Required (Version 0.3.0.dev20200723)
Changes:
- Adapt to the latest dragon preview version.
Preview Features:
- None
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.4.2 (20200707)
Dragon Minimum Required (Version 0.3.0.dev20200707)
......
......@@ -14,7 +14,7 @@ The torch-style codes help us to simplify the hierarchical pipeline of modern de
## Requirements
seeta-dragon >= 0.3.0.dev20200707
seeta-dragon >= 0.3.0.dev20200723
## Installation
......
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: mask_rcnn
BACKBONE: resnet101.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_mask_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
MRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 14
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
NUM_GPUS: 8
VIS: False
ENABLE_TENSOR_BOARD: False
MODEL:
TYPE: mask_rcnn
BACKBONE: resnet101.fpn
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench',
'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant',
'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife',
'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli',
'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush']
NUM_CLASSES: 81
SOLVER:
BASE_LR: 0.02
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_mask_rcnn
FRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 7
MRCNN:
ROI_XFORM_METHOD: RoIAlign
ROI_XFORM_RESOLUTION: 14
TRAIN:
WEIGHTS: '/model/R-101.Affine.pth'
DATASET: '/data/coco_2014_trainval35k'
IMS_PER_BATCH: 2
BATCH_SIZE: 512
SCALES: [800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
PROTOCOL: 'coco'
SCALES: [800]
MAX_SIZE: 1333
NMS: 0.5
RPN_POST_NMS_TOP_N: 1000
......@@ -53,8 +53,7 @@ def im_detect(detector, raw_image):
# Decode results
all_scores, all_boxes = [], []
pred_boxes = \
box_util.bbox_transform_inv(
pred_boxes = box_util.bbox_transform_inv(
outputs['rois'][:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
......@@ -76,8 +75,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
_t = time_util.new_timers('im_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if idx < 0:
i, raw_image = q_in.get()
if i < 0:
break
boxes_this_image = [[]]
......@@ -110,9 +109,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
_t['misc'].toc()
q_out.put((
idx,
i,
dict([('im_detect', _t['im_detect'].average_time),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -56,8 +56,7 @@ def im_detect(detector, raw_image):
# Decode results
all_scores, all_boxes, batch_inds = [], [], []
pred_boxes = \
box_util.bbox_transform_inv(
pred_boxes = box_util.bbox_transform_inv(
outputs['rois'][:, 1:5],
outputs['bbox_pred'],
cfg.BBOX_REG_WEIGHTS,
......@@ -114,8 +113,8 @@ def test_net(weights, num_classes, q_in, q_out, device):
_t = time_util.new_timers('im_detect', 'mask_detect', 'misc')
while True:
idx, raw_image = q_in.get()
if idx < 0:
i, raw_image = q_in.get()
if i < 0:
break
rois_this_image = []
......@@ -171,7 +170,7 @@ def test_net(weights, num_classes, q_in, q_out, device):
_t['mask_detect'].toc()
q_out.put((
idx,
i,
dict([('im_detect', _t['im_detect'].average_time),
('mask_detect', _t['mask_detect'].average_time),
('misc', _t['misc'].average_time)]),
......
......@@ -15,7 +15,6 @@ from __future__ import print_function
import types
import dragon
import dragon.vm.torch as torch
import numpy as np
......@@ -80,12 +79,12 @@ def test_net(weights, num_classes, q_in, q_out, device):
if must_stop:
break
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if idx < 0:
for _ in range(cfg.TEST.IMS_PER_BATCH):
i, raw_image = q_in.get()
if i < 0:
must_stop = True
break
indices.append(idx)
indices.append(i)
raw_images.append(raw_image)
if len(raw_images) == 0:
......
......@@ -87,12 +87,12 @@ def test_net(weights, num_classes, q_in, q_out, device):
if must_stop:
break
indices, raw_images = [], []
for i in range(cfg.TEST.IMS_PER_BATCH):
idx, raw_image = q_in.get()
if idx < 0:
for _ in range(cfg.TEST.IMS_PER_BATCH):
i, raw_image = q_in.get()
if i < 0:
must_stop = True
break
indices.append(idx)
indices.append(i)
raw_images.append(raw_image)
if len(raw_images) == 0:
......@@ -135,6 +135,6 @@ def test_net(weights, num_classes, q_in, q_out, device):
q_out.put((
indices[i],
dict([('im_detect', _t['im_detect'].average_time),
('misc',_t['misc'].average_time)]),
('misc', _t['misc'].average_time)]),
dict([('boxes', boxes_this_image)]),
))
......@@ -608,6 +608,5 @@ def _check_and_coerce_cfg_value_type(value_a, value_b, key):
else:
raise ValueError(
'Type mismatch ({} vs. {}) with values ({} vs. {}) for config '
'key: {}'.format(type_b, type_a, value_b, value_a, key)
)
'key: {}'.format(type_b, type_a, value_b, value_a, key))
return value_a
......@@ -65,7 +65,7 @@ class SolverWrapper(object):
for k, v in stats['loss'].items():
if k not in self.metrics:
self.metrics[k] = SmoothedValue(20)
self.metrics[k].AddValue(v)
self.metrics[k].add_value(v)
def send_metrics(self, stats):
if self.board is not None:
......@@ -74,16 +74,10 @@ class SolverWrapper(object):
for k, v in self.metrics.items():
if k == 'total':
self.board.scalar_summary(
'total_loss',
v.GetMedianValue(),
stats['iter'],
)
'total_loss', v.get_median(), stats['iter'])
else:
self.board.scalar_summary(
k,
v.GetMedianValue(),
stats['iter'],
)
k, v.get_median(), stats['iter'])
def step(self):
display = self.solver.iter % cfg.SOLVER.DISPLAY == 0
......@@ -92,17 +86,14 @@ class SolverWrapper(object):
if display:
logger.info(
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs' % (
stats['iter'], stats['lr'],
self.metrics['total'].GetMedianValue(),
stats['time'],
)
)
'Iteration %d, lr = %.8f, loss = %f, time = %.2fs'
% (stats['iter'], stats['lr'],
self.metrics['total'].get_median(), stats['time']))
for k, v in self.metrics.items():
if k == 'total':
continue
logger.info(' ' * 10 + 'Train net output({}): {}'
.format(k, v.GetMedianValue()))
.format(k, v.get_median()))
self.send_metrics(stats)
def train_model(self):
......@@ -116,11 +107,7 @@ class SolverWrapper(object):
_, global_step = self.step(), self.solver.iter
if global_step % (10 * cfg.SOLVER.DISPLAY) == 0:
logger.info(
time_util.get_progress_info(
timer, global_step, max_steps
)
)
logger.info(time_util.get_progress_info(timer, global_step, max_steps))
if global_step % cfg.SOLVER.SNAPSHOT_EVERY == 0:
self.snapshot()
......
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/imdb.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/factory.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -18,6 +14,7 @@ from __future__ import division
from __future__ import print_function
import os
from seetadet.datasets import kpl_dataset
......
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -18,7 +14,6 @@ from __future__ import division
from __future__ import print_function
import os
import dragon
from seetadet.core.config import cfg
......
......@@ -95,8 +95,7 @@ class AirNet(nn.Module):
def __init__(self, blocks, num_stages):
super(AirNet, self).__init__()
self.dim_in, filters = 64, [64, 128, 256, 384]
self.feature_dims = [None, None] + \
filters[1:num_stages - 1]
self.feature_dims = [None, None] + filters[1:num_stages - 1]
self.conv1 = nn.Conv2d(
3, 64,
kernel_size=7,
......
......@@ -113,10 +113,10 @@ class Detector(nn.Module):
# 1. Extract features
# Process the data:
# 0) CPU => CUDA
# 1) NHWC => NCHW
# 2) uint8 => float32 or float16
# 3) Mean subtraction
# 1) CPU => CUDA
# 2) NHWC => NCHW
# 3) uint8 => float32 or float16
# 4) Mean subtraction
image_data = self.bootstrap(inputs['data'])
features = self.body(image_data)
......
......@@ -27,7 +27,7 @@ from seetadet.modules import vision
class FastRCNN(nn.Module):
"""Generate proposal regions for R-CNN series.
r"""Generate proposal regions for R-CNN series.
The pipeline is as follows:
......@@ -36,8 +36,10 @@ class FastRCNN(nn.Module):
... -> Features / \-> bbox_pred -> bbox_loss
"""
def __init__(self, dim_in=256):
super(FastRCNN, self).__init__()
self.data = {}
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
......@@ -130,12 +132,9 @@ class FastRCNN(nn.Module):
# Compute rcnn losses
bbox_pred = outputs['bbox_pred'].view(0, -1, 4) \
.index_select((0, 1), self.data['bbox_indices'])
bbox_loss_weight = \
cfg.MODEL.REG_LOSS_WEIGHT / (
bbox_loss_weight = cfg.MODEL.REG_LOSS_WEIGHT / (
roi_features.shape[0] if isinstance(
self.bbox_loss, nn.SmoothL1Loss
) else 1.
)
self.bbox_loss, nn.SmoothL1Loss) else 1.)
outputs.update(collections.OrderedDict([
('cls_loss', self.cls_loss(
cls_score, self.data['labels'])),
......
......@@ -13,13 +13,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.torch.nn import functional as nn_funcs
from seetadet.core.config import cfg
from seetadet.modules import init
from seetadet.modules import nn
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
......@@ -65,11 +62,9 @@ class FPN(nn.Module):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
upscale_output = nn.upsample(fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
upscale_output = nn.upsample(fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
......@@ -88,11 +83,9 @@ class FPN(nn.Module):
for i in range(self.highest_backbone_lvl - 1, min_lvl - 1, -1):
lateral_output = self.C[i - min_lvl](features[i - 1])
if self.coarsest_stride > 0:
upscale_output = nn_funcs.upsample(
fpn_input, scale_factor=2)
upscale_output = nn.upsample(fpn_input, scale_factor=2)
else:
upscale_output = nn_funcs.upsample(
fpn_input, size=lateral_output.shape[2:])
upscale_output = nn.upsample(fpn_input, size=lateral_output.shape[2:])
fpn_input = lateral_output.__iadd__(upscale_output)
outputs.insert(0, self.P[i - min_lvl](fpn_input))
return outputs
......
......@@ -27,8 +27,7 @@ from seetadet.modules import vision
class MaskRCNN(nn.Module):
def __init__(self, dim_in=256):
"""Generate mask regions for R-CNN series.
r"""Generate mask regions for R-CNN series.
The pipeline is as follows:
......@@ -41,7 +40,10 @@ class MaskRCNN(nn.Module):
... -> Features /
"""
def __init__(self, dim_in=256):
super(MaskRCNN, self).__init__()
self.data = {}
self.roi_head_dim = dim_in * (cfg.FRCNN.ROI_XFORM_RESOLUTION ** 2)
self.fc6 = nn.Linear(self.roi_head_dim, cfg.FRCNN.MLP_HEAD_DIM)
self.fc7 = nn.Linear(cfg.FRCNN.MLP_HEAD_DIM, cfg.FRCNN.MLP_HEAD_DIM)
......@@ -67,6 +69,7 @@ class MaskRCNN(nn.Module):
self.cls_loss = nn.CrossEntropyLoss()
self.bbox_loss = nn.SmoothL1Loss(reduction='sum')
self.mask_loss = nn.BCEWithLogitsLoss()
self.compute_mask_score = None
# Compute spatial scales according to strides
self.spatial_scales = [
1. / (2 ** lvl)
......
......@@ -51,16 +51,19 @@ def conv_quintet(dim_in, dim_out, ks, stride):
class Setting(object):
V2 = (
[2, 3, 4, 3, 3, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 24, 32, 64, 96, 160, 320, 1280],
)
PROXYLESS_MOBILE = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
[32, 16, 32, 40, 80, 96, 192, 320, 1280],
)
PROXYLESS_GPU = (
[4, 4, 4, 4, 4, 1],
[2, 2, 2, 1, 2, 1],
......@@ -68,7 +71,7 @@ class Setting(object):
)
def Stem(dim_out, stride=1):
def stem(dim_out, stride=1):
return torch.nn.Sequential(
torch.nn.Conv2d(
3, dim_out,
......@@ -128,27 +131,25 @@ class NASMobileNet(nn.Module):
self.num_layers = len(choices)
assert sum(repeats) == self.num_layers
# + Stem
# Stem
self.bootstrap = vision.Bootstrap()
self.conv1 = Stem(out_channels[0], stride=2)
self.conv1 = stem(out_channels[0], stride=2)
self.stage1 = Choice(out_channels[0], out_channels[1], mb=1, ks=3)
dim_in = out_channels[1]
self.feature_dims = [out_channels[-1]]
# + Body
# Body
self.layers = []
for name, rep, dim_out, stride in zip(
names, repeats, out_channels[2:], strides):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_in, dim_out, stride=stride))
block_cls = select_block(choices[len(self.layers)])
self.layers.append(block_cls(dim_in, dim_out, stride=stride))
if stride == 2:
self.feature_dims.insert(
-1, dim_in * self.layers[-1].mb)
for i in range(rep - 1):
self.layers.append(select_block(
choices[len(self.layers)]
)(dim_out, dim_out, stride=1))
block_cls = select_block(choices[len(self.layers)])
self.layers.append(block_cls(dim_out, dim_out, stride=1))
fullname = 'stage%s' % name.split('!')[0]
seq = getattr(self, fullname, [])
seq += self.layers[-rep:]
......@@ -157,6 +158,7 @@ class NASMobileNet(nn.Module):
dim_in = dim_out
self.conv6 = nn.Sequential(*conv_triplet(dim_in, out_channels[-1]))
self.last_outputs = None
self.reset_parameters()
def reset_parameters(self):
......@@ -195,6 +197,8 @@ class NASMobileNet(nn.Module):
if y is not None:
outputs.append(y)
outputs.append(self.conv6(x))
if self.training:
self.last_outputs = outputs
return outputs
......
......@@ -157,6 +157,7 @@ class ResNet(nn.Module):
self.layer2 = self.make_blocks(block, filters[1], layers[1], 2)
self.layer3 = self.make_blocks(block, filters[2], layers[2], 2, drop3)
self.layer4 = self.make_blocks(block, filters[3], layers[3], 2, drop4)
self.last_outputs = None
self.reset_parameters()
def reset_parameters(self):
......@@ -203,7 +204,6 @@ class ResNet(nn.Module):
outputs += [self.layer4(outputs[-1])]
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs
......
......@@ -27,6 +27,7 @@ from seetadet.modules import nn
class RetinaNet(nn.Module):
def __init__(self, dim_in=256):
super(RetinaNet, self).__init__()
self.data = {}
########################################
# RetinaNet outputs #
......@@ -42,8 +43,7 @@ class RetinaNet(nn.Module):
)
# Packed as [C, A] not [A, C]
self.C = cfg.MODEL.NUM_CLASSES - 1
A = len(cfg.RETINANET.ASPECT_RATIOS) * \
cfg.RETINANET.SCALES_PER_OCTAVE
A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE
self.cls_score = nn.Conv3x3(dim_in, self.C * A, bias=True)
self.bbox_pred = nn.Conv3x3(dim_in, 4 * A, bias=True)
self.cls_prob = nn.Sigmoid(inplace=True)
......@@ -60,7 +60,6 @@ class RetinaNet(nn.Module):
self.bbox_loss = nn.IoULoss()
else:
self.bbox_loss = nn.SmoothL1Loss(0.1111)
self.centerness_loss = nn.BCEWithLogitsLoss(reduction='valid')
self.reset_parameters()
def reset_parameters(self):
......@@ -99,10 +98,10 @@ class RetinaNet(nn.Module):
if len(features) > 1:
# Concat them if necessary
return torch.cat(cls_score_wide, dim=2), \
torch.cat(bbox_pred_wide, dim=2)
return (torch.cat(cls_score_wide, dim=2),
torch.cat(bbox_pred_wide, dim=2))
else:
return cls_score_wide[0], bbox_pred_wide[0], \
return cls_score_wide[0], bbox_pred_wide[0]
def compute_losses(self, features, cls_score, bbox_pred, gt_boxes):
"""Compute the RetinaNet classification loss and regression loss.
......@@ -115,12 +114,8 @@ class RetinaNet(nn.Module):
The classification logits.
bbox_pred : dragon.vm.torch.Tensor
The bbox regression logits.
centerness : dragon.vm.torch.Tensor
The centerness logits.
gt_boxes : numpy.ndarray
The packed ground-truth boxes.
ims_info : numpy.ndarray
The information of input images.
"""
self.data = \
......@@ -144,9 +139,7 @@ class RetinaNet(nn.Module):
def forward(self, *args, **kwargs):
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict([('bbox_pred', bbox_pred)])
if self.training:
outputs.update(
self.compute_losses(
......@@ -164,5 +157,4 @@ class RetinaNet(nn.Module):
bbox_pred,
kwargs['ims_info'],
)
return outputs
......@@ -27,6 +27,7 @@ class RPN(nn.Module):
def __init__(self, dim_in=256):
super(RPN, self).__init__()
self.data = {}
##################################
# RPN outputs #
......
......@@ -25,6 +25,7 @@ from seetadet.modules import nn
class SSD(nn.Module):
def __init__(self, feature_dims):
super(SSD, self).__init__()
self.data = {}
########################################
# SSD outputs #
......@@ -111,10 +112,10 @@ class SSD(nn.Module):
self.bbox_pred[i](bbox_x)
.permute(0, 2, 3, 1).view(0, -1))
# Concat them if necessary
return \
torch.cat(cls_score_wide, dim=1).view(0, -1, cfg.MODEL.NUM_CLASSES), \
torch.cat(bbox_pred_wide, dim=1).view(0, -1, self.box_dim)
return (torch.cat(cls_score_wide, dim=1)
.view(0, -1, cfg.MODEL.NUM_CLASSES),
torch.cat(bbox_pred_wide, dim=1)
.view(0, -1, self.box_dim))
def compute_losses(
self,
......@@ -181,12 +182,10 @@ class SSD(nn.Module):
prior_boxes = self.prior_box(kwargs['features'])
cls_score, bbox_pred = self.compute_outputs(kwargs['features'])
cls_score, bbox_pred = cls_score.float(), bbox_pred.float()
outputs = collections.OrderedDict([
('bbox_pred', bbox_pred),
('prior_boxes', prior_boxes),
])
if self.training:
outputs.update(
self.compute_losses(
......@@ -200,5 +199,4 @@ class SSD(nn.Module):
else:
outputs['cls_prob'] = \
self.softmax(cls_score)
return outputs
......@@ -89,6 +89,7 @@ class VGG(nn.Module):
stride=kps[2]
),
)
self.last_outputs = None
self.reset_parameters()
def reset_parameters(self):
......@@ -144,7 +145,6 @@ class VGG(nn.Module):
outputs.append(x)
if self.training:
# Hold the frozen outputs if necessary
self.last_outputs = outputs
return outputs
......
......@@ -14,12 +14,12 @@ from __future__ import division
from __future__ import print_function
from dragon.vm.torch import nn
from dragon.vm.torch.autograd.function import Function
from dragon.vm.torch import autograd
from seetadet.core.config import cfg
class _NonMaxSuppression(Function):
class _NonMaxSuppression(autograd.Function):
"""Filter out boxes that have high IoU with selected ones."""
def __init__(self, key, dev, **kwargs):
......@@ -36,7 +36,7 @@ class _NonMaxSuppression(Function):
return self.dispatch([dets], [self.alloc()])
class _RetinaNetDecoder(Function):
class _RetinaNetDecoder(autograd.Function):
"""Decode predictions from RetinaNet."""
def __init__(self, key, dev, **kwargs):
......@@ -61,7 +61,7 @@ class _RetinaNetDecoder(Function):
return self.dispatch(inputs, [self.alloc()], check_device=False)
class _RPNDecoder(Function):
class _RPNDecoder(autograd.Function):
"""Decode proposal regions from RPN."""
def __init__(self, key, dev, **kwargs):
......
......@@ -18,7 +18,6 @@ from __future__ import print_function
import dragon
from dragon.vm import torch
from dragon.vm.torch import nn
from dragon.vm.torch.nn import functional
from seetadet.core.config import cfg
......@@ -72,9 +71,11 @@ class CrossEntropyLoss(object):
class IoULoss(nn.Module):
def __init__(self, reduction='mean', delta_weights=None):
super(IoULoss, self).__init__()
self.data = {} # Store the detached tensors
self.reduction = reduction
self.delta_weights = delta_weights
# Store the detached tensors
self.data = {}
self.x1, self.y1, self.x2, self.y2 = None, None, None, None
def transform_inv(self, boxes, deltas, name=None):
widths = boxes[:, 2] - boxes[:, 0]
......@@ -166,8 +167,8 @@ class L2Normalize(nn.Module):
self.weight = nn.Parameter(torch.Tensor(num_features).fill_(init))
def forward(self, input):
out = functional.normalize(input, p=2, dim=1, eps=1e-5)
out = functional.affine(out, self.weight)
out = nn.functional.normalize(input, p=2, dim=1, eps=1e-5)
out = nn.functional.affine(out, self.weight)
return out
......@@ -198,13 +199,14 @@ class SmoothL1Loss(nn.Module):
self.reduction = reduction
def forward(self, input, target, *args):
return functional.smooth_l1_loss(
return nn.functional.smooth_l1_loss(
input, target,
beta=self.beta,
reduction=self.reduction,
)
# Aliases
Affine = nn.Affine
AvgPool2d = nn.AvgPool2d
BatchNorm2d = nn.BatchNorm2d
......@@ -220,3 +222,4 @@ ModuleList = nn.ModuleList
Sequential = nn.Sequential
Sigmoid = nn.Sigmoid
Softmax = nn.Softmax
upsample = nn.functional.upsample
......@@ -16,12 +16,14 @@ from __future__ import print_function
import functools
from dragon.vm import torch
from dragon.vm import torchvision
from dragon.vm.torch import nn
from seetadet.core.config import cfg
def roi_align(input, boxes, spatial_scale, size):
return torch.vision.ops.roi_align(
return torchvision.ops.roi_align(
input, boxes,
output_size=(size, size),
spatial_scale=spatial_scale,
......@@ -29,14 +31,14 @@ def roi_align(input, boxes, spatial_scale, size):
def roi_pool(input, boxes, spatial_scale, size):
return torch.vision.ops.roi_pool(
return torchvision.ops.roi_pool(
input, boxes,
output_size=(size, size),
spatial_scale=spatial_scale,
)
class Bootstrap(torch.nn.Module):
class Bootstrap(nn.Module):
"""Process the input to match the computation."""
def __init__(self):
......
......@@ -13,8 +13,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from dragon.vm.onnx import exporter
from dragon.vm.onnx import helper
from dragon.vm.onnx.core import exporter
from dragon.vm.onnx.core import helper
@exporter.register('RetinanetDecoder')
......
......@@ -18,7 +18,6 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import dragon.vm.torch as torch
from seetadet.core.config import cfg
from seetadet.utils.image import distort_image
......
......@@ -39,12 +39,10 @@ def intersection(boxes1, boxes2):
all_pairs_max_xmin = np.maximum(x_min1, np.transpose(x_min2))
inter_heights = np.maximum(
np.zeros(all_pairs_max_ymin.shape),
all_pairs_min_ymax - all_pairs_max_ymin
)
all_pairs_min_ymax - all_pairs_max_ymin)
inter_widths = np.maximum(
np.zeros(all_pairs_max_xmin.shape),
all_pairs_min_xmax - all_pairs_max_xmin
)
all_pairs_min_xmax - all_pairs_max_xmin)
return inter_heights * inter_widths
......@@ -67,7 +65,6 @@ def iou(boxes1, boxes2):
inter = intersection(boxes1, boxes2)
area1 = boxes_area(boxes1)
area2 = boxes_area(boxes2)
union = \
np.expand_dims(area1, axis=1) + \
np.expand_dims(area2, axis=0) - inter
union = (np.expand_dims(area1, axis=1) +
np.expand_dims(area2, axis=0) - inter)
return inter / union
......@@ -17,9 +17,9 @@ import importlib.machinery
import os
import dragon
import numpy as np
from dragon.core.util import six
from dragon.vm import torch
import numpy as np
from seetadet.core.config import cfg
......@@ -61,8 +61,7 @@ def load_library(library_prefix):
"""
loader_details = (
importlib.machinery.ExtensionFileLoader,
importlib.machinery.EXTENSION_SUFFIXES
)
importlib.machinery.EXTENSION_SUFFIXES)
library_prefix = os.path.abspath(library_prefix)
lib_dir, fullname = os.path.split(library_prefix)
finder = importlib.machinery.FileFinder(lib_dir, loader_details)
......@@ -70,8 +69,7 @@ def load_library(library_prefix):
if ext_specs is None:
raise ImportError(
'Could not find the pre-built library '
'for <%s>.' % library_prefix
)
'for <%s>.' % library_prefix)
dragon.load_library(ext_specs.origin)
......
......@@ -80,25 +80,21 @@ def scale_image(img):
im_scale = float(target_size) / float(im_size_min)
if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
processed_ims.append(
cv2.resize(
processed_ims.append(cv2.resize(
img,
dsize=None,
fx=im_scale, fy=im_scale,
interpolation=cv2.INTER_LINEAR,
))
interpolation=cv2.INTER_LINEAR))
ims_scales.append(im_scale)
else:
# Scale image into a square
for target_size in cfg.TEST.SCALES:
im_scale_h = float(target_size) / img.shape[0]
im_scale_w = float(target_size) / img.shape[1]
processed_ims.append(
cv2.resize(
processed_ims.append(cv2.resize(
img,
dsize=(target_size, target_size),
interpolation=cv2.INTER_LINEAR,
))
interpolation=cv2.INTER_LINEAR))
ims_scales.append([im_scale_h, im_scale_w])
return processed_ims, ims_scales
......@@ -31,21 +31,16 @@ _logger_lock = threading.Lock()
def get_logger():
global _logger
# Use double-checked locking to avoid taking lock unnecessarily.
if _logger:
return _logger
_logger_lock.acquire()
try:
if _logger:
return _logger
logger = _logging.getLogger('SeetaDet')
logger.setLevel('INFO')
logger.propagate = False
if True:
# Determine whether we are in an interactive environment
_interactive = False
......@@ -56,7 +51,6 @@ def get_logger():
except AttributeError:
# Even now, we may be in an interactive shell with `python -i`.
_interactive = _sys.flags.interactive
# If we are in an interactive environment (like Jupyter), set loglevel
# to INFO and pipe the output to stdout.
if _interactive:
......@@ -64,15 +58,12 @@ def get_logger():
_logging_target = _sys.stdout
else:
_logging_target = _sys.stderr
# Add the output handler.
_handler = _logging.StreamHandler(_logging_target)
_handler.setFormatter(_logging.Formatter('%(levelname)s %(message)s'))
logger.addHandler(_handler)
_logger = logger
return _logger
finally:
_logger_lock.release()
......@@ -106,11 +97,6 @@ def info(msg, *args, **kwargs):
get_logger().info(_detailed_msg(msg), *args, **kwargs)
def warn(msg, *args, **kwargs):
if is_root():
get_logger().warn(_detailed_msg(msg), *args, **kwargs)
def warning(msg, *args, **kwargs):
if is_root():
get_logger().warning(_detailed_msg(msg), *args, **kwargs)
......
......@@ -26,15 +26,10 @@ from seetadet.utils import boxes as box_util
def dismantle_masks(gt_boxes, gt_masks, num_images):
"""Dismantle the packed ground-truth boxes."""
return [
gt_boxes[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
][:, :-1] for i in range(num_images)
], [
gt_masks[
np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]
] for i in range(num_images)
]
return ([gt_boxes[np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]][:, :-1]
for i in range(num_images)],
[gt_masks[np.where(gt_boxes[:, -1].astype(np.int32) == i)[0]]
for i in range(num_images)])
def intersect_box_mask(ex_box, gt_box, gt_mask):
......@@ -66,18 +61,14 @@ def mask_overlap(box1, box2, mask1, mask2):
return 0
w = x2 - x1 + 1
h = y2 - y1 + 1
# Get masks in the intersection part
start_ya = y1 - box1[1]
start_xa = x1 - box1[0]
inter_mask_a = mask1[start_ya: start_ya + h, start_xa:start_xa + w]
start_yb = y1 - box2[1]
start_xb = x1 - box2[0]
inter_mask_b = mask2[start_yb: start_yb + h, start_xb:start_xb + w]
assert inter_mask_a.shape == inter_mask_b.shape
inter = np.logical_and(inter_mask_b, inter_mask_a).sum()
union = mask1.sum() + mask2.sum() - inter
if union < 1.:
......
......@@ -7,10 +7,6 @@
#
# <https://opensource.org/licenses/BSD-2-Clause>
#
# Codes are based on:
#
# <https://github.com/facebookresearch/Detectron/blob/master/lib/utils/logging.py>
#
# ------------------------------------------------------------
from __future__ import absolute_import
......@@ -30,17 +26,17 @@ class SmoothedValue(object):
self.total = 0.0
self.count = 0
def AddValue(self, value):
def add_value(self, value):
self.deque.append(value)
self.series.append(value)
self.count += 1
self.total += value
def GetMedianValue(self):
def get_median(self):
return np.median(self.deque)
def GetAverageValue(self):
def get_average(self):
return np.mean(self.deque)
def GetGlobalAverageValue(self):
def get_global_average(self):
return self.total / self.count
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!