Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SeetaResearch
/
SeetaDet
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit bf361560
authored
Apr 09, 2019
by
Ting PAN
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bump to 0.1.1
1 parent
c8535116
Show whitespace changes
Inline
Side-by-side
Showing
30 changed files
with
342 additions
and
143 deletions
CHANGES
README.md
configs/faster_rcnn/coco_faster_rcnn_R-101-FPN_1x.yml
configs/faster_rcnn/coco_faster_rcnn_R-101-FPN_2x.yml
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN.yml
configs/faster_rcnn/voc_faster_rcnn_VGG-16-C4.yml
configs/retinanet/coco_retinanet_400_R-50-FPN_1x.yml
configs/retinanet/coco_retinanet_400_R-50-FPN_4x.yml
configs/retinanet/voc_retinanet_300_AirNet-FPN.yml
configs/retinanet/voc_retinanet_300_R-18-FPN.yml
configs/retinanet/voc_retinanet_300_R-34-FPN.yml
configs/ssd/voc_ssd_300_AirNet-5b.yml
configs/ssd/voc_ssd_300_VGG-16.yml
lib/core/config.py
lib/datasets/factory.py
lib/faster_rcnn/data/data_transformer.py
lib/modeling/__init__.py
lib/modeling/airnet.py
lib/modeling/base.py
lib/modeling/detector.py
lib/modeling/factory.py
lib/modeling/fast_rcnn.py
lib/modeling/fpn.py
lib/modeling/resnet.py
lib/modeling/retinanet.py
lib/modeling/rpn.py
lib/modeling/ssd.py
lib/modeling/vgg.py
lib/retinanet/test.py
lib/ssd/data/preprocessing/expand.py
CHANGES
View file @
bf36156
------------------------------------------------------------------------
The list of most significant changes made over time in SeetaDet.
SeetaDet 0.1.1 (20190409)
Dragon Minimum Required (Version 0.3.0.0)
Changes:
Preview Features:
- Add RandomCrop/RandomPad for ScaleJittering.
- Add ResNet18/ResNet34/AirNet for R-CNN and RetinaNet.
- Use C++ Implemented Decoder for RetinaNet instead.
Bugs fixed:
- None
------------------------------------------------------------------------
SeetaDet 0.1.0 (20190314)
Dragon Minimum Required (Version 0.3.0.0)
...
...
README.md
View file @
bf36156
...
...
@@ -67,9 +67,11 @@ python export.py --cfg <MODEL_YAML> --exp_dir <EXP_DIR> --iter <ITERATION>
| :------: | :------: |
|
[
VGG16.SSD
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.SSD.pth
)
| SSD |
|
[
VGG16.RCNN
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/VGG16.RCNN.pth
)
| R-CNN |
|
[
R-18.Affine
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-18.Affine.pth
)
| R-CNN, RetinaNet |
|
[
R-34.Affine
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-34.Affine.pth
)
| R-CNN, RetinaNet |
|
[
R-50.Affine
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-50.Affine.pth
)
| R-CNN, RetinaNet |
|
[
R-101.Affine
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/R-101.Affine.pth
)
| R-CNN, RetinaNet |
|
[
AirNet.
SSD
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.SSD.pth
)
|
SSD |
|
[
AirNet.
Affine
](
http://dragon.seetatech.com/download/models/SeetaDet/imagenet/AirNet.Affine.pth
)
| R-CNN, RetinaNet,
SSD |
## References
...
...
configs/faster_rcnn/coco_faster_rcnn_R-101-FPN_1x.yml
View file @
bf36156
...
...
@@ -33,14 +33,14 @@ FRCNN:
ROI_XFORM_RESOLUTION
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-101.Affine.pth'
DATABASE
:
'
taas:
/data/coco_2014_trainval35k_lmdb'
DATABASE
:
'
/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH
:
2
USE_DIFF
:
False
# Do not use crowd objects
BATCH_SIZE
:
512
SCALES
:
[
800
]
MAX_SIZE
:
1333
TEST
:
DATABASE
:
'
taas:
/data/coco_2014_minival_lmdb'
DATABASE
:
'
/data/coco_2014_minival_lmdb'
JSON_FILE
:
'
/data/instances_minival2014.json'
PROTOCOL
:
'
coco'
RPN_POST_NMS_TOP_N
:
1000
...
...
configs/faster_rcnn/coco_faster_rcnn_R-101-FPN_2x.yml
View file @
bf36156
...
...
@@ -33,14 +33,14 @@ FRCNN:
ROI_XFORM_RESOLUTION
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-101.Affine.pth'
DATABASE
:
'
taas:
/data/coco_2014_trainval35k_lmdb'
DATABASE
:
'
/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH
:
2
USE_DIFF
:
False
# Do not use crowd objects
BATCH_SIZE
:
512
SCALES
:
[
800
]
MAX_SIZE
:
1333
TEST
:
DATABASE
:
'
taas:
/data/coco_2014_minival_lmdb'
DATABASE
:
'
/data/coco_2014_minival_lmdb'
JSON_FILE
:
'
/data/instances_minival2014.json'
PROTOCOL
:
'
coco'
RPN_POST_NMS_TOP_N
:
1000
...
...
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN.yml
View file @
bf36156
...
...
@@ -24,13 +24,13 @@ FRCNN:
ROI_XFORM_RESOLUTION
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-50.Affine.pth'
DATABASE
:
'
taas:
/data/voc_0712_trainval_lmdb'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
2
BATCH_SIZE
:
128
SCALES
:
[
600
]
MAX_SIZE
:
1000
TEST
:
DATABASE
:
'
taas:
/data/voc_2007_test_lmdb'
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
RPN_POST_NMS_TOP_N
:
1000
SCALES
:
[
600
]
...
...
configs/faster_rcnn/voc_faster_rcnn_VGG-16-C4.yml
View file @
bf36156
...
...
@@ -29,14 +29,14 @@ FRCNN:
MLP_HEAD_DIM
:
4096
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/VGG16.RCNN.pth'
DATABASE
:
'
taas:
/data/voc_0712_trainval_lmdb'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
RPN_MIN_SIZE
:
16
IMS_PER_BATCH
:
2
BATCH_SIZE
:
128
SCALES
:
[
600
]
MAX_SIZE
:
1000
TEST
:
DATABASE
:
'
taas:
/data/voc_2007_test_lmdb'
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
RPN_MIN_SIZE
:
16
RPN_POST_NMS_TOP_N
:
300
...
...
configs/retinanet/coco_retinanet_400_R-50-FPN_1x.yml
View file @
bf36156
...
...
@@ -33,12 +33,12 @@ FPN:
RPN_MAX_LEVEL
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-50.Affine.pth'
DATABASE
:
'
taas:
/data/coco_2014_trainval35k_lmdb'
DATABASE
:
'
/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH
:
8
SCALES
:
[
400
]
MAX_SIZE
:
666
TEST
:
DATABASE
:
'
taas:
/data/coco_2014_minival_lmdb'
DATABASE
:
'
/data/coco_2014_minival_lmdb'
JSON_FILE
:
'
/data/instances_minival2014.json'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
...
...
configs/retinanet/coco_retinanet_400_R-50-FPN_4x.yml
View file @
bf36156
...
...
@@ -37,15 +37,15 @@ DROPBLOCK:
DECREMENT
:
0.000005
# * 20000 = 0.1
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-50.Affine.pth'
DATABASE
:
'
taas:
/data/coco_2014_trainval35k_lmdb'
DATABASE
:
'
/data/coco_2014_trainval35k_lmdb'
IMS_PER_BATCH
:
8
SCALES
:
[
400
]
MAX_SIZE
:
666
SCALE_JITTERING
:
True
COLOR_JITTERING
:
True
SCALE_RANGE
:
[
0.
8
,
1.2
]
SCALE_RANGE
:
[
0.
75
,
1.33
]
TEST
:
DATABASE
:
'
taas:
/data/coco_2014_minival_lmdb'
DATABASE
:
'
/data/coco_2014_minival_lmdb'
JSON_FILE
:
'
/data/instances_minival2014.json'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
...
...
configs/retinanet/voc_retinanet_300_AirNet-FPN.yml
0 → 100644
View file @
bf36156
NUM_GPUS
:
1
VIS
:
False
VIS_ON_FILE
:
False
MODEL
:
TYPE
:
retinanet
BACKBONE
:
airnet.fpn
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
cow'
,
'
diningtable'
,
'
dog'
,
'
horse'
,
'
motorbike'
,
'
person'
,
'
pottedplant'
,
'
sheep'
,
'
sofa'
,
'
train'
,
'
tvmonitor'
]
NUM_CLASSES
:
21
SOLVER
:
BASE_LR
:
0.02
WEIGHT_DECAY
:
0.0001
LR_POLICY
:
steps_with_decay
STEPS
:
[
40000
,
50000
,
60000
]
MAX_ITERS
:
60000
SNAPSHOT_ITERS
:
5000
SNAPSHOT_PREFIX
:
voc_retinanet_300
FPN
:
RPN_MIN_LEVEL
:
3
RPN_MAX_LEVEL
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/AirNet.Affine.pth'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
32
SCALES
:
[
300
]
MAX_SIZE
:
500
SCALE_RANGE
:
[
0.5
,
2.0
]
SCALE_JITTERING
:
True
COLOR_JITTERING
:
True
TEST
:
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH
:
1
SCALES
:
[
300
]
MAX_SIZE
:
500
NMS
:
0.45
\ No newline at end of file
configs/retinanet/voc_retinanet_300_R-18-FPN.yml
0 → 100644
View file @
bf36156
NUM_GPUS
:
1
VIS
:
False
VIS_ON_FILE
:
False
MODEL
:
TYPE
:
retinanet
BACKBONE
:
resnet18.fpn
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
cow'
,
'
diningtable'
,
'
dog'
,
'
horse'
,
'
motorbike'
,
'
person'
,
'
pottedplant'
,
'
sheep'
,
'
sofa'
,
'
train'
,
'
tvmonitor'
]
NUM_CLASSES
:
21
SOLVER
:
BASE_LR
:
0.01
WEIGHT_DECAY
:
0.0001
LR_POLICY
:
steps_with_decay
STEPS
:
[
40000
,
50000
,
60000
]
WARM_UP_ITERS
:
2000
MAX_ITERS
:
60000
SNAPSHOT_ITERS
:
5000
SNAPSHOT_PREFIX
:
voc_retinanet_300
FPN
:
RPN_MIN_LEVEL
:
3
RPN_MAX_LEVEL
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-18.Affine.pth'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
32
SCALES
:
[
300
]
MAX_SIZE
:
500
SCALE_RANGE
:
[
0.5
,
2.0
]
SCALE_JITTERING
:
True
COLOR_JITTERING
:
True
TEST
:
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH
:
1
SCALES
:
[
300
]
MAX_SIZE
:
500
NMS
:
0.45
\ No newline at end of file
configs/retinanet/voc_retinanet_300_R-34-FPN.yml
0 → 100644
View file @
bf36156
NUM_GPUS
:
1
VIS
:
False
VIS_ON_FILE
:
False
MODEL
:
TYPE
:
retinanet
BACKBONE
:
resnet34.fpn
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
cow'
,
'
diningtable'
,
'
dog'
,
'
horse'
,
'
motorbike'
,
'
person'
,
'
pottedplant'
,
'
sheep'
,
'
sofa'
,
'
train'
,
'
tvmonitor'
]
NUM_CLASSES
:
21
SOLVER
:
BASE_LR
:
0.01
WEIGHT_DECAY
:
0.0001
LR_POLICY
:
steps_with_decay
STEPS
:
[
40000
,
50000
,
60000
]
WARM_UP_ITERS
:
2000
MAX_ITERS
:
60000
SNAPSHOT_ITERS
:
5000
SNAPSHOT_PREFIX
:
voc_retinanet_300
FPN
:
RPN_MIN_LEVEL
:
3
RPN_MAX_LEVEL
:
7
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/R-34.Affine.pth'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
32
SCALES
:
[
300
]
MAX_SIZE
:
500
SCALE_RANGE
:
[
0.5
,
2.0
]
SCALE_JITTERING
:
True
COLOR_JITTERING
:
True
TEST
:
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH
:
1
SCALES
:
[
300
]
MAX_SIZE
:
500
NMS
:
0.45
\ No newline at end of file
configs/ssd/voc_ssd_300_AirNet-5b.yml
View file @
bf36156
...
...
@@ -29,11 +29,11 @@ SSD:
STRIDES
:
[
8
,
16
,
32
]
ASPECT_RATIOS
:
[[
1
,
2
,
0.5
],
[
1
,
2
,
0.5
],
[
1
,
2
,
0.5
]]
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/AirNet.
SSD
.pth'
DATABASE
:
'
taas:
/data/voc_0712_trainval_lmdb'
WEIGHTS
:
'
/data/models/imagenet/AirNet.
Affine
.pth'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
32
TEST
:
DATABASE
:
'
taas:
/data/voc_2007_test_lmdb'
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH
:
8
NMS_TOP_K
:
400
...
...
configs/ssd/voc_ssd_300_VGG-16.yml
View file @
bf36156
...
...
@@ -33,10 +33,10 @@ SSD:
[
1
,
2
,
0.5
,
3
,
0.33
],
[
1
,
2
,
0.5
],
[
1
,
2
,
0.5
]]
TRAIN
:
WEIGHTS
:
'
/data/models/imagenet/VGG16.SSD.pth'
DATABASE
:
'
taas:
/data/voc_0712_trainval_lmdb'
DATABASE
:
'
/data/voc_0712_trainval_lmdb'
IMS_PER_BATCH
:
32
TEST
:
DATABASE
:
'
taas:
/data/voc_2007_test_lmdb'
DATABASE
:
'
/data/voc_2007_test_lmdb'
PROTOCOL
:
'
voc2007'
# 'voc2007', 'voc2010', 'coco'
IMS_PER_BATCH
:
8
NMS_TOP_K
:
400
...
...
lib/core/config.py
View file @
bf36156
...
...
@@ -13,6 +13,10 @@
#
# ------------------------------------------------------------
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os.path
as
osp
import
numpy
as
np
...
...
@@ -104,9 +108,6 @@ __C.TRAIN.RPN_MIN_SIZE = 0
# Set to -1 or a large value, e.g. 100000, to disable pruning anchors
__C
.
TRAIN
.
RPN_STRADDLE_THRESH
=
0
# Resume from the last checkpoint?
__C
.
TRAIN
.
RESUME
=
False
###########################################
# #
...
...
@@ -184,6 +185,7 @@ __C.TEST.DETECTIONS_PER_IM = 100
# #
###########################################
__C
.
MODEL
=
edict
()
# The type of the model
...
...
@@ -211,11 +213,6 @@ __C.MODEL.CLASSES = ['__background__']
# Add StopGrad at a specified stage so the bottom layers are frozen
__C
.
MODEL
.
FREEZE_AT
=
2
# Whether to use bias prior to improve the one-stage detector?
# Enabled if model type in ('ssd',)
# Retinanet is force to use bias prior
__C
.
MODEL
.
USE_BIAS_PRIOR
=
False
# Whether to use focal loss for one-stage detectors?
# Enabled if model type in ('ssd',)
# RetinaNet is forced to use focal loss
...
...
@@ -234,6 +231,7 @@ __C.MODEL.COARSEST_STRIDE = -1
# #
###########################################
__C
.
RPN
=
edict
()
# Strides for multiple rpn heads
...
...
@@ -252,6 +250,7 @@ __C.RPN.ASPECT_RATIOS = [0.5, 1, 2]
# #
###########################################
__C
.
RETINANET
=
edict
()
# Anchor aspect ratios to use
...
...
@@ -269,7 +268,7 @@ __C.RETINANET.ANCHOR_SCALE = 4
__C
.
RETINANET
.
NUM_CONVS
=
4
# During inference, #locs to select based on cls score before NMS is performed
__C
.
RETINANET
.
PRE_NMS_TOP_N
=
1
000
__C
.
RETINANET
.
PRE_NMS_TOP_N
=
5
000
# IoU overlap ratio for labeling an anchor as positive
# Anchors with >= iou overlap are labeled positive
...
...
@@ -279,9 +278,6 @@ __C.RETINANET.POSITIVE_OVERLAP = 0.5
# Anchors with < iou overlap are labeled negative
__C
.
RETINANET
.
NEGATIVE_OVERLAP
=
0.4
# Whether softmax should be used in classification branch training
__C
.
RETINANET
.
SOFTMAX
=
False
###########################################
# #
...
...
@@ -336,6 +332,7 @@ __C.FRCNN.ROI_XFORM_RESOLUTION = 7
# #
###########################################
__C
.
MRCNN
=
edict
()
# Resolution of mask predictions
...
...
@@ -354,6 +351,7 @@ __C.MRCNN.ROI_XFORM_RESOLUTION = 14
# #
###########################################
__C
.
SSD
=
edict
()
# Whether to enable FPN enhancement?
...
...
@@ -412,6 +410,7 @@ __C.SSD.SAMPLERS = [
# #
###########################################
__C
.
RESNET
=
edict
()
# Number of groups to use; 1 ==> ResNet; > 1 ==> ResNeXt
...
...
@@ -427,6 +426,7 @@ __C.RESNET.GROUP_WIDTH = 64
# #
###########################################
__C
.
DROPBLOCK
=
edict
()
# Whether to use drop block for more regularization
...
...
@@ -442,6 +442,7 @@ __C.DROPBLOCK.DECREMENT = 1e-6
# #
###########################################
__C
.
SOLVER
=
edict
()
# Base learning rate for the specified schedule
...
...
@@ -502,6 +503,7 @@ __C.SOLVER.SNAPSHOT_PREFIX = ''
# #
###########################################
# Number of GPUs to use (applies to both training and testing)
__C
.
NUM_GPUS
=
1
...
...
@@ -523,14 +525,6 @@ __C.BBOX_REG_WEIGHTS = (10., 10., 5., 5.)
# Default weights on (dx, dy, dw, dh, da) for normalizing rbox regression targets
__C
.
RBOX_REG_WEIGHTS
=
(
10.0
,
10.0
,
5.0
,
5.0
,
10.0
)
# Clip bounding box transformation predictions to prevent np.exp from
# overflowing
# Heuristic choice based on that would scale a 16 pixel anchor up to 1000 pixels
__C
.
BBOX_XFORM_CLIP
=
np
.
log
(
1000.
/
16.
)
# Clip ?
__C
.
USE_XFORM_CLIP
=
False
# Prior prob for the positives at the beginning of training.
# This is used to set the bias init for the logits layer
__C
.
PRIOR_PROB
=
0.01
...
...
lib/datasets/factory.py
View file @
bf36156
...
...
@@ -13,6 +13,7 @@
#
# ------------------------------------------------------------
import
os
from
lib.datasets.taas
import
TaaS
...
...
@@ -26,12 +27,12 @@ def get_imdb(name):
if
len
(
keys
)
>=
2
:
cls
,
source
=
keys
[
0
],
':'
.
join
(
keys
[
1
:])
if
cls
not
in
_GLOBAL_DATA_SETS
:
raise
KeyError
(
'Unknown
datas
et: {}'
.
format
(
cls
))
raise
KeyError
(
'Unknown
DataS
et: {}'
.
format
(
cls
))
return
_GLOBAL_DATA_SETS
[
cls
](
source
)
elif
len
(
keys
)
==
1
:
return
_GLOBAL_DATA_SETS
[
name
](
)
elif
os
.
path
.
exists
(
name
)
:
return
_GLOBAL_DATA_SETS
[
'taas'
](
name
)
else
:
raise
ValueError
(
'Illegal
format of image database: {}'
.
format
(
name
)
)
raise
ValueError
(
'Illegal
Database: {}'
+
name
)
def
list_imdbs
():
...
...
lib/faster_rcnn/data/data_transformer.py
View file @
bf36156
...
...
@@ -45,7 +45,13 @@ class DataTransformer(Process):
self
.
Q_in
=
self
.
Q1_out
=
self
.
Q2_out
=
None
self
.
daemon
=
True
def
make_roidb
(
self
,
ann_datum
,
im_scale
,
flip
=
False
,
offsets
=
None
):
def
make_record
(
self
,
ann_datum
,
im_scale
,
flip
=
False
,
offsets
=
None
,
):
annotations
=
ann_datum
.
annotation
n_objects
=
0
if
not
self
.
_use_diff
:
...
...
@@ -53,35 +59,43 @@ class DataTransformer(Process):
if
not
ann
.
difficult
:
n_objects
+=
1
else
:
n_objects
=
len
(
annotations
)
r
oidb
=
{
r
ecord
=
{
'width'
:
ann_datum
.
datum
.
width
,
'height'
:
ann_datum
.
datum
.
height
,
'gt_classes'
:
np
.
zeros
((
n_objects
,),
dtype
=
np
.
int32
),
'boxes'
:
np
.
zeros
((
n_objects
,
4
),
dtype
=
np
.
float32
),
}
ix
=
0
# Filter the difficult instances
instance_idx
=
0
for
ann
in
annotations
:
if
not
self
.
_use_diff
and
ann
.
difficult
:
continue
roidb
[
'boxes'
][
ix
,
:]
=
[
max
(
0
,
ann
.
x1
),
max
(
0
,
ann
.
y1
),
record
[
'boxes'
][
instance_idx
,
:]
=
[
max
(
0
,
ann
.
x1
),
max
(
0
,
ann
.
y1
),
min
(
ann
.
x2
,
ann_datum
.
datum
.
width
-
1
),
min
(
ann
.
y2
,
ann_datum
.
datum
.
height
-
1
)]
roidb
[
'gt_classes'
][
ix
]
=
self
.
_class_to_ind
[
ann
.
name
]
ix
+=
1
min
(
ann
.
y2
,
ann_datum
.
datum
.
height
-
1
),
]
record
[
'gt_classes'
][
instance_idx
]
=
self
.
_class_to_ind
[
ann
.
name
]
instance_idx
+=
1
if
flip
:
roidb
[
'boxes'
]
=
_flip_boxes
(
roidb
[
'boxes'
],
roidb
[
'width'
])
# Flip the boxes if necessary
if
flip
:
record
[
'boxes'
]
=
_flip_boxes
(
record
[
'boxes'
],
record
[
'width'
])
roidb
[
'boxes'
]
*=
im_scale
# Scale the boxes to the detecting scale
record
[
'boxes'
]
*=
im_scale
# Apply the offsets from scale jitter
if
offsets
is
not
None
:
r
oidb
[
'boxes'
][:,
0
::
2
]
+=
offsets
[
0
]
r
oidb
[
'boxes'
][:,
1
::
2
]
+=
offsets
[
1
]
r
oidb
[
'boxes'
][:,
:]
=
np
.
minimum
(
np
.
maximum
(
r
oidb
[
'boxes'
][:,
:],
0
),
r
ecord
[
'boxes'
][:,
0
::
2
]
+=
offsets
[
0
]
r
ecord
[
'boxes'
][:,
1
::
2
]
+=
offsets
[
1
]
r
ecord
[
'boxes'
][:,
:]
=
np
.
minimum
(
np
.
maximum
(
r
ecord
[
'boxes'
][:,
:],
0
),
[
offsets
[
2
][
1
]
-
1
,
offsets
[
2
][
0
]
-
1
]
*
2
)
return
r
oidb
return
r
ecord
@classmethod
def
get_image
(
cls
,
serialized
):
...
...
@@ -121,7 +135,14 @@ class DataTransformer(Process):
target_size
=
cfg
.
TRAIN
.
SCALES
[
scale_indices
]
im
,
im_scale
,
jitter
=
prep_im_for_blob
(
im
,
target_size
,
cfg
.
TRAIN
.
MAX_SIZE
)
# Crop or Pad
# Flip
flip
=
False
if
self
.
_use_flipped
:
if
npr
.
randint
(
0
,
2
)
>
0
:
im
=
im
[:,
::
-
1
,
:]
flip
=
True
# Random Crop or RandomPad
offsets
=
None
if
cfg
.
TRAIN
.
MAX_SIZE
>
0
:
if
jitter
!=
1.0
:
...
...
@@ -132,20 +153,13 @@ class DataTransformer(Process):
# To a square (target_size, target_size)
im
,
offsets
=
_get_image_with_target_size
([
target_size
]
*
2
,
im
)
# Flip
flip
=
False
if
self
.
_use_flipped
:
if
npr
.
randint
(
0
,
2
)
>
0
:
im
=
im
[:,
::
-
1
,
:]
flip
=
True
# Datum -> RoIDB
roidb
=
self
.
make_roidb
(
datum
,
im_scale
,
flip
,
offsets
)
# Datum -> Record
rec
=
self
.
make_record
(
datum
,
im_scale
,
flip
,
offsets
)
# Post-Process for gt boxes
# Shape like: [num_objects, {x1, y1, x2, y2, cls}]
gt_boxes
=
np
.
empty
((
len
(
r
oidb
[
'gt_classes'
]),
5
),
dtype
=
np
.
float32
)
gt_boxes
[:,
0
:
4
],
gt_boxes
[:,
4
]
=
r
oidb
[
'boxes'
],
roidb
[
'gt_classes'
]
gt_boxes
=
np
.
empty
((
len
(
r
ec
[
'gt_classes'
]),
5
),
dtype
=
np
.
float32
)
gt_boxes
[:,
0
:
4
],
gt_boxes
[:,
4
]
=
r
ec
[
'boxes'
],
rec
[
'gt_classes'
]
return
im
,
im_scale
,
gt_boxes
...
...
@@ -175,16 +189,16 @@ def _flip_boxes(boxes, width):
def
_get_image_with_target_size
(
target_size
,
im
):
im_shape
=
list
(
im
.
shape
)
width_diff
=
target_size
[
1
]
-
im_shape
[
1
]
offset_crop_width
=
max
(
-
width_diff
//
2
,
0
)
offset_pad_width
=
max
(
width_diff
//
2
,
0
)
offset_crop_width
=
np
.
random
.
randint
(
0
,
max
(
-
width_diff
,
0
)
+
1
)
offset_pad_width
=
np
.
random
.
randint
(
0
,
max
(
width_diff
,
0
)
+
1
)
height_diff
=
target_size
[
0
]
-
im_shape
[
0
]
offset_crop_height
=
max
(
-
height_diff
//
2
,
0
)
offset_pad_height
=
max
(
height_diff
//
2
,
0
)
offset_crop_height
=
np
.
random
.
randint
(
0
,
max
(
-
height_diff
,
0
)
+
1
)
offset_pad_height
=
np
.
random
.
randint
(
0
,
max
(
height_diff
,
0
)
+
1
)
im_shape
[
0
:
2
]
=
target_size
new_im
=
np
.
empty
(
im_shape
,
dtype
=
im
.
dtype
)
new_im
.
fill
(
127
)
new_im
[:]
=
cfg
.
PIXEL_MEANS
new_im
[
offset_pad_height
:
offset_pad_height
+
im
.
shape
[
0
],
offset_pad_width
:
offset_pad_width
+
im
.
shape
[
1
]]
=
\
...
...
lib/modeling/__init__.py
View file @
bf36156
...
...
@@ -11,6 +11,9 @@
# Import custom modules
from
lib.modeling.base
import
Bootstarp
from
lib.modeling.base
import
RPNDecoder
from
lib.modeling.base
import
RetinaNetDecoder
from
lib.modeling.base
import
conv1x1
,
conv3x3
,
bn
,
affine
from
lib.modeling.fpn
import
FPN
from
lib.modeling.rpn
import
RPN
from
lib.modeling.fast_rcnn
import
FastRCNN
...
...
lib/modeling/airnet.py
View file @
bf36156
...
...
@@ -15,16 +15,16 @@ from __future__ import print_function
import
dragon.vm.torch
as
torch
from
lib.modeling
.base
import
conv1x1
,
conv3x3
,
bn
from
lib.modeling
import
conv1x1
,
conv3x3
,
bn
,
affine
class
WideResBlock
(
torch
.
nn
.
Module
):
def
__init__
(
self
,
dim_in
,
dim_out
,
stride
=
1
,
downsample
=
None
):
super
(
WideResBlock
,
self
)
.
__init__
()
self
.
conv1
=
conv3x3
(
dim_in
,
dim_out
,
stride
)
self
.
bn1
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn1
=
affine
(
dim_out
)
self
.
conv2
=
conv3x3
(
dim_out
,
dim_out
)
self
.
bn2
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn2
=
affine
(
dim_out
)
self
.
downsample
=
downsample
self
.
relu
=
torch
.
nn
.
ReLU
(
inplace
=
True
)
...
...
@@ -50,15 +50,15 @@ class InceptionBlock(torch.nn.Module):
def
__init__
(
self
,
dim_in
,
dim_out
):
super
(
InceptionBlock
,
self
)
.
__init__
()
self
.
conv1
=
conv1x1
(
dim_in
,
dim_out
)
self
.
bn1
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn1
=
affine
(
dim_out
)
self
.
conv2
=
conv3x3
(
dim_out
,
dim_out
//
2
)
self
.
bn2
=
bn
(
dim_out
//
2
,
eps
=
1e-3
)
self
.
bn2
=
affine
(
dim_out
//
2
)
self
.
conv3a
=
conv3x3
(
dim_out
//
2
,
dim_out
)
self
.
bn3a
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn3a
=
affine
(
dim_out
)
self
.
conv3b
=
conv3x3
(
dim_out
,
dim_out
)
self
.
bn3b
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn3b
=
affine
(
dim_out
)
self
.
conv4
=
conv3x3
(
dim_out
*
3
,
dim_out
)
self
.
bn4
=
bn
(
dim_out
,
eps
=
1e-3
)
self
.
bn4
=
affine
(
dim_out
)
self
.
relu
=
torch
.
nn
.
ReLU
(
inplace
=
True
)
def
forward
(
self
,
x
):
...
...
@@ -93,7 +93,8 @@ class AirNet(torch.nn.Module):
def
__init__
(
self
,
blocks
,
num_stages
):
super
(
AirNet
,
self
)
.
__init__
()
self
.
dim_in
,
filters
=
64
,
[
64
,
128
,
256
,
384
]
self
.
feature_dims
=
filters
[
1
:
num_stages
-
1
]
self
.
feature_dims
=
[
None
,
None
]
+
\
filters
[
1
:
num_stages
-
1
]
self
.
conv1
=
torch
.
nn
.
Conv2d
(
3
,
64
,
kernel_size
=
7
,
...
...
@@ -101,7 +102,7 @@ class AirNet(torch.nn.Module):
padding
=
3
,
bias
=
False
,
)
self
.
bn1
=
bn
(
self
.
dim_in
,
eps
=
1e-3
)
self
.
bn1
=
affine
(
self
.
dim_in
)
self
.
relu
=
torch
.
nn
.
ReLU
(
inplace
=
True
)
self
.
maxpool
=
torch
.
nn
.
MaxPool2d
(
kernel_size
=
2
,
...
...
@@ -128,7 +129,7 @@ class AirNet(torch.nn.Module):
def
make_blocks
(
self
,
dim_out
,
blocks
,
stride
=
1
):
downsample
=
torch
.
nn
.
Sequential
(
conv1x1
(
self
.
dim_in
,
dim_out
,
stride
=
stride
),
bn
(
dim_out
,
eps
=
1e-3
),
affine
(
dim_out
),
)
layers
=
[
WideResBlock
(
self
.
dim_in
,
dim_out
,
stride
,
downsample
)]
self
.
dim_in
=
dim_out
...
...
@@ -148,7 +149,7 @@ class AirNet(torch.nn.Module):
x
=
self
.
maxpool
(
x
)
x
=
self
.
layer1
(
x
)
outputs
=
[
self
.
layer2
(
x
)]
outputs
=
[
None
,
None
,
self
.
layer2
(
x
)]
if
hasattr
(
self
,
'layer3'
):
outputs
+=
[
self
.
layer3
(
outputs
[
-
1
])]
if
hasattr
(
self
,
'layer4'
):
outputs
+=
[
self
.
layer4
(
outputs
[
-
1
])]
...
...
@@ -164,7 +165,7 @@ def airnet(num_stages):
)
return
AirNet
(
blocks
,
num_stages
)
def make_airnet_():
    """Return the network built by ``airnet(5)``.

    Gives the same result as ``make_airnet_5b``; the empty suffix lets the
    plain ``airnet`` backbone key resolve to a builder.
    """
    backbone = airnet(5)
    return backbone
def make_airnet_3b():
    """Return the network built by ``airnet(3)``."""
    backbone = airnet(3)
    return backbone
def make_airnet_4b():
    """Return the network built by ``airnet(4)``."""
    backbone = airnet(4)
    return backbone
def make_airnet_5b():
    """Return the network built by ``airnet(5)``."""
    backbone = airnet(5)
    return backbone
\ No newline at end of file
lib/modeling/base.py
View file @
bf36156
...
...
@@ -44,11 +44,11 @@ class Bootstarp(torch.nn.Module):
return
self
.
run
(
inputs
,
outputs
)
class
ProposalCXX
(
torch
.
nn
.
Module
):
"""
Extended operator to generate proposal regions
."""
class
RPNDecoder
(
torch
.
nn
.
Module
):
"""
Generate proposal regions from RPN
."""
def
__init__
(
self
):
super
(
ProposalCXX
,
self
)
.
__init__
()
super
(
RPNDecoder
,
self
)
.
__init__
()
self
.
register_op
()
self
.
K
=
(
cfg
.
FPN
.
ROI_MAX_LEVEL
-
cfg
.
FPN
.
ROI_MIN_LEVEL
+
1
)
\
...
...
@@ -58,6 +58,7 @@ class ProposalCXX(torch.nn.Module):
self
.
op_meta
=
{
'op_type'
:
'Proposal'
,
'arguments'
:
{
'det_type'
:
'RCNN'
,
'strides'
:
cfg
.
RPN
.
STRIDES
,
'ratios'
:
[
float
(
e
)
for
e
in
cfg
.
RPN
.
ASPECT_RATIOS
],
'scales'
:
[
float
(
e
)
for
e
in
cfg
.
RPN
.
SCALES
],
...
...
@@ -79,6 +80,38 @@ class ProposalCXX(torch.nn.Module):
return
outputs
if
isinstance
(
outputs
,
list
)
else
[
outputs
]
class RetinaNetDecoder(torch.nn.Module):
    """Decode RetinaNet predictions through the 'Proposal' operator.

    The anchor layout (strides, scales, aspect ratios) and the filtering
    thresholds are read from ``cfg`` once, at construction time, and stored
    in ``self.op_meta`` with ``det_type`` set to ``'RETINANET'``.
    """

    def __init__(self):
        super(RetinaNetDecoder, self).__init__()
        max_level = cfg.FPN.RPN_MAX_LEVEL
        min_level = cfg.FPN.RPN_MIN_LEVEL
        num_scales = cfg.RETINANET.SCALES_PER_OCTAVE
        base_scale = cfg.RETINANET.ANCHOR_SCALE

        # One stride per pyramid level, i.e. 2 ** level.
        self.strides = []
        for level in range(min_level, max_level + 1):
            self.strides.append(int(2. ** level))

        # Scales are spread geometrically inside a single octave:
        # base_scale * 2 ** (i / num_scales) for i in [0, num_scales).
        self.scales = []
        for idx in range(num_scales):
            # float() keeps true division under Python 2 as well.
            exponent = idx / float(num_scales)
            self.scales.append(base_scale * (2 ** exponent))

        self.register_op()

    def register_op(self):
        """Fill ``self.op_meta`` with the operator type and its arguments."""
        arguments = {
            'det_type': 'RETINANET',
            'strides': self.strides,
            'scales': self.scales,
            'ratios': list(map(float, cfg.RETINANET.ASPECT_RATIOS)),
            'pre_nms_top_n': cfg.RETINANET.PRE_NMS_TOP_N,
            'score_thresh': cfg.TEST.SCORE_THRESH,
        }
        self.op_meta = {'op_type': 'Proposal', 'arguments': arguments}

    def forward(self, features, cls_prob, bbox_pred, ims_info):
        """Run the registered operator on the given predictions.

        ``features`` is concatenated with a list, so it must itself be a
        list. ``ims_info`` is converted with ``to_tensor`` before being
        appended after ``cls_prob`` and ``bbox_pred``.

        Returns whatever ``self.run`` yields for a single registered output.
        NOTE(review): ``register_output`` and ``run`` are presumably provided
        by the ``torch.nn.Module`` base class -- confirm.
        """
        extras = [cls_prob, bbox_pred, to_tensor(ims_info)]
        op_inputs = features + extras
        op_outputs = [self.register_output()]
        return self.run(op_inputs, op_outputs)
def
conv1x1
(
dim_in
,
dim_out
,
stride
=
1
,
bias
=
False
):
"""1x1 convolution."""
return
torch
.
nn
.
Conv2d
(
...
...
lib/modeling/detector.py
View file @
bf36156
...
...
@@ -22,9 +22,12 @@ from lib.utils.logger import is_root
from
lib.modeling.factory
import
get_body_func
from
lib.modeling
import
(
Bootstarp
,
FPN
,
RPN
,
Bootstarp
,
FPN
,
RPN
,
FastRCNN
,
RetinaNet
,
SSD
,
RetinaNet
,
SSD
,
)
...
...
@@ -144,6 +147,7 @@ class Detector(torch.nn.Module):
# 3.3 Feature -> SSD
if
hasattr
(
self
,
'ssd'
):
features
=
list
(
filter
(
None
,
features
))
outputs
.
update
(
self
.
ssd
(
features
=
features
,
...
...
lib/modeling/factory.py
View file @
bf36156
...
...
@@ -55,7 +55,7 @@ for D in [16, 19]:
'lib.modeling.vgg.make_vgg_{}{}'
.
format
(
D
,
T
)
# AirNet
for
D
in
[
'3b'
,
'4b'
,
'5b'
]:
for
D
in
[
'
'
,
'
3b'
,
'4b'
,
'5b'
]:
_STORE
[
'BODY'
][
'airnet{}'
.
format
(
D
)]
=
\
'lib.modeling.airnet.make_airnet_{}'
.
format
(
D
)
...
...
lib/modeling/fast_rcnn.py
View file @
bf36156
...
...
@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
from
collections
import
OrderedDict
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
ProposalCXX
from
lib.modeling
import
RPNDecoder
class
FastRCNN
(
torch
.
nn
.
Module
):
...
...
@@ -43,7 +43,7 @@ class FastRCNN(torch.nn.Module):
self
.
fc7
=
torch
.
nn
.
Linear
(
cfg
.
FRCNN
.
MLP_HEAD_DIM
,
cfg
.
FRCNN
.
MLP_HEAD_DIM
)
self
.
cls_score
=
torch
.
nn
.
Linear
(
cfg
.
FRCNN
.
MLP_HEAD_DIM
,
cfg
.
MODEL
.
NUM_CLASSES
)
self
.
bbox_pred
=
torch
.
nn
.
Linear
(
cfg
.
FRCNN
.
MLP_HEAD_DIM
,
cfg
.
MODEL
.
NUM_CLASSES
*
4
)
self
.
proposal_cxx
=
ProposalCXX
()
self
.
rpn_decoder
=
RPNDecoder
()
self
.
proposal_layer
=
ProposalLayer
()
self
.
proposal_target_layer
=
ProposalTargetLayer
()
self
.
softmax
=
torch
.
nn
.
Softmax
(
dim
=
1
)
...
...
@@ -80,7 +80,7 @@ class FastRCNN(torch.nn.Module):
# Generate Proposals
# Apply the CXX implementation during inference
proposal_func
=
self
.
proposal_layer
\
if
self
.
training
else
self
.
proposal_cxx
if
self
.
training
else
self
.
rpn_decoder
self
.
rcnn_data
=
{
'rois'
:
proposal_func
(
kwargs
[
'features'
],
...
...
lib/modeling/fpn.py
View file @
bf36156
...
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
dragon.vm.torch
as
torch
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv1x1
,
conv3x3
from
lib.modeling
import
conv1x1
,
conv3x3
HIGHEST_BACKBONE_LVL
=
5
# E.g., "conv5"-like level
...
...
lib/modeling/resnet.py
View file @
bf36156
...
...
@@ -20,12 +20,10 @@ from __future__ import print_function
import
dragon.vm.torch
as
torch
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv1x1
,
conv3x3
,
affine
from
lib.modeling
import
conv1x1
,
conv3x3
,
affine
class
BasicBlock
(
torch
.
nn
.
Module
):
expansion
=
1
def
__init__
(
self
,
dim_in
,
dim_out
,
stride
=
1
,
downsample
=
None
,
dropblock
=
None
):
super
(
BasicBlock
,
self
)
.
__init__
()
...
...
@@ -110,9 +108,9 @@ class Bottleneck(torch.nn.Module):
class
ResNet
(
torch
.
nn
.
Module
):
def
__init__
(
self
,
block
,
layers
):
def
__init__
(
self
,
block
,
layers
,
filters
):
super
(
ResNet
,
self
)
.
__init__
()
self
.
dim_in
,
filters
=
64
,
[
256
,
512
,
1024
,
2048
]
self
.
dim_in
,
filters
=
filters
[
0
],
filters
[
1
:
]
self
.
feature_dims
=
[
self
.
dim_in
]
+
filters
self
.
conv1
=
torch
.
nn
.
Conv2d
(
3
,
64
,
...
...
@@ -200,9 +198,13 @@ def resnet(depth):
elif
depth
==
269
:
units
=
[
3
,
30
,
48
,
8
]
else
:
raise
ValueError
(
'Unsupported depth:
%
d'
%
depth
)
block
=
Bottleneck
if
depth
>=
50
else
BasicBlock
return
ResNet
(
block
,
units
)
filters
=
[
64
,
256
,
512
,
1024
,
2048
]
\
if
depth
>=
50
else
[
64
,
64
,
128
,
256
,
512
]
return
ResNet
(
block
,
units
,
filters
)
def make_resnet_18():
    """Return the network built by ``resnet(18)``."""
    backbone = resnet(18)
    return backbone
def make_resnet_34():
    """Return the network built by ``resnet(34)``."""
    backbone = resnet(34)
    return backbone
def make_resnet_50():
    """Return the network built by ``resnet(50)``."""
    backbone = resnet(50)
    return backbone
def make_resnet_101():
    """Return the network built by ``resnet(101)``."""
    backbone = resnet(101)
    return backbone
def make_resnet_152():
    """Return the network built by ``resnet(152)``."""
    backbone = resnet(152)
    return backbone
\ No newline at end of file
lib/modeling/retinanet.py
View file @
bf36156
...
...
@@ -18,8 +18,8 @@ import dragon.vm.torch as torch
from
collections
import
OrderedDict
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv3x3
from
lib.retinanet
import
AnchorTargetLayer
,
ProposalLayer
from
lib.modeling
import
conv3x3
,
RetinaNetDecoder
from
lib.retinanet
import
AnchorTargetLayer
class
RetinaNet
(
torch
.
nn
.
Module
):
...
...
@@ -37,29 +37,20 @@ class RetinaNet(torch.nn.Module):
conv3x3
(
dim_in
,
dim_in
,
bias
=
True
)
for
_
in
range
(
cfg
.
RETINANET
.
NUM_CONVS
))
# Packed as [C, A] not [A, C]
self
.
C
=
cfg
.
MODEL
.
NUM_CLASSES
\
if
cfg
.
RETINANET
.
SOFTMAX
\
else
cfg
.
MODEL
.
NUM_CLASSES
-
1
self
.
C
=
cfg
.
MODEL
.
NUM_CLASSES
-
1
A
=
len
(
cfg
.
RETINANET
.
ASPECT_RATIOS
)
*
\
cfg
.
RETINANET
.
SCALES_PER_OCTAVE
self
.
cls_score
=
conv3x3
(
dim_in
,
self
.
C
*
A
,
bias
=
True
)
self
.
bbox_pred
=
conv3x3
(
dim_in
,
4
*
A
,
bias
=
True
)
self
.
cls_prob
=
torch
.
nn
.
Softmax
(
dim
=
1
,
inplace
=
True
)
\
if
cfg
.
RETINANET
.
SOFTMAX
else
torch
.
nn
.
Sigmoid
(
inplace
=
True
)
self
.
cls_prob
=
torch
.
nn
.
Sigmoid
(
inplace
=
True
)
self
.
relu
=
torch
.
nn
.
ELU
(
inplace
=
True
)
self
.
proposal_layer
=
ProposalLay
er
()
self
.
decoder
=
RetinaNetDecod
er
()
########################################
# RetinaNet losses #
########################################
self
.
anchor_target_layer
=
AnchorTargetLayer
()
if
cfg
.
RETINANET
.
SOFTMAX
:
self
.
cls_loss
=
torch
.
nn
.
SoftmaxFocalLoss
(
ignore_index
=-
1
,
alpha
=
cfg
.
MODEL
.
FOCAL_LOSS_ALPHA
,
gamma
=
cfg
.
MODEL
.
FOCAL_LOSS_GAMMA
)
else
:
self
.
cls_loss
=
torch
.
nn
.
SigmoidFocalLoss
(
alpha
=
cfg
.
MODEL
.
FOCAL_LOSS_ALPHA
,
gamma
=
cfg
.
MODEL
.
FOCAL_LOSS_GAMMA
)
...
...
@@ -77,13 +68,6 @@ class RetinaNet(torch.nn.Module):
# Bias prior initialization for Focal Loss
# For details, See the official codes:
# https://github.com/facebookresearch/Detectron
if
cfg
.
RETINANET
.
SOFTMAX
:
bias
=
self
.
cls_score
.
bias
.
numpy
()
bias
=
bias
.
reshape
((
cfg
.
MODEL
.
NUM_CLASSES
,
-
1
))
bias
[
0
,
:]
=
math
.
log
(
(
cfg
.
MODEL
.
NUM_CLASSES
-
1
)
*
(
1
-
cfg
.
PRIOR_PROB
)
/
cfg
.
PRIOR_PROB
)
else
:
self
.
cls_score
.
bias
.
fill_
(
-
math
.
log
((
1
-
cfg
.
PRIOR_PROB
)
/
cfg
.
PRIOR_PROB
))
...
...
@@ -114,9 +98,12 @@ class RetinaNet(torch.nn.Module):
return
cls_score_wide
[
0
],
bbox_pred_wide
[
0
]
def
compute_losses
(
self
,
features
,
cls_score
,
bbox_pred
,
gt_boxes
,
ims_info
,
self
,
features
,
cls_score
,
bbox_pred
,
gt_boxes
,
ims_info
,
):
"""Compute the RetinaNet classification loss and regression loss.
...
...
@@ -173,9 +160,10 @@ class RetinaNet(torch.nn.Module):
)
else
:
outputs
[
'detections'
]
=
\
self
.
proposal_lay
er
(
self
.
decod
er
(
kwargs
[
'features'
],
self
.
cls_prob
(
cls_score
),
self
.
cls_prob
(
cls_score
)
.
permute
(
0
,
2
,
1
),
bbox_pred
,
kwargs
[
'ims_info'
],
)
...
...
lib/modeling/rpn.py
View file @
bf36156
...
...
@@ -17,7 +17,7 @@ import dragon.vm.torch as torch
from
collections
import
OrderedDict
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv1x1
,
conv3x3
from
lib.modeling
import
conv1x1
,
conv3x3
class
RPN
(
torch
.
nn
.
Module
):
...
...
@@ -59,7 +59,6 @@ class RPN(torch.nn.Module):
for
m
in
self
.
modules
():
if
isinstance
(
m
,
torch
.
nn
.
Conv2d
):
torch
.
nn
.
init
.
normal_
(
m
.
weight
,
std
=
0.01
)
torch
.
nn
.
init
.
constant_
(
m
.
bias
,
0
)
def
compute_outputs
(
self
,
features
):
"""Compute the RPN logits.
...
...
@@ -91,9 +90,12 @@ class RPN(torch.nn.Module):
return
cls_score_wide
[
0
],
bbox_pred_wide
[
0
]
def
compute_losses
(
self
,
features
,
cls_score
,
bbox_pred
,
gt_boxes
,
ims_info
,
self
,
features
,
cls_score
,
bbox_pred
,
gt_boxes
,
ims_info
,
):
"""Compute the RPN classification loss and regression loss.
...
...
lib/modeling/ssd.py
View file @
bf36156
...
...
@@ -17,11 +17,13 @@ import dragon.vm.torch as torch
from
collections
import
OrderedDict
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv3x3
from
lib.modeling
import
conv3x3
from
lib.ssd
import
(
PriorBoxLayer
,
MultiBoxMatchLayer
,
HardMiningLayer
,
MultiBoxTargetLayer
,
PriorBoxLayer
,
MultiBoxMatchLayer
,
HardMiningLayer
,
MultiBoxTargetLayer
,
)
...
...
@@ -38,6 +40,8 @@ class SSD(torch.nn.Module):
self
.
softmax
=
torch
.
nn
.
Softmax
(
dim
=
2
)
C
=
cfg
.
MODEL
.
NUM_CLASSES
feature_dims
=
list
(
filter
(
None
,
feature_dims
))
for
i
,
dim_in
in
enumerate
(
feature_dims
):
A
=
len
(
cfg
.
SSD
.
MULTIBOX
.
ASPECT_RATIOS
[
i
])
+
1
self
.
cls_score
.
append
(
conv3x3
(
dim_in
,
A
*
C
,
bias
=
True
))
...
...
@@ -89,8 +93,12 @@ class SSD(torch.nn.Module):
torch
.
cat
(
bbox_pred_wide
,
dim
=
1
)
.
view
(
0
,
-
1
,
4
)
def
compute_losses
(
self
,
prior_boxes
,
gt_boxes
,
cls_score
,
bbox_pred
,
cls_prob
,
self
,
prior_boxes
,
gt_boxes
,
cls_score
,
bbox_pred
,
cls_prob
,
):
"""Compute the SSD classification loss and regression loss.
...
...
lib/modeling/vgg.py
View file @
bf36156
...
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
dragon.vm.torch
as
torch
from
lib.core.config
import
cfg
from
lib.modeling
.base
import
conv1x1
,
conv3x3
from
lib.modeling
import
conv1x1
,
conv3x3
class
VGG
(
torch
.
nn
.
Module
):
...
...
lib/retinanet/test.py
View file @
bf36156
...
...
@@ -141,7 +141,7 @@ def test_net(net, server):
keep
=
soft_nms
(
cls_dets
,
cfg
.
TEST
.
NMS
,
method
=
cfg
.
TEST
.
SOFT_NMS_METHOD
,
sigma
=
cfg
.
TEST
.
SOFT_NMS_SIGMA
)
else
:
keep
=
nms
(
cls_dets
,
cfg
.
TEST
.
NMS
)
else
:
keep
=
nms
(
cls_dets
,
cfg
.
TEST
.
NMS
,
force_cpu
=
True
)
cls_dets
=
cls_dets
[
keep
,
:]
all_boxes
[
j
][
i
]
=
cls_dets
boxes_this_image
.
append
(
cls_dets
)
...
...
lib/ssd/data/preprocessing/expand.py
View file @
bf36156
...
...
@@ -43,7 +43,7 @@ class Expander(object):
w_off
=
int
(
math
.
floor
(
npr
.
uniform
(
0.0
,
expand_w
-
im_w
)))
new_im
=
np
.
empty
((
expand_h
,
expand_w
,
3
),
dtype
=
np
.
uint8
)
new_im
.
fill
(
127
)
new_im
[:]
=
cfg
.
PIXEL_MEANS
new_im
[
h_off
:
h_off
+
im_h
,
w_off
:
w_off
+
im_w
,
:]
=
im
if
gt_boxes
is
not
None
:
...
...
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment