Commit e3b9b641, authored Oct 26, 2020 by Ting PAN
Fix the bug of scaling flipped box
1 parent 9d12d142
Showing 23 changed files, with 215 additions and 187 deletions:
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_1x.yml
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_2x.yml
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN_640.yml
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_1x.yml
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_2x.yml
configs/retinanet/coco_retinanet_R-50-FPN_416_6x.yml
configs/retinanet/coco_retinanet_R-50-FPN_512_6x.yml
configs/retinanet/coco_retinanet_R-50-FPN_800_1x.yml
configs/retinanet/coco_retinanet_R-50-FPN_800_2x.yml
configs/retinanet/voc_retinanet_R-50-FPN_416.yml
configs/retinanet/voc_retinanet_R-50-FPN_512.yml
configs/ssd/voc_ssd_VGG-16_300.yml
configs/ssd/voc_ssd_VGG-16_512.yml
scripts/coco/im2rec.py
scripts/coco/maker.py
scripts/coco/maskgen.py → scripts/coco/roidb.py
seetadet/algo/faster_rcnn/data_transformer.py
seetadet/algo/mask_rcnn/data_transformer.py
seetadet/algo/mask_rcnn/proposal_target.py
seetadet/algo/ssd/data_transformer.py
seetadet/core/config.py
seetadet/utils/mask.py
setup.py
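The substantive change: ground-truth boxes were previously flipped after being scaled, against the rounded width of the resized image; they are now flipped first, in original image coordinates, and scaled afterwards. For orientation, a minimal sketch of the horizontal flip assumed throughout (the real helper lives in seetadet/utils/boxes.py as box_util.flip_boxes; this illustrative version mirrors [x1, y1, x2, y2] boxes):

    import numpy as np

    def flip_boxes(boxes, width):
        # Mirror [x1, y1, x2, y2] boxes horizontally in an image `width` pixels wide.
        flipped = boxes.copy()
        flipped[:, 0] = width - boxes[:, 2] - 1  # new x1 comes from the old x2
        flipped[:, 2] = width - boxes[:, 0] - 1  # new x2 comes from the old x1
        return flipped

Because the resized width is an integer while width * im_scale generally is not, flipping against the resized width after scaling shifts every x coordinate; flipping against the original width before scaling avoids the mismatch.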
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_1x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -21,24 +21,23 @@ MODEL:
             'teddy bear', 'hair drier', 'toothbrush']
 SOLVER:
   BASE_LR: 0.02
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_1x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
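Quoting the string values (TYPE: 'faster_rcnn' rather than TYPE: faster_rcnn) makes the YAML scalars unambiguous. A quick way to sanity-check an edited config is with plain PyYAML (an assumption for inspection only, not SeetaDet's own config loader):

    import yaml  # pip install pyyaml

    with open('configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_1x.yml') as f:
        cfg = yaml.safe_load(f)

    # The keys touched by this commit:
    print(cfg['MODEL']['TYPE'])      # faster_rcnn
    print(cfg['TRAIN']['DATASET'])   # /data/coco_2017_train
    print(cfg['TEST']['JSON_FILE'])  # /data/instances_val2017.json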
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_2x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -21,24 +21,23 @@ MODEL:
             'teddy bear', 'hair drier', 'toothbrush']
 SOLVER:
   BASE_LR: 0.02
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_2x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN_640.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair', ...
@@ -18,7 +18,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_faster_rcnn_R-50-FPN_640
+  SNAPSHOT_PREFIX: 'voc_faster_rcnn_R-50-FPN_640'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
   DATASET: '/data/voc_0712_trainval'
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_1x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: mask_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'mask_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,7 +24,7 @@ SOLVER:
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_1x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
@@ -32,14 +32,14 @@ MRCNN:
   ROI_XFORM_RESOLUTION: 14
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   SCALES: [800]
   MAX_SIZE: 1333
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_2x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: mask_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'mask_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,7 +24,7 @@ SOLVER:
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_2x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
@@ -32,14 +32,14 @@ MRCNN:
   ROI_XFORM_RESOLUTION: 14
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   SCALES: [800]
   MAX_SIZE: 1333
configs/retinanet/coco_retinanet_R-50-FPN_416_6x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,22 +24,21 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [90000, 120000]
   MAX_STEPS: 135000
   SNAPSHOT_EVERY: 2500
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_416_6x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_416_6x'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 8
   SCALES: [416]
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [416]
configs/retinanet/coco_retinanet_R-50-FPN_512_6x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,22 +24,21 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [90000, 120000]
   MAX_STEPS: 135000
   SNAPSHOT_EVERY: 2500
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_512_6x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_512_6x'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 8
   SCALES: [512]
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [512]
configs/retinanet/coco_retinanet_R-50-FPN_800_1x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,21 +24,20 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_1x'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
configs/retinanet/coco_retinanet_R-50-FPN_800_2x.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'person', 'bicycle', 'car', 'motorcycle',
             'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', ...
@@ -24,21 +24,20 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
   LR_POLICY: steps_with_decay
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_2x'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
configs/retinanet/voc_retinanet_R-50-FPN_416.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair', ...
@@ -20,7 +20,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_416
+  SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_416'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
configs/retinanet/voc_retinanet_R-50-FPN_512.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 2
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair', ...
@@ -20,7 +20,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_512
+  SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_512'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
configs/ssd/voc_ssd_VGG-16_300.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [1.0, 1.0, 1.0]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: ssd
-  BACKBONE: vgg16_reduced_300
+  TYPE: 'ssd'
+  BACKBONE: 'vgg16_reduced_300'
   COARSEST_STRIDE: 0
   CLASSES: ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat', ...
@@ -31,7 +31,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_ssd_VGG-16_300
+  SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_300'
 TRAIN:
   WEIGHTS: '/model/VGG16.SSD.pkl'
   DATASET: '/data/voc_0712_trainval'
configs/ssd/voc_ssd_VGG-16_512.yml

@@ -2,8 +2,8 @@ NUM_GPUS: 2
 PIXEL_STDS: [1.0, 1.0, 1.0]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: ssd
-  BACKBONE: vgg16_reduced_512
+  TYPE: 'ssd'
+  BACKBONE: 'vgg16_reduced_512'
   CLASSES: ['__background__', 'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair', ...
@@ -32,7 +32,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_ssd_VGG-16_512
+  SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_512'
 TRAIN:
   WEIGHTS: '/model/VGG16.SSD.pkl'
   DATASET: '/data/voc_0712_trainval'
scripts/coco/im2rec.py

@@ -18,7 +18,7 @@ import os
 import shutil

 from maker import make_record
-from maskgen import make_mask, merge_mask
+from roidb import make_database

 if __name__ == '__main__':
@@ -27,30 +27,25 @@ if __name__ == '__main__':
     # Encode masks to RLE bytes
     if not os.path.exists('build'):
         os.makedirs('build')
-    make_mask('train', '2014', COCO_ROOT)
-    make_mask('valminusminival', '2014', COCO_ROOT)
-    make_mask('minival', '2014', COCO_ROOT)
-    merge_mask('trainval35k', '2014',
-               ['build/coco_2014_train_mask.pkl',
-                'build/coco_2014_valminusminival_mask.pkl'])
+    make_database('train', '2017', COCO_ROOT)
+    make_database('val', '2017', COCO_ROOT)

-    # coco_2014_trainval35k
+    # coco_2017_train
     make_record(
-        record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
-        images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
-                     os.path.join(COCO_ROOT, 'images/val2014')],
-        splits_path=[os.path.join(COCO_ROOT, 'splits'),
-                     os.path.join(COCO_ROOT, 'splits')],
-        mask_file='build/coco_2014_trainval35k_mask.pkl',
-        splits=['train', 'valminusminival'],
+        db_file='build/coco_2017_train.db.pkl',
+        record_file=os.path.join(COCO_ROOT, 'coco_2017_train'),
+        images_path=[os.path.join(COCO_ROOT, 'images/train2017')],
+        splits_path=[os.path.join(COCO_ROOT, 'splits')],
+        splits=['train2017'],
     )

-    # coco_2014_minival
+    # coco_2017_val
     make_record(
-        record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
-        images_path=os.path.join(COCO_ROOT, 'images/val2014'),
-        mask_file='build/coco_2014_minival_mask.pkl',
-        splits_path=os.path.join(COCO_ROOT, 'splits'),
-        splits=['minival'],
+        db_file='build/coco_2017_val.db.pkl',
+        record_file=os.path.join(COCO_ROOT, 'coco_2017_val'),
+        images_path=[os.path.join(COCO_ROOT, 'images/val2017')],
+        splits_path=[os.path.join(COCO_ROOT, 'splits')],
+        splits=['val2017'],
     )

     shutil.rmtree('build')
scripts/coco/maker.py

@@ -18,7 +18,7 @@ import dragon
 import numpy as np


-def make_example(image_file, mask_objects, im_scale=None):
+def make_example(image_file, objects, im_scale=None):
     filename = os.path.split(image_file)[-1]
     example = {'id': filename.split('.')[0], 'object': []}
@@ -39,7 +39,7 @@ def make_example(image_file, mask_objects, im_scale=None):
     example['height'], example['width'], example['depth'] = img.shape
     example['content'] = img_bytes

-    for ix, obj in enumerate(mask_objects):
+    for obj in objects:
         x1, y1, x2, y2 = obj['bbox']
         example['object'].append({
             'name': obj['name'],
@@ -58,7 +58,7 @@ def make_example(image_file, mask_objects, im_scale=None):
 def make_record(
     record_file,
     images_path,
-    mask_file,
+    db_file,
     splits_path,
     splits,
     ext='.jpg',
@@ -75,11 +75,11 @@ def make_record(
     assert len(splits) == len(splits_path)
     assert len(splits) == len(images_path)

-    if mask_file is not None:
-        with open(mask_file, 'rb') as f:
-            all_masks = pickle.load(f)
+    if db_file is not None:
+        with open(db_file, 'rb') as f:
+            all_entries = pickle.load(f)
     else:
-        all_masks = {}
+        all_entries = {}

     print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
@@ -133,8 +133,8 @@ def make_record(
                     count, total_line, now_time - start_time))
             filename = line.strip()
             image_file = os.path.join(images_path[db_idx], filename + ext)
-            mask_objects = all_masks[filename] if filename in all_masks else {}
-            writer.write(make_example(image_file, mask_objects, im_scale))
+            objects = all_entries[filename] if filename in all_entries else {}
+            writer.write(make_example(image_file, objects, im_scale))
     now_time = time.time()
     print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
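For orientation, make_example assembles one record per image. A sketch of its shape, using only the fields visible in this diff (per-object boxes, masks, and any other fields are elided and should not be taken as the full schema):

    example = {
        'id': '000000119993',              # filename stem
        'height': 427, 'width': 640, 'depth': 3,
        'content': b'<encoded image bytes>',
        'object': [
            # One entry per annotation, built from obj['name'], obj['bbox'], ...
            {'name': 'person'},
        ],
    }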
scripts/coco/maskgen.py → scripts/coco/roidb.py

@@ -74,11 +74,13 @@ class COCOWrapper(object):
     def image_path_from_index(self, index):
         """Construct an image path from the image's "index" identifier."""
-        # Example image path for index=119993:
-        # images/train2014/COCO_train2014_000000119993.jpg
-        file_name = ('COCO_' + self._data_name +
-                     '_' + str(index).zfill(12) + '.jpg')
+        # images/train2014/COCO_train2014_000000119993.jpg
+        # images/train2017/000000119993.jpg
+        filename = str(index).zfill(12) + '.jpg'
+        if '2014' in self._data_name:
+            filename = 'COCO_{}_{}'.format(self._data_name, filename)
         image_path = osp.join(self._data_path, 'images',
-                              self._data_name, file_name)
+                              self._data_name, filename)
         assert osp.exists(image_path), \
             'Path does not exist: {}'.format(image_path)
         return image_path
@@ -99,19 +101,18 @@ class COCOWrapper(object):
         objects = self._COCO.loadAnns(ann_ids)
         # Sanitize boxes -- some are invalid
         valid_objects = []
-        mask, polygons = b'', []
         for obj in objects:
             x1 = float(max(0, obj['bbox'][0]))
             y1 = float(max(0, obj['bbox'][1]))
             x2 = float(min(width - 1, x1 + max(0, obj['bbox'][2] - 1)))
             y2 = float(min(height - 1, y1 + max(0, obj['bbox'][3] - 1)))
+            mask, polygons = b'', []
             if isinstance(obj['segmentation'], list):
-                for p in obj['segmentation']:
-                    if len(p) < 6:
-                        print('Remove Invalid segm.')
+                # Valid polygons have >= 3 points, so require >= 6 coordinates
+                polygons = [p for p in obj['segmentation'] if len(p) >= 6]
                 # mask_bytes = mask_utils.poly2bytes(poly, height, width)
             else:
                 # Crowd masks
                 # Some are encoded with height or width
@@ -141,25 +142,26 @@ class COCOWrapper(object):
         return len(self._classes)


-def make_mask(split, year, data_dir):
+def make_database(split, year, data_dir):
     coco = COCOWrapper(split, year, data_dir)
     print('Preparing to make split: {}, total {} images'
           .format(split, coco.num_images))
     if not osp.exists(osp.join(coco._data_path, 'splits')):
         os.makedirs(osp.join(coco._data_path, 'splits'))

-    gt_recs = collections.OrderedDict()
+    entries = collections.OrderedDict()
     for i in range(coco.num_images):
         filename = osp.basename(coco.image_path_at(i)).split('.')[0]
         h, w, objects = coco.annotation_at(i)
-        gt_recs[filename] = objects
+        entries[filename] = objects

     with open(osp.join('build',
-                       'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
-        pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
+                       'coco_' + year + '_' + split + '.db.pkl'), 'wb') as f:
+        pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)

-    with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
+    with open(osp.join(coco._data_path, 'splits',
+                       split + year + '.txt'), 'w') as f:
         for i in range(coco.num_images):
             filename = str(osp.basename(coco.image_path_at(i)).split('.')[0])
             if i != coco.num_images - 1:
@@ -167,16 +169,16 @@ def make_mask(split, year, data_dir):
                 f.write(filename)


-def merge_mask(split, year, mask_files):
-    gt_recs = collections.OrderedDict()
-    data_path = os.path.dirname(mask_files[0])
-    for mask_file in mask_files:
-        with open(mask_file, 'rb') as f:
-            recs = pickle.load(f)
-            gt_recs.update(recs)
+def merge_database(split, year, db_files):
+    entries = collections.OrderedDict()
+    data_path = os.path.dirname(db_files[0])
+    for db_file in db_files:
+        with open(db_file, 'rb') as f:
+            entries.update(pickle.load(f))
     with open(osp.join(data_path,
-                       'coco_' + year + '_' + split + '_mask.pkl'), 'wb') as f:
-        pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
+                       'coco_' + year + '_' + split + '.db.pkl'), 'wb') as f:
+        pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)
seetadet/algo/faster_rcnn/data_transformer.py

@@ -27,6 +27,8 @@ from seetadet.utils import image as image_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
+
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scales = cfg.TRAIN.SCALES
@@ -43,7 +45,7 @@ class DataTransformer(multiprocessing.Process):
         self.q_in = self.q_out = None
         self.daemon = True

-    def get_boxes(self, example, im_scale):
+    def get_boxes(self, example, im_scale, flipped):
         objects, num_objects = example.objects, 0
         height, width = example.height, example.width
         if not self._use_diff:
@@ -56,7 +58,7 @@ class DataTransformer(multiprocessing.Process):
         boxes = np.zeros((num_objects, 4), 'float32')
         gt_classes = np.zeros((num_objects,), 'float32')

-        # Filter the difficult instances
+        # Filter the difficult instances.
         object_idx = 0
         for obj in objects:
             if not self._use_diff and obj.get('difficult', 0) > 0:
@@ -69,10 +71,14 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1

-        # Scale the boxes to the detecting scale
+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
+
+        # Scale the boxes to the detecting scale.
         boxes *= im_scale

-        # Attach the classes
+        # Attach the classes.
         gt_boxes = np.empty((num_objects, 5), dtype=np.float32)
         gt_boxes[:, :4], gt_boxes[:, 4] = boxes, gt_classes
@@ -81,7 +87,7 @@ class DataTransformer(multiprocessing.Process):
     def get(self, example):
         example = Example(example)

-        # Resize
+        # Resize.
         img, im_scale = image_util.resize_image_with_target_size(
             example.image,
             target_size=npr.choice(self._scales),
@@ -89,22 +95,18 @@ class DataTransformer(multiprocessing.Process):
             random_scales=self._random_scales,
         )

-        # Flip
+        # Flip.
         flipped = False
         if self._use_flipped and npr.randint(2) > 0:
             img = img[:, ::-1]
             flipped = True

-        # Distort
+        # Distort.
         if self._use_distort:
             img = image_util.distort_image(img)

-        # Boxes
-        boxes = self.get_boxes(example, im_scale)
-
-        # Flip the boxes if necessary
-        if flipped:
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
+        # Boxes.
+        boxes = self.get_boxes(example, im_scale, flipped)

         # Standard outputs.
         outputs = {'image': img,
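The reordering matters because the two operations do not commute once the image has been resized: the resized width is rounded to an integer, and the flip's one-pixel offset scales differently on each side. A small numeric check (hypothetical numbers, reusing the illustrative flip_boxes from above):

    import numpy as np

    def flip_boxes(boxes, width):
        # Illustrative mirror of [x1, y1, x2, y2] boxes, as sketched earlier.
        flipped = boxes.copy()
        flipped[:, 0] = width - boxes[:, 2] - 1
        flipped[:, 2] = width - boxes[:, 0] - 1
        return flipped

    width, im_scale = 500, 1.603
    resized_w = int(round(width * im_scale))      # 802, not 801.5
    box = np.array([[100., 50., 300., 200.]])

    old = flip_boxes(box * im_scale, resized_w)   # buggy: scale, then flip in the resized image
    new = flip_boxes(box, width) * im_scale       # fixed: flip in the original image, then scale
    print(old - new)                              # x coordinates differ by about a pixel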
seetadet/algo/mask_rcnn/data_transformer.py

@@ -28,6 +28,8 @@ from seetadet.utils import image as image_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
+
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scales = cfg.TRAIN.SCALES
@@ -81,6 +83,10 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1

+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
+
         # Scale the boxes to the detecting scale.
         boxes *= im_scale
@@ -115,10 +121,6 @@ class DataTransformer(multiprocessing.Process):
         # Boxes and segmentations.
         boxes, segms = self.get_boxes_and_segms(example, im_scale, flipped)

-        # Flip the boxes if necessary.
-        if flipped:
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
-
         # Standard outputs.
         outputs = {'image': img,
                    'boxes': boxes,
seetadet/algo/mask_rcnn/proposal_target.py

@@ -124,37 +124,37 @@ class ProposalTarget(object):
 def compute_targets(
-    ex_rois,
-    gt_rois,
+    rois,
+    gt_boxes,
     gt_labels,
-    gt_segms,
-    mask_flags,
+    fg_segms,
+    fg_segms_flag,
     mask_size,
     im_scale,
 ):
     """Compute the bounding-box regression targets."""
-    assert ex_rois.shape[0] == gt_rois.shape[0]
-    assert ex_rois.shape[1] == 4
-    assert gt_rois.shape[1] == 4
+    assert rois.shape[0] == gt_boxes.shape[0]
+    assert rois.shape[1] == 4
+    assert gt_boxes.shape[1] == 4
     # Compute bbox regression targets
     fg_inds = np.where(gt_labels > 0)[0]
-    bbox_targets = box_util.bbox_transform(ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
+    bbox_targets = box_util.bbox_transform(rois, gt_boxes, cfg.BBOX_REG_WEIGHTS)
     # Compute mask classification targets
     mask_shape = [mask_size] * 2
-    ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
-    mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
-    for i in fg_inds:
-        if mask_flags[i] > 0:
-            if isinstance(gt_segms[i], list):
-                ret = mask_util.warp_mask_via_polygons(
-                    gt_segms[i], ex_rois_ori[i], mask_shape)
-            else:
-                gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
-                ret = mask_util.warp_mask_via_intersection(
-                    gt_segms[i], ex_rois_ori[i], gt_rois_ori[i], mask_shape)
-            if ret is not None:
-                mask_targets[i] = ret.astype('float32')
+    mask_targets = -np.ones([len(rois)] + mask_shape, 'float32')
+    rois_ori = rois / im_scale
+    rois_ori_int = np.round(rois_ori).astype(int)
+    gt_boxes_ori_int = np.round(gt_boxes / im_scale).astype(int)
+    for i, fg_idx in enumerate(fg_inds):
+        if fg_segms_flag[i] > 0:
+            if isinstance(fg_segms[i], list):
+                target = mask_util.warp_mask_via_polygons(
+                    fg_segms[i], rois_ori[i], mask_shape)
+            else:
+                target = mask_util.warp_mask_via_intersection(
+                    fg_segms[i], rois_ori_int[i], gt_boxes_ori_int[i], mask_shape)
+            if target is not None:
+                mask_targets[fg_idx] = target.astype(mask_targets.dtype)
     return bbox_targets, mask_targets
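The renamed arguments feed the usual R-CNN box-regression encoding; the loop now distinguishes i, the position within the foreground subset (indexing fg_segms and fg_segms_flag), from fg_idx, the RoI's position among all samples (indexing mask_targets). For readers without the codebase at hand, a sketch of that encoding in the widely used Fast R-CNN form (box_util.bbox_transform's exact implementation may differ; the weights stand in for cfg.BBOX_REG_WEIGHTS):

    import numpy as np

    def bbox_transform(rois, gt_boxes, weights=(10., 10., 5., 5.)):
        # Encode gt_boxes relative to rois as (dx, dy, dw, dh) regression targets.
        ws = rois[:, 2] - rois[:, 0] + 1.0
        hs = rois[:, 3] - rois[:, 1] + 1.0
        ctr_x = rois[:, 0] + 0.5 * ws
        ctr_y = rois[:, 1] + 0.5 * hs
        gt_ws = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
        gt_hs = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
        gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_ws
        gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_hs
        wx, wy, ww, wh = weights
        dx = wx * (gt_ctr_x - ctr_x) / ws
        dy = wy * (gt_ctr_y - ctr_y) / hs
        dw = ww * np.log(gt_ws / ws)
        dh = wh * np.log(gt_hs / hs)
        return np.stack([dx, dy, dw, dh], axis=1)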
seetadet/algo/ssd/data_transformer.py

@@ -27,6 +27,8 @@ from seetadet.utils import boxes as box_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
+
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scale = cfg.TRAIN.SCALES[0]
@@ -44,7 +46,7 @@ class DataTransformer(multiprocessing.Process):
         self.q_in = self.q_out = None
         self.daemon = True

-    def get_boxes(self, example):
+    def get_boxes(self, example, flipped):
         objects, num_objects = example.objects, 0
         height, width = example.height, example.width
         if not self._use_diff:
@@ -70,6 +72,10 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1

+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
+
         # Normalize.
         boxes[:, 0::2] /= width
         boxes[:, 1::2] /= height
@@ -82,25 +88,31 @@ class DataTransformer(multiprocessing.Process):
     def get(self, example):
         example = Example(example)
+        img = example.image
+
+        # Flip.
+        flipped = False
+        if self._use_flipped and npr.randint(2) > 0:
+            img = img[:, ::-1]
+            flipped = True

         # Boxes.
-        boxes = self.get_boxes(example)
+        boxes = self.get_boxes(example, flipped)

         # Return to avoid the invalid transforms.
         if len(boxes) == 0:
             return {'boxes': boxes}

         # Distort => Expand => Sample => Resize
-        img, boxes = self._apply_transform(example.image, boxes)
+        img, boxes = self._apply_transform(img, boxes)

         # Restore to the blob scale.
         boxes[:, :4] *= self._scale

-        # Flip.
-        if self._use_flipped and npr.randint(2) > 0:
-            img = img[:, ::-1]
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
-
         # Standard outputs.
-        outputs = {'image': img, 'boxes': boxes, 'im_info': img.shape[:2]}
+        outputs = {'image': img,
+                   'boxes': boxes,
+                   'im_info': img.shape[:2]}

         # Attach precomputed targets.
         if len(boxes) > 0:
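In the SSD pipeline the flip has to happen before normalization and the Distort => Expand => Sample chain, because after those steps the original width is no longer available. A worked example of the new order inside get_boxes (illustrative numbers):

    import numpy as np

    height, width = 375, 500
    boxes = np.array([[100., 50., 300., 200.]])

    # Flip in the original pixel frame, as get_boxes(example, flipped) now does...
    flipped = boxes.copy()
    flipped[:, 0] = width - boxes[:, 2] - 1
    flipped[:, 2] = width - boxes[:, 0] - 1

    # ...then normalize to [0, 1] so later crops and expands can rescale freely.
    flipped[:, 0::2] /= width
    flipped[:, 1::2] /= height
    print(flipped)  # [[0.398  0.1333 0.798  0.5333]]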
seetadet/core/config.py

@@ -333,10 +333,12 @@ __C.FRCNN.NEGATIVE_OVERLAP_HI = 0.5
 __C.FRCNN.NEGATIVE_OVERLAP_LO = 0.0

 # RoI transform function
-# Values supported: 'RoIAlign', 'RoIAlign'
+# Values supported: 'RoIAlign', 'RoIPool'
 __C.FRCNN.ROI_XFORM_METHOD = 'RoIAlign'

 # RoI transform output resolution
 __C.FRCNN.ROI_XFORM_RESOLUTION = 7

+# Resampling window size for RoI transformation
+__C.FRCNN.ROI_XFORM_SAMPLING_RATIO = 0
@@ -362,10 +364,12 @@ __C.MRCNN = AttrDict()
 __C.MRCNN.RESOLUTION = 28

 # RoI transform function
-# Values supported: 'RoIAlign', 'RoIAlign'
+# Values supported: 'RoIAlign', 'RoIPool'
 __C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'

 # RoI transform output resolution
 __C.MRCNN.ROI_XFORM_RESOLUTION = 14

+# Resampling window size for RoI transformation
+__C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0
@@ -438,6 +442,7 @@ __C.SOLVER.DISPLAY = 20
 # The interval to snapshot a model
 __C.SOLVER.SNAPSHOT_EVERY = 5000

+# Prefix to yield the path: <prefix>_iter_XYZ.pkl
 __C.SOLVER.SNAPSHOT_PREFIX = ''
@@ -451,25 +456,34 @@ __C.SOLVER.MAX_STEPS = 40000
 # Base learning rate for the specified schedule
 __C.SOLVER.BASE_LR = 0.001

+# The uniform interval for LRScheduler
+__C.SOLVER.DECAY_STEP = 1
+
+# The custom intervals for LRScheduler
+__C.SOLVER.DECAY_STEPS = []
+
+# The decay factor for exponential LRScheduler
+__C.SOLVER.DECAY_GAMMA = 0.1
+
 # Warm up to ``BASE_LR`` over this number of steps
 __C.SOLVER.WARM_UP_STEPS = 500

 # Start the warm up from ``BASE_LR`` * ``FACTOR``
 __C.SOLVER.WARM_UP_FACTOR = 0.333

+# The type of LRScheduler
+__C.SOLVER.LR_POLICY = 'steps_with_decay'
+
 # Momentum to use with SGD
 __C.SOLVER.MOMENTUM = 0.9

 # L2 regularization for weight parameters
 __C.SOLVER.WEIGHT_DECAY = 0.0001

 # L2 regularization for legacy bias parameters
 __C.SOLVER.WEIGHT_DECAY_BIAS = 0.0

 # L2 norm factor for clipping gradients
 __C.SOLVER.CLIP_NORM = 0.0
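The new solver keys (DECAY_STEP, DECAY_STEPS, DECAY_GAMMA, LR_POLICY) describe the learning-rate schedule. A sketch of how a Detectron-style 'steps_with_decay' policy with linear warm-up typically evaluates these knobs (an illustration of the convention, not necessarily SeetaDet's exact scheduler code):

    def get_lr(step, base_lr=0.001, decay_steps=(60000, 80000), decay_gamma=0.1,
               warm_up_steps=500, warm_up_factor=0.333):
        # Linear warm-up from base_lr * warm_up_factor to base_lr.
        if step < warm_up_steps:
            alpha = step / warm_up_steps
            return base_lr * (warm_up_factor * (1.0 - alpha) + alpha)
        # Multiply by decay_gamma at every milestone already passed.
        num_decays = sum(step >= s for s in decay_steps)
        return base_lr * decay_gamma ** num_decays

    print(get_lr(0), get_lr(1000), get_lr(70000))  # 0.000333 0.001 0.0001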
seetadet/utils/mask.py

@@ -14,6 +14,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import copy
+
 import cv2
 import numpy as np
 import PIL.Image
@@ -37,32 +39,37 @@ def warp_mask_via_intersection(mask, box1, box2, size):
     inter_mask = mask[y1:y2 + 1, x1:x2 + 1]
     target_h = box1[3] - box1[1] + 1
     target_w = box1[2] - box1[0] + 1
-    warped_mask = np.zeros((target_h, target_w), dtype=mask.dtype)
+    warped_mask = np.zeros((target_h, target_w), dtype='uint8')
     warped_mask[ex_start_y:ex_start_y + h,
                 ex_start_x:ex_start_x + w] = inter_mask
     if not isinstance(size, (tuple, list)):
         size = (size, size)
     mask = PIL.Image.fromarray(warped_mask)
-    return np.array(mask.resize((size[1], size[0]), PIL.Image.NEAREST))
+    mask = mask.resize((size[1], size[0]), PIL.Image.NEAREST)
+    return np.array(mask)


 def warp_mask_via_polygons(polygons, box, size):
     """Warp mask via polygons."""
-    w = np.maximum(box[2] - box[0], 1)
-    h = np.maximum(box[3] - box[1], 1)
+    w, h = box[2] - box[0], box[3] - box[1]
     if not isinstance(size, (tuple, list)):
         size = (size, size)
-    polygons_norm = []
-    for poly in polygons:
-        p = np.array(poly, dtype=np.float32)
-        p[0::2] = (p[0::2] - box[0]) * size[1] / w
-        p[1::2] = (p[1::2] - box[1]) * size[0] / h
-        polygons_norm.append(p)
-    rle = mask_tools.frPyObjects(polygons_norm, size[0], size[1])
-    mask = np.array(mask_tools.decode(rle))
-    mask = np.sum(mask, axis=2)
-    mask = np.array(mask > 0)
-    return mask
+    ratio_h = size[0] / max(h, 0.1)
+    ratio_w = size[1] / max(w, 0.1)
+    polygons = copy.deepcopy(polygons)
+    for p in polygons:
+        p[0::2] = p[0::2] - box[0]
+        p[1::2] = p[1::2] - box[1]
+    if ratio_h == ratio_w:
+        for p in polygons:
+            p *= ratio_h
+    else:
+        for p in polygons:
+            p[0::2] *= ratio_w
+            p[1::2] *= ratio_h
+    rle_objs = mask_tools.frPyObjects(polygons, size[0], size[1])
+    rle_objs = [mask_tools.merge(rle_objs)]
+    return mask_tools.decode(rle_objs)[:, :, 0]
@@ -148,7 +155,7 @@ def project_masks(
     w = np.maximum(w, 1)
     h = np.maximum(h, 1)
     mask = cv2.resize(padded_mask, (w, h))
-    mask = np.array(mask > thresh, 'uint8')
+    mask = np.array(mask >= thresh, 'uint8')
     x1 = max(ref_box[0], 0)
     y1 = max(ref_box[1], 0)
     x2 = min(ref_box[2] + 1, width)
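The rewritten warp_mask_via_polygons rasterizes through pycocotools' merge/decode instead of summing per-polygon masks, and handles non-uniform aspect ratios with separate width/height scale factors. A standalone usage sketch of the same pycocotools calls (assuming mask_tools is pycocotools.mask and polygons are COCO-style flat [x1, y1, x2, y2, ...] arrays):

    import numpy as np
    from pycocotools import mask as mask_tools

    # A square polygon spanning (10, 10)-(20, 20), COCO-style flat coordinates.
    poly = np.array([10., 10., 20., 10., 20., 20., 10., 20.])
    box = [10., 10., 20., 20.]   # x1, y1, x2, y2 in image pixels
    size = (28, 28)              # target mask resolution (h, w)

    # Shift into box coordinates, then scale to the target grid.
    w, h = box[2] - box[0], box[3] - box[1]
    poly[0::2] = (poly[0::2] - box[0]) * size[1] / max(w, 0.1)
    poly[1::2] = (poly[1::2] - box[1]) * size[0] / max(h, 0.1)

    rle_objs = mask_tools.frPyObjects([poly], size[0], size[1])
    mask = mask_tools.decode([mask_tools.merge(rle_objs)])[:, :, 0]
    print(mask.shape, int(mask.sum()))  # (28, 28), close to a filled 28 x 28 square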
setup.py

@@ -5,7 +5,7 @@
 # You should have received a copy of the BSD 2-Clause License
 # along with the software. If not, See,
 #
-#    <https://opensource.org/licenses/BSD-2-Clause>
+# <https://opensource.org/licenses/BSD-2-Clause>
 #
 # ------------------------------------------------------------