Skip to content
Toggle navigation
P
Projects
G
Groups
S
Snippets
Help
SeetaResearch
/
SeetaDet
This project
Loading...
Sign in
Toggle navigation
Go to a project
Project
Repository
Issues
0
Merge Requests
0
Pipelines
Wiki
Snippets
Settings
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Commit e3b9b641
authored
Oct 26, 2020
by
Ting PAN
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix the bug of scaling flipped box
1 parent
9d12d142
Hide whitespace changes
Inline
Side-by-side
Showing
23 changed files
with
215 additions
and
187 deletions
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_1x.yml
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_2x.yml
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN_640.yml
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_1x.yml
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_2x.yml
configs/retinanet/coco_retinanet_R-50-FPN_416_6x.yml
configs/retinanet/coco_retinanet_R-50-FPN_512_6x.yml
configs/retinanet/coco_retinanet_R-50-FPN_800_1x.yml
configs/retinanet/coco_retinanet_R-50-FPN_800_2x.yml
configs/retinanet/voc_retinanet_R-50-FPN_416.yml
configs/retinanet/voc_retinanet_R-50-FPN_512.yml
configs/ssd/voc_ssd_VGG-16_300.yml
configs/ssd/voc_ssd_VGG-16_512.yml
scripts/coco/im2rec.py
scripts/coco/maker.py
scripts/coco/maskgen.py → scripts/coco/roidb.py
seetadet/algo/faster_rcnn/data_transformer.py
seetadet/algo/mask_rcnn/data_transformer.py
seetadet/algo/mask_rcnn/proposal_target.py
seetadet/algo/ssd/data_transformer.py
seetadet/core/config.py
seetadet/utils/mask.py
setup.py
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_1x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
faster_rcnn
TYPE
:
'
faster_rcnn'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -21,24 +21,23 @@ MODEL:
...
@@ -21,24 +21,23 @@ MODEL:
'
teddy
bear'
,
'
hair
drier'
,
'
toothbrush'
]
'
teddy
bear'
,
'
hair
drier'
,
'
toothbrush'
]
SOLVER
:
SOLVER
:
BASE_LR
:
0.02
BASE_LR
:
0.02
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
60000
,
80000
]
DECAY_STEPS
:
[
60000
,
80000
]
MAX_STEPS
:
90000
MAX_STEPS
:
90000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_faster_rcnn_R-50-FPN_800_1x
SNAPSHOT_PREFIX
:
'
coco_faster_rcnn_R-50-FPN_800_1x'
FRCNN
:
FRCNN
:
BATCH_SIZE
:
512
BATCH_SIZE
:
512
ROI_XFORM_RESOLUTION
:
7
ROI_XFORM_RESOLUTION
:
7
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
800
]
SCALES
:
[
800
]
...
...
configs/faster_rcnn/coco_faster_rcnn_R-50-FPN_800_2x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
faster_rcnn
TYPE
:
'
faster_rcnn'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -21,24 +21,23 @@ MODEL:
...
@@ -21,24 +21,23 @@ MODEL:
'
teddy
bear'
,
'
hair
drier'
,
'
toothbrush'
]
'
teddy
bear'
,
'
hair
drier'
,
'
toothbrush'
]
SOLVER
:
SOLVER
:
BASE_LR
:
0.02
BASE_LR
:
0.02
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
120000
,
160000
]
DECAY_STEPS
:
[
120000
,
160000
]
MAX_STEPS
:
180000
MAX_STEPS
:
180000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_faster_rcnn_R-50-FPN_800_2x
SNAPSHOT_PREFIX
:
'
coco_faster_rcnn_R-50-FPN_800_2x'
FRCNN
:
FRCNN
:
BATCH_SIZE
:
512
BATCH_SIZE
:
512
ROI_XFORM_RESOLUTION
:
7
ROI_XFORM_RESOLUTION
:
7
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
800
]
SCALES
:
[
800
]
...
...
configs/faster_rcnn/voc_faster_rcnn_R-50-FPN_640.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
faster_rcnn
TYPE
:
'
faster_rcnn'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
...
@@ -18,7 +18,7 @@ SOLVER:
...
@@ -18,7 +18,7 @@ SOLVER:
DECAY_STEPS
:
[
80000
,
100000
]
DECAY_STEPS
:
[
80000
,
100000
]
MAX_STEPS
:
120000
MAX_STEPS
:
120000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
voc_faster_rcnn_R-50-FPN_640
SNAPSHOT_PREFIX
:
'
voc_faster_rcnn_R-50-FPN_640'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/voc_0712_trainval'
DATASET
:
'
/data/voc_0712_trainval'
...
...
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_1x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
mask_rcnn
TYPE
:
'
mask_rcnn'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,7 +24,7 @@ SOLVER:
...
@@ -24,7 +24,7 @@ SOLVER:
DECAY_STEPS
:
[
60000
,
80000
]
DECAY_STEPS
:
[
60000
,
80000
]
MAX_STEPS
:
90000
MAX_STEPS
:
90000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_mask_rcnn_R-50-FPN_800_1x
SNAPSHOT_PREFIX
:
'
coco_mask_rcnn_R-50-FPN_800_1x'
FRCNN
:
FRCNN
:
BATCH_SIZE
:
512
BATCH_SIZE
:
512
ROI_XFORM_RESOLUTION
:
7
ROI_XFORM_RESOLUTION
:
7
...
@@ -32,14 +32,14 @@ MRCNN:
...
@@ -32,14 +32,14 @@ MRCNN:
ROI_XFORM_RESOLUTION
:
14
ROI_XFORM_RESOLUTION
:
14
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
SCALES
:
[
800
]
SCALES
:
[
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
...
...
configs/mask_rcnn/coco_mask_rcnn_R-50-FPN_800_2x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
mask_rcnn
TYPE
:
'
mask_rcnn'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,7 +24,7 @@ SOLVER:
...
@@ -24,7 +24,7 @@ SOLVER:
DECAY_STEPS
:
[
120000
,
160000
]
DECAY_STEPS
:
[
120000
,
160000
]
MAX_STEPS
:
180000
MAX_STEPS
:
180000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_mask_rcnn_R-50-FPN_800_2x
SNAPSHOT_PREFIX
:
'
coco_mask_rcnn_R-50-FPN_800_2x'
FRCNN
:
FRCNN
:
BATCH_SIZE
:
512
BATCH_SIZE
:
512
ROI_XFORM_RESOLUTION
:
7
ROI_XFORM_RESOLUTION
:
7
...
@@ -32,14 +32,14 @@ MRCNN:
...
@@ -32,14 +32,14 @@ MRCNN:
ROI_XFORM_RESOLUTION
:
14
ROI_XFORM_RESOLUTION
:
14
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
SCALES
:
[
800
]
SCALES
:
[
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
...
...
configs/retinanet/coco_retinanet_R-50-FPN_416_6x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,22 +24,21 @@ FPN:
...
@@ -24,22 +24,21 @@ FPN:
RPN_MAX_LEVEL
:
7
RPN_MAX_LEVEL
:
7
SOLVER
:
SOLVER
:
BASE_LR
:
0.01
BASE_LR
:
0.01
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
90000
,
120000
]
DECAY_STEPS
:
[
90000
,
120000
]
MAX_STEPS
:
135000
MAX_STEPS
:
135000
SNAPSHOT_EVERY
:
2500
SNAPSHOT_EVERY
:
2500
SNAPSHOT_PREFIX
:
coco_retinanet_R-50-FPN_416_6x
SNAPSHOT_PREFIX
:
'
coco_retinanet_R-50-FPN_416_6x'
PIPELINE
:
PIPELINE
:
TYPE
:
'
ssd'
TYPE
:
'
ssd'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
8
IMS_PER_BATCH
:
8
SCALES
:
[
416
]
SCALES
:
[
416
]
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
416
]
SCALES
:
[
416
]
...
...
configs/retinanet/coco_retinanet_R-50-FPN_512_6x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,22 +24,21 @@ FPN:
...
@@ -24,22 +24,21 @@ FPN:
RPN_MAX_LEVEL
:
7
RPN_MAX_LEVEL
:
7
SOLVER
:
SOLVER
:
BASE_LR
:
0.01
BASE_LR
:
0.01
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
90000
,
120000
]
DECAY_STEPS
:
[
90000
,
120000
]
MAX_STEPS
:
135000
MAX_STEPS
:
135000
SNAPSHOT_EVERY
:
2500
SNAPSHOT_EVERY
:
2500
SNAPSHOT_PREFIX
:
coco_retinanet_R-50-FPN_512_6x
SNAPSHOT_PREFIX
:
'
coco_retinanet_R-50-FPN_512_6x'
PIPELINE
:
PIPELINE
:
TYPE
:
'
ssd'
TYPE
:
'
ssd'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
8
IMS_PER_BATCH
:
8
SCALES
:
[
512
]
SCALES
:
[
512
]
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
512
]
SCALES
:
[
512
]
...
...
configs/retinanet/coco_retinanet_R-50-FPN_800_1x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,21 +24,20 @@ FPN:
...
@@ -24,21 +24,20 @@ FPN:
RPN_MAX_LEVEL
:
7
RPN_MAX_LEVEL
:
7
SOLVER
:
SOLVER
:
BASE_LR
:
0.01
BASE_LR
:
0.01
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
60000
,
80000
]
DECAY_STEPS
:
[
60000
,
80000
]
MAX_STEPS
:
90000
MAX_STEPS
:
90000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_retinanet_R-50-FPN_800_1x
SNAPSHOT_PREFIX
:
'
coco_retinanet_R-50-FPN_800_1x'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
800
]
SCALES
:
[
800
]
...
...
configs/retinanet/coco_retinanet_R-50-FPN_800_2x.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
person'
,
'
bicycle'
,
'
car'
,
'
motorcycle'
,
'
airplane'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
'
bus'
,
'
train'
,
'
truck'
,
'
boat'
,
'
traffic
light'
,
...
@@ -24,21 +24,20 @@ FPN:
...
@@ -24,21 +24,20 @@ FPN:
RPN_MAX_LEVEL
:
7
RPN_MAX_LEVEL
:
7
SOLVER
:
SOLVER
:
BASE_LR
:
0.01
BASE_LR
:
0.01
LR_POLICY
:
steps_with_decay
DECAY_STEPS
:
[
120000
,
160000
]
DECAY_STEPS
:
[
120000
,
160000
]
MAX_STEPS
:
180000
MAX_STEPS
:
180000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
coco_retinanet_R-50-FPN_800_2x
SNAPSHOT_PREFIX
:
'
coco_retinanet_R-50-FPN_800_2x'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/R-50.pkl'
WEIGHTS
:
'
/model/R-50.pkl'
DATASET
:
'
/data/coco_201
4_trainval35k
'
DATASET
:
'
/data/coco_201
7_train
'
IMS_PER_BATCH
:
2
IMS_PER_BATCH
:
2
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
SCALES
:
[
640
,
672
,
704
,
736
,
768
,
800
]
MAX_SIZE
:
1333
MAX_SIZE
:
1333
USE_DIFF
:
False
# Do not use crowd objects
USE_DIFF
:
False
# Do not use crowd objects
TEST
:
TEST
:
DATASET
:
'
/data/coco_201
4_mini
val'
DATASET
:
'
/data/coco_201
7_
val'
JSON_FILE
:
'
/data/instances_
minival2014
.json'
JSON_FILE
:
'
/data/instances_
val2017
.json'
PROTOCOL
:
'
coco'
PROTOCOL
:
'
coco'
IMS_PER_BATCH
:
1
IMS_PER_BATCH
:
1
SCALES
:
[
800
]
SCALES
:
[
800
]
...
...
configs/retinanet/voc_retinanet_R-50-FPN_416.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
...
@@ -20,7 +20,7 @@ SOLVER:
...
@@ -20,7 +20,7 @@ SOLVER:
DECAY_STEPS
:
[
80000
,
100000
]
DECAY_STEPS
:
[
80000
,
100000
]
MAX_STEPS
:
120000
MAX_STEPS
:
120000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
voc_retinanet_R-50-FPN_416
SNAPSHOT_PREFIX
:
'
voc_retinanet_R-50-FPN_416'
PIPELINE
:
PIPELINE
:
TYPE
:
'
ssd'
TYPE
:
'
ssd'
TRAIN
:
TRAIN
:
...
...
configs/retinanet/voc_retinanet_R-50-FPN_512.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_STDS
:
[
57.375
,
57.12
,
58.395
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
retinanet
TYPE
:
'
retinanet'
BACKBONE
:
resnet50.fpn
BACKBONE
:
'
resnet50.fpn'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
...
@@ -20,7 +20,7 @@ SOLVER:
...
@@ -20,7 +20,7 @@ SOLVER:
DECAY_STEPS
:
[
80000
,
100000
]
DECAY_STEPS
:
[
80000
,
100000
]
MAX_STEPS
:
120000
MAX_STEPS
:
120000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
voc_retinanet_R-50-FPN_512
SNAPSHOT_PREFIX
:
'
voc_retinanet_R-50-FPN_512'
PIPELINE
:
PIPELINE
:
TYPE
:
'
ssd'
TYPE
:
'
ssd'
TRAIN
:
TRAIN
:
...
...
configs/ssd/voc_ssd_VGG-16_300.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS
:
[
1.0
,
1.0
,
1.0
]
PIXEL_STDS
:
[
1.0
,
1.0
,
1.0
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
ssd
TYPE
:
'
ssd'
BACKBONE
:
vgg16_reduced_300
BACKBONE
:
'
vgg16_reduced_300'
COARSEST_STRIDE
:
0
COARSEST_STRIDE
:
0
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
...
@@ -31,7 +31,7 @@ SOLVER:
...
@@ -31,7 +31,7 @@ SOLVER:
DECAY_STEPS
:
[
80000
,
100000
]
DECAY_STEPS
:
[
80000
,
100000
]
MAX_STEPS
:
120000
MAX_STEPS
:
120000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
voc_ssd_VGG-16_300
SNAPSHOT_PREFIX
:
'
voc_ssd_VGG-16_300'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/VGG16.SSD.pkl'
WEIGHTS
:
'
/model/VGG16.SSD.pkl'
DATASET
:
'
/data/voc_0712_trainval'
DATASET
:
'
/data/voc_0712_trainval'
...
...
configs/ssd/voc_ssd_VGG-16_512.yml
View file @
e3b9b64
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
PIXEL_STDS
:
[
1.0
,
1.0
,
1.0
]
PIXEL_STDS
:
[
1.0
,
1.0
,
1.0
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
PIXEL_MEANS
:
[
103.53
,
116.28
,
123.675
]
MODEL
:
MODEL
:
TYPE
:
ssd
TYPE
:
'
ssd'
BACKBONE
:
vgg16_reduced_512
BACKBONE
:
'
vgg16_reduced_512'
CLASSES
:
[
'
__background__'
,
CLASSES
:
[
'
__background__'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
aeroplane'
,
'
bicycle'
,
'
bird'
,
'
boat'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
'
bottle'
,
'
bus'
,
'
car'
,
'
cat'
,
'
chair'
,
...
@@ -32,7 +32,7 @@ SOLVER:
...
@@ -32,7 +32,7 @@ SOLVER:
DECAY_STEPS
:
[
80000
,
100000
]
DECAY_STEPS
:
[
80000
,
100000
]
MAX_STEPS
:
120000
MAX_STEPS
:
120000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_EVERY
:
5000
SNAPSHOT_PREFIX
:
voc_ssd_VGG-16_512
SNAPSHOT_PREFIX
:
'
voc_ssd_VGG-16_512'
TRAIN
:
TRAIN
:
WEIGHTS
:
'
/model/VGG16.SSD.pkl'
WEIGHTS
:
'
/model/VGG16.SSD.pkl'
DATASET
:
'
/data/voc_0712_trainval'
DATASET
:
'
/data/voc_0712_trainval'
...
...
scripts/coco/im2rec.py
View file @
e3b9b64
...
@@ -18,7 +18,7 @@ import os
...
@@ -18,7 +18,7 @@ import os
import
shutil
import
shutil
from
maker
import
make_record
from
maker
import
make_record
from
maskgen
import
make_mask
,
merge_mask
from
roidb
import
make_database
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
@@ -27,30 +27,25 @@ if __name__ == '__main__':
...
@@ -27,30 +27,25 @@ if __name__ == '__main__':
# Encode masks to RLE bytes
# Encode masks to RLE bytes
if
not
os
.
path
.
exists
(
'build'
):
if
not
os
.
path
.
exists
(
'build'
):
os
.
makedirs
(
'build'
)
os
.
makedirs
(
'build'
)
make_mask
(
'train'
,
'2014'
,
COCO_ROOT
)
make_database
(
'train'
,
'2017'
,
COCO_ROOT
)
make_mask
(
'valminusminival'
,
'2014'
,
COCO_ROOT
)
make_database
(
'val'
,
'2017'
,
COCO_ROOT
)
make_mask
(
'minival'
,
'2014'
,
COCO_ROOT
)
merge_mask
(
'trainval35k'
,
'2014'
,
[
'build/coco_2014_train_mask.pkl'
,
'build/coco_2014_valminusminival_mask.pkl'
])
# coco_201
4_trainval35k
# coco_201
7_train
make_record
(
make_record
(
record_file
=
os
.
path
.
join
(
COCO_ROOT
,
'coco_2014_trainval35k'
),
db_file
=
'build/coco_2017_train.db.pkl'
,
images_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'images/train2014'
),
record_file
=
os
.
path
.
join
(
COCO_ROOT
,
'coco_2017_train'
),
os
.
path
.
join
(
COCO_ROOT
,
'images/val2014'
)],
images_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'images/train2017'
)],
splits_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'splits'
),
splits_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'splits'
)],
os
.
path
.
join
(
COCO_ROOT
,
'splits'
)],
splits
=
[
'train2017'
],
mask_file
=
'build/coco_2014_trainval35k_mask.pkl'
,
splits
=
[
'train'
,
'valminusminival'
],
)
)
# coco_201
4_mini
val
# coco_201
7_
val
make_record
(
make_record
(
record_file
=
os
.
path
.
join
(
COCO_ROOT
,
'coco_2014_minival'
)
,
db_file
=
'build/coco_2017_val.db.pkl'
,
images_path
=
os
.
path
.
join
(
COCO_ROOT
,
'images/val2014
'
),
record_file
=
os
.
path
.
join
(
COCO_ROOT
,
'coco_2017_val
'
),
mask_file
=
'build/coco_2014_minival_mask.pkl'
,
images_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'images/val2017'
)]
,
splits_path
=
os
.
path
.
join
(
COCO_ROOT
,
'splits'
)
,
splits_path
=
[
os
.
path
.
join
(
COCO_ROOT
,
'splits'
)]
,
splits
=
[
'
minival
'
],
splits
=
[
'
val2017
'
],
)
)
shutil
.
rmtree
(
'build'
)
shutil
.
rmtree
(
'build'
)
scripts/coco/maker.py
View file @
e3b9b64
...
@@ -18,7 +18,7 @@ import dragon
...
@@ -18,7 +18,7 @@ import dragon
import
numpy
as
np
import
numpy
as
np
def
make_example
(
image_file
,
mask_
objects
,
im_scale
=
None
):
def
make_example
(
image_file
,
objects
,
im_scale
=
None
):
filename
=
os
.
path
.
split
(
image_file
)[
-
1
]
filename
=
os
.
path
.
split
(
image_file
)[
-
1
]
example
=
{
'id'
:
filename
.
split
(
'.'
)[
0
],
'object'
:
[]}
example
=
{
'id'
:
filename
.
split
(
'.'
)[
0
],
'object'
:
[]}
...
@@ -39,7 +39,7 @@ def make_example(image_file, mask_objects, im_scale=None):
...
@@ -39,7 +39,7 @@ def make_example(image_file, mask_objects, im_scale=None):
example
[
'height'
],
example
[
'width'
],
example
[
'depth'
]
=
img
.
shape
example
[
'height'
],
example
[
'width'
],
example
[
'depth'
]
=
img
.
shape
example
[
'content'
]
=
img_bytes
example
[
'content'
]
=
img_bytes
for
ix
,
obj
in
enumerate
(
mask_objects
)
:
for
obj
in
objects
:
x1
,
y1
,
x2
,
y2
=
obj
[
'bbox'
]
x1
,
y1
,
x2
,
y2
=
obj
[
'bbox'
]
example
[
'object'
]
.
append
({
example
[
'object'
]
.
append
({
'name'
:
obj
[
'name'
],
'name'
:
obj
[
'name'
],
...
@@ -58,7 +58,7 @@ def make_example(image_file, mask_objects, im_scale=None):
...
@@ -58,7 +58,7 @@ def make_example(image_file, mask_objects, im_scale=None):
def
make_record
(
def
make_record
(
record_file
,
record_file
,
images_path
,
images_path
,
mask
_file
,
db
_file
,
splits_path
,
splits_path
,
splits
,
splits
,
ext
=
'.jpg'
,
ext
=
'.jpg'
,
...
@@ -75,11 +75,11 @@ def make_record(
...
@@ -75,11 +75,11 @@ def make_record(
assert
len
(
splits
)
==
len
(
splits_path
)
assert
len
(
splits
)
==
len
(
splits_path
)
assert
len
(
splits
)
==
len
(
images_path
)
assert
len
(
splits
)
==
len
(
images_path
)
if
mask
_file
is
not
None
:
if
db
_file
is
not
None
:
with
open
(
mask
_file
,
'rb'
)
as
f
:
with
open
(
db
_file
,
'rb'
)
as
f
:
all_
mask
s
=
pickle
.
load
(
f
)
all_
entrie
s
=
pickle
.
load
(
f
)
else
:
else
:
all_
mask
s
=
{}
all_
entrie
s
=
{}
print
(
'Start Time:'
,
time
.
strftime
(
"
%
a,
%
d
%
b
%
Y
%
H:
%
M:
%
S"
,
time
.
gmtime
()))
print
(
'Start Time:'
,
time
.
strftime
(
"
%
a,
%
d
%
b
%
Y
%
H:
%
M:
%
S"
,
time
.
gmtime
()))
...
@@ -133,8 +133,8 @@ def make_record(
...
@@ -133,8 +133,8 @@ def make_record(
count
,
total_line
,
now_time
-
start_time
))
count
,
total_line
,
now_time
-
start_time
))
filename
=
line
.
strip
()
filename
=
line
.
strip
()
image_file
=
os
.
path
.
join
(
images_path
[
db_idx
],
filename
+
ext
)
image_file
=
os
.
path
.
join
(
images_path
[
db_idx
],
filename
+
ext
)
mask_objects
=
all_masks
[
filename
]
if
filename
in
all_mask
s
else
{}
objects
=
all_entries
[
filename
]
if
filename
in
all_entrie
s
else
{}
writer
.
write
(
make_example
(
image_file
,
mask_
objects
,
im_scale
))
writer
.
write
(
make_example
(
image_file
,
objects
,
im_scale
))
now_time
=
time
.
time
()
now_time
=
time
.
time
()
print
(
'{} / {} in {:.2f} sec'
.
format
(
count
,
total_line
,
now_time
-
start_time
))
print
(
'{} / {} in {:.2f} sec'
.
format
(
count
,
total_line
,
now_time
-
start_time
))
...
...
scripts/coco/
maskgen
.py
→
scripts/coco/
roidb
.py
View file @
e3b9b64
...
@@ -74,11 +74,13 @@ class COCOWrapper(object):
...
@@ -74,11 +74,13 @@ class COCOWrapper(object):
def
image_path_from_index
(
self
,
index
):
def
image_path_from_index
(
self
,
index
):
"""Construct an image path from the image's "index" identifier."""
"""Construct an image path from the image's "index" identifier."""
# Example image path for index=119993:
# Example image path for index=119993:
# images/train2014/COCO_train2014_000000119993.jpg
# images/train2014/COCO_train2014_000000119993.jpg
file_name
=
(
'COCO_'
+
self
.
_data_name
+
'_'
+
# images/train2017/000000119993.jpg
str
(
index
)
.
zfill
(
12
)
+
'.jpg'
)
filename
=
str
(
index
)
.
zfill
(
12
)
+
'.jpg'
if
'2014'
in
self
.
_data_name
:
filename
=
'COCO_{}_{}'
.
format
(
self
.
_data_name
,
filename
)
image_path
=
osp
.
join
(
self
.
_data_path
,
'images'
,
image_path
=
osp
.
join
(
self
.
_data_path
,
'images'
,
self
.
_data_name
,
file
_
name
)
self
.
_data_name
,
filename
)
assert
osp
.
exists
(
image_path
),
\
assert
osp
.
exists
(
image_path
),
\
'Path does not exist: {}'
.
format
(
image_path
)
'Path does not exist: {}'
.
format
(
image_path
)
return
image_path
return
image_path
...
@@ -99,19 +101,18 @@ class COCOWrapper(object):
...
@@ -99,19 +101,18 @@ class COCOWrapper(object):
objects
=
self
.
_COCO
.
loadAnns
(
ann_ids
)
objects
=
self
.
_COCO
.
loadAnns
(
ann_ids
)
# Sanitize boxes -- some are invalid
# Sanitize boxes -- some are invalid
valid_objects
=
[]
valid_objects
=
[]
mask
,
polygons
=
b
''
,
[]
for
obj
in
objects
:
for
obj
in
objects
:
x1
=
float
(
max
(
0
,
obj
[
'bbox'
][
0
]))
x1
=
float
(
max
(
0
,
obj
[
'bbox'
][
0
]))
y1
=
float
(
max
(
0
,
obj
[
'bbox'
][
1
]))
y1
=
float
(
max
(
0
,
obj
[
'bbox'
][
1
]))
x2
=
float
(
min
(
width
-
1
,
x1
+
max
(
0
,
obj
[
'bbox'
][
2
]
-
1
)))
x2
=
float
(
min
(
width
-
1
,
x1
+
max
(
0
,
obj
[
'bbox'
][
2
]
-
1
)))
y2
=
float
(
min
(
height
-
1
,
y1
+
max
(
0
,
obj
[
'bbox'
][
3
]
-
1
)))
y2
=
float
(
min
(
height
-
1
,
y1
+
max
(
0
,
obj
[
'bbox'
][
3
]
-
1
)))
mask
,
polygons
=
b
''
,
[]
if
isinstance
(
obj
[
'segmentation'
],
list
):
if
isinstance
(
obj
[
'segmentation'
],
list
):
for
p
in
obj
[
'segmentation'
]:
for
p
in
obj
[
'segmentation'
]:
if
len
(
p
)
<
6
:
if
len
(
p
)
<
6
:
print
(
'Remove Invalid segm.'
)
print
(
'Remove Invalid segm.'
)
# Valid polygons have >= 3 points, so require >= 6 coordinates
# Valid polygons have >= 3 points, so require >= 6 coordinates
polygons
=
[
p
for
p
in
obj
[
'segmentation'
]
if
len
(
p
)
>=
6
]
polygons
=
[
p
for
p
in
obj
[
'segmentation'
]
if
len
(
p
)
>=
6
]
# mask_bytes = mask_utils.poly2bytes(poly, height, width)
else
:
else
:
# Crowd masks
# Crowd masks
# Some are encoded with height or width
# Some are encoded with height or width
...
@@ -141,25 +142,26 @@ class COCOWrapper(object):
...
@@ -141,25 +142,26 @@ class COCOWrapper(object):
return
len
(
self
.
_classes
)
return
len
(
self
.
_classes
)
def
make_
mask
(
split
,
year
,
data_dir
):
def
make_
database
(
split
,
year
,
data_dir
):
coco
=
COCOWrapper
(
split
,
year
,
data_dir
)
coco
=
COCOWrapper
(
split
,
year
,
data_dir
)
print
(
'Preparing to make split: {}, total {} images'
print
(
'Preparing to make split: {}, total {} images'
.
format
(
split
,
coco
.
num_images
))
.
format
(
split
,
coco
.
num_images
))
if
not
osp
.
exists
(
osp
.
join
(
coco
.
_data_path
,
'splits'
)):
if
not
osp
.
exists
(
osp
.
join
(
coco
.
_data_path
,
'splits'
)):
os
.
makedirs
(
osp
.
join
(
coco
.
_data_path
,
'splits'
))
os
.
makedirs
(
osp
.
join
(
coco
.
_data_path
,
'splits'
))
gt_rec
s
=
collections
.
OrderedDict
()
entrie
s
=
collections
.
OrderedDict
()
for
i
in
range
(
coco
.
num_images
):
for
i
in
range
(
coco
.
num_images
):
filename
=
osp
.
basename
(
coco
.
image_path_at
(
i
))
.
split
(
'.'
)[
0
]
filename
=
osp
.
basename
(
coco
.
image_path_at
(
i
))
.
split
(
'.'
)[
0
]
h
,
w
,
objects
=
coco
.
annotation_at
(
i
)
h
,
w
,
objects
=
coco
.
annotation_at
(
i
)
gt_rec
s
[
filename
]
=
objects
entrie
s
[
filename
]
=
objects
with
open
(
osp
.
join
(
'build'
,
with
open
(
osp
.
join
(
'build'
,
'coco_'
+
year
+
'coco_'
+
year
+
'_'
+
split
+
'
_'
+
split
+
'_mask
.pkl'
),
'wb'
)
as
f
:
'
.db
.pkl'
),
'wb'
)
as
f
:
pickle
.
dump
(
gt_rec
s
,
f
,
pickle
.
HIGHEST_PROTOCOL
)
pickle
.
dump
(
entrie
s
,
f
,
pickle
.
HIGHEST_PROTOCOL
)
with
open
(
osp
.
join
(
coco
.
_data_path
,
'splits'
,
split
+
'.txt'
),
'w'
)
as
f
:
with
open
(
osp
.
join
(
coco
.
_data_path
,
'splits'
,
split
+
year
+
'.txt'
),
'w'
)
as
f
:
for
i
in
range
(
coco
.
num_images
):
for
i
in
range
(
coco
.
num_images
):
filename
=
str
(
osp
.
basename
(
coco
.
image_path_at
(
i
))
.
split
(
'.'
)[
0
])
filename
=
str
(
osp
.
basename
(
coco
.
image_path_at
(
i
))
.
split
(
'.'
)[
0
])
if
i
!=
coco
.
num_images
-
1
:
if
i
!=
coco
.
num_images
-
1
:
...
@@ -167,16 +169,16 @@ def make_mask(split, year, data_dir):
...
@@ -167,16 +169,16 @@ def make_mask(split, year, data_dir):
f
.
write
(
filename
)
f
.
write
(
filename
)
def
merge_
mask
(
split
,
year
,
mask
_files
):
def
merge_
database
(
split
,
year
,
db
_files
):
gt_rec
s
=
collections
.
OrderedDict
()
entrie
s
=
collections
.
OrderedDict
()
data_path
=
os
.
path
.
dirname
(
mask
_files
[
0
])
data_path
=
os
.
path
.
dirname
(
db
_files
[
0
])
for
mask_file
in
mask
_files
:
for
db_file
in
db
_files
:
with
open
(
mask
_file
,
'rb'
)
as
f
:
with
open
(
db
_file
,
'rb'
)
as
f
:
rec
s
=
pickle
.
load
(
f
)
entrie
s
=
pickle
.
load
(
f
)
gt_recs
.
update
(
rec
s
)
entries
.
update
(
entrie
s
)
with
open
(
osp
.
join
(
data_path
,
with
open
(
osp
.
join
(
data_path
,
'coco_'
+
year
+
'coco_'
+
year
+
'_'
+
split
+
'
_'
+
split
+
'_mask
.pkl'
),
'wb'
)
as
f
:
'
.db
.pkl'
),
'wb'
)
as
f
:
pickle
.
dump
(
gt_rec
s
,
f
,
pickle
.
HIGHEST_PROTOCOL
)
pickle
.
dump
(
entrie
s
,
f
,
pickle
.
HIGHEST_PROTOCOL
)
seetadet/algo/faster_rcnn/data_transformer.py
View file @
e3b9b64
...
@@ -27,6 +27,8 @@ from seetadet.utils import image as image_util
...
@@ -27,6 +27,8 @@ from seetadet.utils import image as image_util
class
DataTransformer
(
multiprocessing
.
Process
):
class
DataTransformer
(
multiprocessing
.
Process
):
"""DataTransformer."""
def
__init__
(
self
,
**
kwargs
):
def
__init__
(
self
,
**
kwargs
):
super
(
DataTransformer
,
self
)
.
__init__
()
super
(
DataTransformer
,
self
)
.
__init__
()
self
.
_scales
=
cfg
.
TRAIN
.
SCALES
self
.
_scales
=
cfg
.
TRAIN
.
SCALES
...
@@ -43,7 +45,7 @@ class DataTransformer(multiprocessing.Process):
...
@@ -43,7 +45,7 @@ class DataTransformer(multiprocessing.Process):
self
.
q_in
=
self
.
q_out
=
None
self
.
q_in
=
self
.
q_out
=
None
self
.
daemon
=
True
self
.
daemon
=
True
def
get_boxes
(
self
,
example
,
im_scale
):
def
get_boxes
(
self
,
example
,
im_scale
,
flipped
):
objects
,
num_objects
=
example
.
objects
,
0
objects
,
num_objects
=
example
.
objects
,
0
height
,
width
=
example
.
height
,
example
.
width
height
,
width
=
example
.
height
,
example
.
width
if
not
self
.
_use_diff
:
if
not
self
.
_use_diff
:
...
@@ -56,7 +58,7 @@ class DataTransformer(multiprocessing.Process):
...
@@ -56,7 +58,7 @@ class DataTransformer(multiprocessing.Process):
boxes
=
np
.
zeros
((
num_objects
,
4
),
'float32'
)
boxes
=
np
.
zeros
((
num_objects
,
4
),
'float32'
)
gt_classes
=
np
.
zeros
((
num_objects
,),
'float32'
)
gt_classes
=
np
.
zeros
((
num_objects
,),
'float32'
)
# Filter the difficult instances
# Filter the difficult instances
.
object_idx
=
0
object_idx
=
0
for
obj
in
objects
:
for
obj
in
objects
:
if
not
self
.
_use_diff
and
obj
.
get
(
'difficult'
,
0
)
>
0
:
if
not
self
.
_use_diff
and
obj
.
get
(
'difficult'
,
0
)
>
0
:
...
@@ -69,10 +71,14 @@ class DataTransformer(multiprocessing.Process):
...
@@ -69,10 +71,14 @@ class DataTransformer(multiprocessing.Process):
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
object_idx
+=
1
object_idx
+=
1
# Scale the boxes to the detecting scale
# Flip the boxes if necessary.
if
flipped
:
boxes
=
box_util
.
flip_boxes
(
boxes
,
width
)
# Scale the boxes to the detecting scale.
boxes
*=
im_scale
boxes
*=
im_scale
# Attach the classes
# Attach the classes
.
gt_boxes
=
np
.
empty
((
num_objects
,
5
),
dtype
=
np
.
float32
)
gt_boxes
=
np
.
empty
((
num_objects
,
5
),
dtype
=
np
.
float32
)
gt_boxes
[:,
:
4
],
gt_boxes
[:,
4
]
=
boxes
,
gt_classes
gt_boxes
[:,
:
4
],
gt_boxes
[:,
4
]
=
boxes
,
gt_classes
...
@@ -81,7 +87,7 @@ class DataTransformer(multiprocessing.Process):
...
@@ -81,7 +87,7 @@ class DataTransformer(multiprocessing.Process):
def
get
(
self
,
example
):
def
get
(
self
,
example
):
example
=
Example
(
example
)
example
=
Example
(
example
)
# Resize
# Resize
.
img
,
im_scale
=
image_util
.
resize_image_with_target_size
(
img
,
im_scale
=
image_util
.
resize_image_with_target_size
(
example
.
image
,
example
.
image
,
target_size
=
npr
.
choice
(
self
.
_scales
),
target_size
=
npr
.
choice
(
self
.
_scales
),
...
@@ -89,22 +95,18 @@ class DataTransformer(multiprocessing.Process):
...
@@ -89,22 +95,18 @@ class DataTransformer(multiprocessing.Process):
random_scales
=
self
.
_random_scales
,
random_scales
=
self
.
_random_scales
,
)
)
# Flip
# Flip
.
flipped
=
False
flipped
=
False
if
self
.
_use_flipped
and
npr
.
randint
(
2
)
>
0
:
if
self
.
_use_flipped
and
npr
.
randint
(
2
)
>
0
:
img
=
img
[:,
::
-
1
]
img
=
img
[:,
::
-
1
]
flipped
=
True
flipped
=
True
# Distort
# Distort
.
if
self
.
_use_distort
:
if
self
.
_use_distort
:
img
=
image_util
.
distort_image
(
img
)
img
=
image_util
.
distort_image
(
img
)
# Boxes
# Boxes.
boxes
=
self
.
get_boxes
(
example
,
im_scale
)
boxes
=
self
.
get_boxes
(
example
,
im_scale
,
flipped
)
# Flip the boxes if necessary
if
flipped
:
boxes
=
box_util
.
flip_boxes
(
boxes
,
img
.
shape
[
1
])
# Standard outputs.
# Standard outputs.
outputs
=
{
'image'
:
img
,
outputs
=
{
'image'
:
img
,
...
...
seetadet/algo/mask_rcnn/data_transformer.py
View file @
e3b9b64
...
@@ -28,6 +28,8 @@ from seetadet.utils import image as image_util
...
@@ -28,6 +28,8 @@ from seetadet.utils import image as image_util
class
DataTransformer
(
multiprocessing
.
Process
):
class
DataTransformer
(
multiprocessing
.
Process
):
"""DataTransformer."""
def
__init__
(
self
,
**
kwargs
):
def
__init__
(
self
,
**
kwargs
):
super
(
DataTransformer
,
self
)
.
__init__
()
super
(
DataTransformer
,
self
)
.
__init__
()
self
.
_scales
=
cfg
.
TRAIN
.
SCALES
self
.
_scales
=
cfg
.
TRAIN
.
SCALES
...
@@ -81,6 +83,10 @@ class DataTransformer(multiprocessing.Process):
...
@@ -81,6 +83,10 @@ class DataTransformer(multiprocessing.Process):
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
object_idx
+=
1
object_idx
+=
1
# Flip the boxes if necessary.
if
flipped
:
boxes
=
box_util
.
flip_boxes
(
boxes
,
width
)
# Scale the boxes to the detecting scale.
# Scale the boxes to the detecting scale.
boxes
*=
im_scale
boxes
*=
im_scale
...
@@ -115,10 +121,6 @@ class DataTransformer(multiprocessing.Process):
...
@@ -115,10 +121,6 @@ class DataTransformer(multiprocessing.Process):
# Boxes and segmentations.
# Boxes and segmentations.
boxes
,
segms
=
self
.
get_boxes_and_segms
(
example
,
im_scale
,
flipped
)
boxes
,
segms
=
self
.
get_boxes_and_segms
(
example
,
im_scale
,
flipped
)
# Flip the boxes if necessary.
if
flipped
:
boxes
=
box_util
.
flip_boxes
(
boxes
,
img
.
shape
[
1
])
# Standard outputs.
# Standard outputs.
outputs
=
{
'image'
:
img
,
outputs
=
{
'image'
:
img
,
'boxes'
:
boxes
,
'boxes'
:
boxes
,
...
...
seetadet/algo/mask_rcnn/proposal_target.py
View file @
e3b9b64
...
@@ -124,37 +124,37 @@ class ProposalTarget(object):
...
@@ -124,37 +124,37 @@ class ProposalTarget(object):
def
compute_targets
(
def
compute_targets
(
ex_
rois
,
rois
,
gt_
roi
s
,
gt_
boxe
s
,
gt_labels
,
gt_labels
,
gt
_segms
,
fg
_segms
,
mask_flags
,
fg_segms_flag
,
mask_size
,
mask_size
,
im_scale
,
im_scale
,
):
):
"""Compute the bounding-box regression targets."""
"""Compute the bounding-box regression targets."""
assert
ex_rois
.
shape
[
0
]
==
gt_roi
s
.
shape
[
0
]
assert
rois
.
shape
[
0
]
==
gt_boxe
s
.
shape
[
0
]
assert
ex_
rois
.
shape
[
1
]
==
4
assert
rois
.
shape
[
1
]
==
4
assert
gt_
roi
s
.
shape
[
1
]
==
4
assert
gt_
boxe
s
.
shape
[
1
]
==
4
# Compute bbox regression targets
# Compute bbox regression targets
fg_inds
=
np
.
where
(
gt_labels
>
0
)[
0
]
fg_inds
=
np
.
where
(
gt_labels
>
0
)[
0
]
bbox_targets
=
box_util
.
bbox_transform
(
bbox_targets
=
box_util
.
bbox_transform
(
rois
,
gt_boxes
,
cfg
.
BBOX_REG_WEIGHTS
)
ex_rois
,
gt_rois
,
cfg
.
BBOX_REG_WEIGHTS
)
# Compute mask classification targets
# Compute mask classification targets
mask_shape
=
[
mask_size
]
*
2
mask_shape
=
[
mask_size
]
*
2
ex_rois_ori
=
np
.
round
(
ex_rois
/
im_scale
)
.
astype
(
int
)
mask_targets
=
-
np
.
ones
([
len
(
rois
)]
+
mask_shape
,
'float32'
)
mask_targets
=
-
np
.
ones
([
len
(
gt_labels
)]
+
mask_shape
,
'float32'
)
rois_ori
=
rois
/
im_scale
for
i
in
fg_inds
:
rois_ori_int
=
np
.
round
(
rois_ori
)
.
astype
(
int
)
if
mask_flags
[
i
]
>
0
:
gt_boxes_ori_int
=
np
.
round
(
gt_boxes
/
im_scale
)
.
astype
(
int
)
if
isinstance
(
gt_segms
[
i
],
list
):
for
i
,
fg_idx
in
enumerate
(
fg_inds
):
ret
=
mask_util
.
warp_mask_via_polygons
(
if
fg_segms_flag
[
i
]
>
0
:
gt_segms
[
i
],
ex_rois_ori
[
i
],
mask_shape
)
if
isinstance
(
fg_segms
[
i
],
list
):
target
=
mask_util
.
warp_mask_via_polygons
(
fg_segms
[
i
],
rois_ori
[
i
],
mask_shape
)
else
:
else
:
gt_rois_ori
=
np
.
round
(
gt_rois
/
im_scale
)
.
astype
(
int
)
target
=
mask_util
.
warp_mask_via_intersection
(
ret
=
mask_util
.
warp_mask_via_intersection
(
fg_segms
[
i
],
rois_ori_int
[
i
],
gt_boxes_ori_int
[
i
],
mask_shape
)
gt_segms
[
i
],
ex_rois_ori
[
i
],
gt_rois_ori
[
i
],
mask_shape
)
if
target
is
not
None
:
if
ret
is
not
None
:
mask_targets
[
fg_idx
]
=
target
.
astype
(
mask_targets
.
dtype
)
mask_targets
[
i
]
=
ret
.
astype
(
'float32'
)
return
bbox_targets
,
mask_targets
return
bbox_targets
,
mask_targets
...
...
seetadet/algo/ssd/data_transformer.py
View file @
e3b9b64
...
@@ -27,6 +27,8 @@ from seetadet.utils import boxes as box_util
...
@@ -27,6 +27,8 @@ from seetadet.utils import boxes as box_util
class
DataTransformer
(
multiprocessing
.
Process
):
class
DataTransformer
(
multiprocessing
.
Process
):
"""DataTransformer."""
def
__init__
(
self
,
**
kwargs
):
def
__init__
(
self
,
**
kwargs
):
super
(
DataTransformer
,
self
)
.
__init__
()
super
(
DataTransformer
,
self
)
.
__init__
()
self
.
_scale
=
cfg
.
TRAIN
.
SCALES
[
0
]
self
.
_scale
=
cfg
.
TRAIN
.
SCALES
[
0
]
...
@@ -44,7 +46,7 @@ class DataTransformer(multiprocessing.Process):
...
@@ -44,7 +46,7 @@ class DataTransformer(multiprocessing.Process):
self
.
q_in
=
self
.
q_out
=
None
self
.
q_in
=
self
.
q_out
=
None
self
.
daemon
=
True
self
.
daemon
=
True
def
get_boxes
(
self
,
example
):
def
get_boxes
(
self
,
example
,
flipped
):
objects
,
num_objects
=
example
.
objects
,
0
objects
,
num_objects
=
example
.
objects
,
0
height
,
width
=
example
.
height
,
example
.
width
height
,
width
=
example
.
height
,
example
.
width
if
not
self
.
_use_diff
:
if
not
self
.
_use_diff
:
...
@@ -70,6 +72,10 @@ class DataTransformer(multiprocessing.Process):
...
@@ -70,6 +72,10 @@ class DataTransformer(multiprocessing.Process):
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
gt_classes
[
object_idx
]
=
self
.
_class_to_ind
[
obj
[
'name'
]]
object_idx
+=
1
object_idx
+=
1
# Flip the boxes if necessary.
if
flipped
:
boxes
=
box_util
.
flip_boxes
(
boxes
,
width
)
# Normalize.
# Normalize.
boxes
[:,
0
::
2
]
/=
width
boxes
[:,
0
::
2
]
/=
width
boxes
[:,
1
::
2
]
/=
height
boxes
[:,
1
::
2
]
/=
height
...
@@ -82,25 +88,31 @@ class DataTransformer(multiprocessing.Process):
...
@@ -82,25 +88,31 @@ class DataTransformer(multiprocessing.Process):
def
get
(
self
,
example
):
def
get
(
self
,
example
):
example
=
Example
(
example
)
example
=
Example
(
example
)
img
=
example
.
image
# Flip.
flipped
=
False
if
self
.
_use_flipped
and
npr
.
randint
(
2
)
>
0
:
img
=
img
[:,
::
-
1
]
flipped
=
True
# Boxes.
# Boxes.
boxes
=
self
.
get_boxes
(
example
)
boxes
=
self
.
get_boxes
(
example
,
flipped
)
# Return to avoid the invalid transforms.
if
len
(
boxes
)
==
0
:
if
len
(
boxes
)
==
0
:
return
{
'boxes'
:
boxes
}
return
{
'boxes'
:
boxes
}
# Distort => Expand => Sample => Resize
# Distort => Expand => Sample => Resize
img
,
boxes
=
self
.
_apply_transform
(
example
.
image
,
boxes
)
img
,
boxes
=
self
.
_apply_transform
(
img
,
boxes
)
# Restore to the blob scale.
# Restore to the blob scale.
boxes
[:,
:
4
]
*=
self
.
_scale
boxes
[:,
:
4
]
*=
self
.
_scale
# Flip.
if
self
.
_use_flipped
and
npr
.
randint
(
2
)
>
0
:
img
=
img
[:,
::
-
1
]
boxes
=
box_util
.
flip_boxes
(
boxes
,
img
.
shape
[
1
])
# Standard outputs.
# Standard outputs.
outputs
=
{
'image'
:
img
,
'boxes'
:
boxes
,
'im_info'
:
img
.
shape
[:
2
]}
outputs
=
{
'image'
:
img
,
'boxes'
:
boxes
,
'im_info'
:
img
.
shape
[:
2
]}
# Attach precomputed targets.
# Attach precomputed targets.
if
len
(
boxes
)
>
0
:
if
len
(
boxes
)
>
0
:
...
...
seetadet/core/config.py
View file @
e3b9b64
...
@@ -333,10 +333,12 @@ __C.FRCNN.NEGATIVE_OVERLAP_HI = 0.5
...
@@ -333,10 +333,12 @@ __C.FRCNN.NEGATIVE_OVERLAP_HI = 0.5
__C
.
FRCNN
.
NEGATIVE_OVERLAP_LO
=
0.0
__C
.
FRCNN
.
NEGATIVE_OVERLAP_LO
=
0.0
# RoI transform function
# RoI transform function
# Values supported: 'RoIAlign', 'RoI
Align
'
# Values supported: 'RoIAlign', 'RoI
Pool
'
__C
.
FRCNN
.
ROI_XFORM_METHOD
=
'RoIAlign'
__C
.
FRCNN
.
ROI_XFORM_METHOD
=
'RoIAlign'
# RoI transform output resolution
# RoI transform output resolution
__C
.
FRCNN
.
ROI_XFORM_RESOLUTION
=
7
__C
.
FRCNN
.
ROI_XFORM_RESOLUTION
=
7
# Resampling window size for RoI transformation
# Resampling window size for RoI transformation
__C
.
FRCNN
.
ROI_XFORM_SAMPLING_RATIO
=
0
__C
.
FRCNN
.
ROI_XFORM_SAMPLING_RATIO
=
0
...
@@ -362,10 +364,12 @@ __C.MRCNN = AttrDict()
...
@@ -362,10 +364,12 @@ __C.MRCNN = AttrDict()
__C
.
MRCNN
.
RESOLUTION
=
28
__C
.
MRCNN
.
RESOLUTION
=
28
# RoI transform function
# RoI transform function
# Values supported: 'RoIAlign', 'RoI
Align
'
# Values supported: 'RoIAlign', 'RoI
Pool
'
__C
.
MRCNN
.
ROI_XFORM_METHOD
=
'RoIAlign'
__C
.
MRCNN
.
ROI_XFORM_METHOD
=
'RoIAlign'
# RoI transform output resolution
# RoI transform output resolution
__C
.
MRCNN
.
ROI_XFORM_RESOLUTION
=
14
__C
.
MRCNN
.
ROI_XFORM_RESOLUTION
=
14
# Resampling window size for RoI transformation
# Resampling window size for RoI transformation
__C
.
MRCNN
.
ROI_XFORM_SAMPLING_RATIO
=
0
__C
.
MRCNN
.
ROI_XFORM_SAMPLING_RATIO
=
0
...
@@ -438,6 +442,7 @@ __C.SOLVER.DISPLAY = 20
...
@@ -438,6 +442,7 @@ __C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
# The interval to snapshot a model
__C
.
SOLVER
.
SNAPSHOT_EVERY
=
5000
__C
.
SOLVER
.
SNAPSHOT_EVERY
=
5000
# Prefix to yield the path: <prefix>_iter_XYZ.pkl
# Prefix to yield the path: <prefix>_iter_XYZ.pkl
__C
.
SOLVER
.
SNAPSHOT_PREFIX
=
''
__C
.
SOLVER
.
SNAPSHOT_PREFIX
=
''
...
@@ -451,25 +456,34 @@ __C.SOLVER.MAX_STEPS = 40000
...
@@ -451,25 +456,34 @@ __C.SOLVER.MAX_STEPS = 40000
# Base learning rate for the specified schedule
# Base learning rate for the specified schedule
__C
.
SOLVER
.
BASE_LR
=
0.001
__C
.
SOLVER
.
BASE_LR
=
0.001
# The uniform interval for LRScheduler
# The uniform interval for LRScheduler
__C
.
SOLVER
.
DECAY_STEP
=
1
__C
.
SOLVER
.
DECAY_STEP
=
1
# The custom intervals for LRScheduler
# The custom intervals for LRScheduler
__C
.
SOLVER
.
DECAY_STEPS
=
[]
__C
.
SOLVER
.
DECAY_STEPS
=
[]
# The decay factor for exponential LRScheduler
# The decay factor for exponential LRScheduler
__C
.
SOLVER
.
DECAY_GAMMA
=
0.1
__C
.
SOLVER
.
DECAY_GAMMA
=
0.1
# Warm up to ``BASE_LR`` over this number of steps
# Warm up to ``BASE_LR`` over this number of steps
__C
.
SOLVER
.
WARM_UP_STEPS
=
500
__C
.
SOLVER
.
WARM_UP_STEPS
=
500
# Start the warm up from ``BASE_LR`` * ``FACTOR``
# Start the warm up from ``BASE_LR`` * ``FACTOR``
__C
.
SOLVER
.
WARM_UP_FACTOR
=
0.333
__C
.
SOLVER
.
WARM_UP_FACTOR
=
0.333
# The type of LRScheduler
# The type of LRScheduler
__C
.
SOLVER
.
LR_POLICY
=
'steps_with_decay'
__C
.
SOLVER
.
LR_POLICY
=
'steps_with_decay'
# Momentum to use with SGD
# Momentum to use with SGD
__C
.
SOLVER
.
MOMENTUM
=
0.9
__C
.
SOLVER
.
MOMENTUM
=
0.9
# L2 regularization for weight parameters
# L2 regularization for weight parameters
__C
.
SOLVER
.
WEIGHT_DECAY
=
0.0001
__C
.
SOLVER
.
WEIGHT_DECAY
=
0.0001
# L2 regularization for legacy bias parameters
# L2 regularization for legacy bias parameters
__C
.
SOLVER
.
WEIGHT_DECAY_BIAS
=
0.0
__C
.
SOLVER
.
WEIGHT_DECAY_BIAS
=
0.0
# L2 norm factor for clipping gradients
# L2 norm factor for clipping gradients
__C
.
SOLVER
.
CLIP_NORM
=
0.0
__C
.
SOLVER
.
CLIP_NORM
=
0.0
...
...
seetadet/utils/mask.py
View file @
e3b9b64
...
@@ -14,6 +14,8 @@ from __future__ import absolute_import
...
@@ -14,6 +14,8 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
import
copy
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
import
PIL.Image
import
PIL.Image
...
@@ -37,32 +39,37 @@ def warp_mask_via_intersection(mask, box1, box2, size):
...
@@ -37,32 +39,37 @@ def warp_mask_via_intersection(mask, box1, box2, size):
inter_mask
=
mask
[
y1
:
y2
+
1
,
x1
:
x2
+
1
]
inter_mask
=
mask
[
y1
:
y2
+
1
,
x1
:
x2
+
1
]
target_h
=
box1
[
3
]
-
box1
[
1
]
+
1
target_h
=
box1
[
3
]
-
box1
[
1
]
+
1
target_w
=
box1
[
2
]
-
box1
[
0
]
+
1
target_w
=
box1
[
2
]
-
box1
[
0
]
+
1
warped_mask
=
np
.
zeros
((
target_h
,
target_w
),
dtype
=
mask
.
dtype
)
warped_mask
=
np
.
zeros
((
target_h
,
target_w
),
dtype
=
'uint8'
)
warped_mask
[
ex_start_y
:
ex_start_y
+
h
,
warped_mask
[
ex_start_y
:
ex_start_y
+
h
,
ex_start_x
:
ex_start_x
+
w
]
=
inter_mask
ex_start_x
:
ex_start_x
+
w
]
=
inter_mask
if
not
isinstance
(
size
,
(
tuple
,
list
)):
if
not
isinstance
(
size
,
(
tuple
,
list
)):
size
=
(
size
,
size
)
size
=
(
size
,
size
)
mask
=
PIL
.
Image
.
fromarray
(
warped_mask
)
mask
=
PIL
.
Image
.
fromarray
(
warped_mask
)
return
np
.
array
(
mask
.
resize
((
size
[
1
],
size
[
0
]),
PIL
.
Image
.
NEAREST
))
mask
=
mask
.
resize
((
size
[
1
],
size
[
0
]),
PIL
.
Image
.
NEAREST
)
return
np
.
array
(
mask
)
def
warp_mask_via_polygons
(
polygons
,
box
,
size
):
def
warp_mask_via_polygons
(
polygons
,
box
,
size
):
"""Warp mask via polygons."""
"""Warp mask via polygons."""
w
=
np
.
maximum
(
box
[
2
]
-
box
[
0
],
1
)
w
,
h
=
box
[
2
]
-
box
[
0
],
box
[
3
]
-
box
[
1
]
h
=
np
.
maximum
(
box
[
3
]
-
box
[
1
],
1
)
if
not
isinstance
(
size
,
(
tuple
,
list
)):
if
not
isinstance
(
size
,
(
tuple
,
list
)):
size
=
(
size
,
size
)
size
=
(
size
,
size
)
polygons_norm
=
[]
ratio_h
=
size
[
0
]
/
max
(
h
,
0.1
)
for
poly
in
polygons
:
ratio_w
=
size
[
1
]
/
max
(
w
,
0.1
)
p
=
np
.
array
(
poly
,
dtype
=
np
.
float32
)
polygons
=
copy
.
deepcopy
(
polygons
)
p
[
0
::
2
]
=
(
p
[
0
::
2
]
-
box
[
0
])
*
size
[
1
]
/
w
for
p
in
polygons
:
p
[
1
::
2
]
=
(
p
[
1
::
2
]
-
box
[
1
])
*
size
[
0
]
/
h
p
[
0
::
2
]
=
p
[
0
::
2
]
-
box
[
0
]
polygons_norm
.
append
(
p
)
p
[
1
::
2
]
=
p
[
1
::
2
]
-
box
[
1
]
rle
=
mask_tools
.
frPyObjects
(
polygons_norm
,
size
[
0
],
size
[
1
])
if
ratio_h
==
ratio_w
:
mask
=
np
.
array
(
mask_tools
.
decode
(
rle
))
for
p
in
polygons
:
mask
=
np
.
sum
(
mask
,
axis
=
2
)
p
*=
ratio_h
mask
=
np
.
array
(
mask
>
0
)
else
:
return
mask
for
p
in
polygons
:
p
[
0
::
2
]
*=
ratio_w
p
[
1
::
2
]
*=
ratio_h
rle_objs
=
mask_tools
.
frPyObjects
(
polygons
,
size
[
0
],
size
[
1
])
rle_objs
=
[
mask_tools
.
merge
(
rle_objs
)]
return
mask_tools
.
decode
(
rle_objs
)[:,
:,
0
]
def
mask_overlap
(
box1
,
box2
,
mask1
,
mask2
):
def
mask_overlap
(
box1
,
box2
,
mask1
,
mask2
):
...
@@ -148,7 +155,7 @@ def project_masks(
...
@@ -148,7 +155,7 @@ def project_masks(
w
=
np
.
maximum
(
w
,
1
)
w
=
np
.
maximum
(
w
,
1
)
h
=
np
.
maximum
(
h
,
1
)
h
=
np
.
maximum
(
h
,
1
)
mask
=
cv2
.
resize
(
padded_mask
,
(
w
,
h
))
mask
=
cv2
.
resize
(
padded_mask
,
(
w
,
h
))
mask
=
np
.
array
(
mask
>
thresh
,
'uint8'
)
mask
=
np
.
array
(
mask
>
=
thresh
,
'uint8'
)
x1
=
max
(
ref_box
[
0
],
0
)
x1
=
max
(
ref_box
[
0
],
0
)
y1
=
max
(
ref_box
[
1
],
0
)
y1
=
max
(
ref_box
[
1
],
0
)
x2
=
min
(
ref_box
[
2
]
+
1
,
width
)
x2
=
min
(
ref_box
[
2
]
+
1
,
width
)
...
...
setup.py
View file @
e3b9b64
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
# You should have received a copy of the BSD 2-Clause License
# You should have received a copy of the BSD 2-Clause License
# along with the software. If not, See,
# along with the software. If not, See,
#
#
#
<https://opensource.org/licenses/BSD-2-Clause>
# <https://opensource.org/licenses/BSD-2-Clause>
#
#
# ------------------------------------------------------------
# ------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
Attach a file
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to post a comment