Commit e3b9b641 by Ting PAN

Fix the bug of scaling flipped box

1 parent 9d12d142
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -21,24 +21,23 @@ MODEL:
             'teddy bear', 'hair drier', 'toothbrush']
 SOLVER:
   BASE_LR: 0.02
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_1x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -21,24 +21,23 @@ MODEL:
             'teddy bear', 'hair drier', 'toothbrush']
 SOLVER:
   BASE_LR: 0.02
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_2x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: faster_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'faster_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair',
@@ -18,7 +18,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_faster_rcnn_R-50-FPN_640
+  SNAPSHOT_PREFIX: 'voc_faster_rcnn_R-50-FPN_640'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
   DATASET: '/data/voc_0712_trainval'
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: mask_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'mask_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,7 +24,7 @@ SOLVER:
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_1x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
@@ -32,14 +32,14 @@ MRCNN:
   ROI_XFORM_RESOLUTION: 14
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   SCALES: [800]
   MAX_SIZE: 1333
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: mask_rcnn
-  BACKBONE: resnet50.fpn
+  TYPE: 'mask_rcnn'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,7 +24,7 @@ SOLVER:
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_2x'
 FRCNN:
   BATCH_SIZE: 512
   ROI_XFORM_RESOLUTION: 7
@@ -32,14 +32,14 @@ MRCNN:
   ROI_XFORM_RESOLUTION: 14
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   SCALES: [800]
   MAX_SIZE: 1333
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,22 +24,21 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [90000, 120000]
   MAX_STEPS: 135000
   SNAPSHOT_EVERY: 2500
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_416_6x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_416_6x'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 8
   SCALES: [416]
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [416]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,22 +24,21 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [90000, 120000]
   MAX_STEPS: 135000
   SNAPSHOT_EVERY: 2500
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_512_6x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_512_6x'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 8
   SCALES: [512]
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [512]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,21 +24,20 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [60000, 80000]
   MAX_STEPS: 90000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_1x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_1x'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 8
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'person', 'bicycle', 'car', 'motorcycle', 'airplane',
             'bus', 'train', 'truck', 'boat', 'traffic light',
@@ -24,21 +24,20 @@ FPN:
   RPN_MAX_LEVEL: 7
 SOLVER:
   BASE_LR: 0.01
-  LR_POLICY: steps_with_decay
   DECAY_STEPS: [120000, 160000]
   MAX_STEPS: 180000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_2x
+  SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_2x'
 TRAIN:
   WEIGHTS: '/model/R-50.pkl'
-  DATASET: '/data/coco_2014_trainval35k'
+  DATASET: '/data/coco_2017_train'
   IMS_PER_BATCH: 2
   SCALES: [640, 672, 704, 736, 768, 800]
   MAX_SIZE: 1333
   USE_DIFF: False  # Do not use crowd objects
 TEST:
-  DATASET: '/data/coco_2014_minival'
-  JSON_FILE: '/data/instances_minival2014.json'
+  DATASET: '/data/coco_2017_val'
+  JSON_FILE: '/data/instances_val2017.json'
   PROTOCOL: 'coco'
   IMS_PER_BATCH: 1
   SCALES: [800]
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair',
@@ -20,7 +20,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_416
+  SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_416'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
 PIXEL_STDS: [57.375, 57.12, 58.395]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: retinanet
-  BACKBONE: resnet50.fpn
+  TYPE: 'retinanet'
+  BACKBONE: 'resnet50.fpn'
   CLASSES: ['__background__',
             'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair',
@@ -20,7 +20,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_512
+  SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_512'
 PIPELINE:
   TYPE: 'ssd'
 TRAIN:
...
@@ -2,8 +2,8 @@ NUM_GPUS: 1
 PIXEL_STDS: [1.0, 1.0, 1.0]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: ssd
-  BACKBONE: vgg16_reduced_300
+  TYPE: 'ssd'
+  BACKBONE: 'vgg16_reduced_300'
   COARSEST_STRIDE: 0
   CLASSES: ['__background__',
             'aeroplane', 'bicycle', 'bird', 'boat',
@@ -31,7 +31,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_ssd_VGG-16_300
+  SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_300'
 TRAIN:
   WEIGHTS: '/model/VGG16.SSD.pkl'
   DATASET: '/data/voc_0712_trainval'
...
@@ -2,8 +2,8 @@ NUM_GPUS: 2
 PIXEL_STDS: [1.0, 1.0, 1.0]
 PIXEL_MEANS: [103.53, 116.28, 123.675]
 MODEL:
-  TYPE: ssd
-  BACKBONE: vgg16_reduced_512
+  TYPE: 'ssd'
+  BACKBONE: 'vgg16_reduced_512'
   CLASSES: ['__background__',
             'aeroplane', 'bicycle', 'bird', 'boat',
             'bottle', 'bus', 'car', 'cat', 'chair',
@@ -32,7 +32,7 @@ SOLVER:
   DECAY_STEPS: [80000, 100000]
   MAX_STEPS: 120000
   SNAPSHOT_EVERY: 5000
-  SNAPSHOT_PREFIX: voc_ssd_VGG-16_512
+  SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_512'
 TRAIN:
   WEIGHTS: '/model/VGG16.SSD.pkl'
   DATASET: '/data/voc_0712_trainval'
...
@@ -18,7 +18,7 @@ import os
 import shutil
 from maker import make_record
-from maskgen import make_mask, merge_mask
+from roidb import make_database
 if __name__ == '__main__':
@@ -27,30 +27,25 @@ if __name__ == '__main__':
     # Encode masks to RLE bytes
     if not os.path.exists('build'):
         os.makedirs('build')
-    make_mask('train', '2014', COCO_ROOT)
-    make_mask('valminusminival', '2014', COCO_ROOT)
-    make_mask('minival', '2014', COCO_ROOT)
-    merge_mask('trainval35k', '2014', ['build/coco_2014_train_mask.pkl',
-                                       'build/coco_2014_valminusminival_mask.pkl'])
+    make_database('train', '2017', COCO_ROOT)
+    make_database('val', '2017', COCO_ROOT)
-    # coco_2014_trainval35k
+    # coco_2017_train
     make_record(
-        record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
-        images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
-                     os.path.join(COCO_ROOT, 'images/val2014')],
-        splits_path=[os.path.join(COCO_ROOT, 'splits'),
-                     os.path.join(COCO_ROOT, 'splits')],
-        mask_file='build/coco_2014_trainval35k_mask.pkl',
-        splits=['train', 'valminusminival'],
+        db_file='build/coco_2017_train.db.pkl',
+        record_file=os.path.join(COCO_ROOT, 'coco_2017_train'),
+        images_path=[os.path.join(COCO_ROOT, 'images/train2017')],
+        splits_path=[os.path.join(COCO_ROOT, 'splits')],
+        splits=['train2017'],
     )
-    # coco_2014_minival
+    # coco_2017_val
     make_record(
-        record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
-        images_path=os.path.join(COCO_ROOT, 'images/val2014'),
-        mask_file='build/coco_2014_minival_mask.pkl',
-        splits_path=os.path.join(COCO_ROOT, 'splits'),
-        splits=['minival'],
+        db_file='build/coco_2017_val.db.pkl',
+        record_file=os.path.join(COCO_ROOT, 'coco_2017_val'),
+        images_path=[os.path.join(COCO_ROOT, 'images/val2017')],
+        splits_path=[os.path.join(COCO_ROOT, 'splits')],
+        splits=['val2017'],
     )
     shutil.rmtree('build')
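
Aside: the rewritten pipeline keys everything on the per-split database files written by make_database. A minimal sketch of inspecting one of them before the script removes 'build' — assuming, as the code above suggests, each .db.pkl holds a pickled OrderedDict mapping an image filename stem to its list of annotation objects:

import pickle

# Inspect the entries written by make_database('val', '2017', COCO_ROOT).
with open('build/coco_2017_val.db.pkl', 'rb') as f:
    entries = pickle.load(f)  # OrderedDict: filename stem -> objects

# Each object is expected to carry at least 'name' and 'bbox'
# (the keys that make_example reads in the next file).
for filename, objects in list(entries.items())[:3]:
    for obj in objects:
        print(filename, obj['name'], obj['bbox'])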
@@ -18,7 +18,7 @@ import dragon
 import numpy as np
-def make_example(image_file, mask_objects, im_scale=None):
+def make_example(image_file, objects, im_scale=None):
     filename = os.path.split(image_file)[-1]
     example = {'id': filename.split('.')[0], 'object': []}
@@ -39,7 +39,7 @@ def make_example(image_file, mask_objects, im_scale=None):
     example['height'], example['width'], example['depth'] = img.shape
     example['content'] = img_bytes
-    for ix, obj in enumerate(mask_objects):
+    for obj in objects:
         x1, y1, x2, y2 = obj['bbox']
         example['object'].append({
             'name': obj['name'],
@@ -58,7 +58,7 @@ def make_example(image_file, mask_objects, im_scale=None):
 def make_record(
         record_file,
         images_path,
-        mask_file,
+        db_file,
         splits_path,
         splits,
         ext='.jpg',
@@ -75,11 +75,11 @@ def make_record(
     assert len(splits) == len(splits_path)
     assert len(splits) == len(images_path)
-    if mask_file is not None:
-        with open(mask_file, 'rb') as f:
-            all_masks = pickle.load(f)
+    if db_file is not None:
+        with open(db_file, 'rb') as f:
+            all_entries = pickle.load(f)
     else:
-        all_masks = {}
+        all_entries = {}
     print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
@@ -133,8 +133,8 @@ def make_record(
                 count, total_line, now_time - start_time))
         filename = line.strip()
         image_file = os.path.join(images_path[db_idx], filename + ext)
-        mask_objects = all_masks[filename] if filename in all_masks else {}
-        writer.write(make_example(image_file, mask_objects, im_scale))
+        objects = all_entries[filename] if filename in all_entries else {}
+        writer.write(make_example(image_file, objects, im_scale))
     now_time = time.time()
     print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
...
@@ -75,10 +75,12 @@ class COCOWrapper(object):
         """Construct an image path from the image's "index" identifier."""
         # Example image path for index=119993:
         # images/train2014/COCO_train2014_000000119993.jpg
+        # images/train2017/000000119993.jpg
-        file_name = ('COCO_' + self._data_name + '_' +
-                     str(index).zfill(12) + '.jpg')
+        filename = str(index).zfill(12) + '.jpg'
+        if '2014' in self._data_name:
+            filename = 'COCO_{}_{}'.format(self._data_name, filename)
         image_path = osp.join(self._data_path, 'images',
-                              self._data_name, file_name)
+                              self._data_name, filename)
         assert osp.exists(image_path), \
             'Path does not exist: {}'.format(image_path)
         return image_path
@@ -99,19 +101,18 @@ class COCOWrapper(object):
         objects = self._COCO.loadAnns(ann_ids)
         # Sanitize boxes -- some are invalid
         valid_objects = []
-        mask, polygons = b'', []
         for obj in objects:
             x1 = float(max(0, obj['bbox'][0]))
             y1 = float(max(0, obj['bbox'][1]))
             x2 = float(min(width - 1, x1 + max(0, obj['bbox'][2] - 1)))
             y2 = float(min(height - 1, y1 + max(0, obj['bbox'][3] - 1)))
+            mask, polygons = b'', []
             if isinstance(obj['segmentation'], list):
                 for p in obj['segmentation']:
                     if len(p) < 6:
                         print('Remove Invalid segm.')
                 # Valid polygons have >= 3 points, so require >= 6 coordinates
                 polygons = [p for p in obj['segmentation'] if len(p) >= 6]
-                # mask_bytes = mask_utils.poly2bytes(poly, height, width)
             else:
                 # Crowd masks
                 # Some are encoded with height or width
@@ -141,25 +142,26 @@ class COCOWrapper(object):
         return len(self._classes)
-def make_mask(split, year, data_dir):
+def make_database(split, year, data_dir):
     coco = COCOWrapper(split, year, data_dir)
     print('Preparing to make split: {}, total {} images'
           .format(split, coco.num_images))
     if not osp.exists(osp.join(coco._data_path, 'splits')):
         os.makedirs(osp.join(coco._data_path, 'splits'))
-    gt_recs = collections.OrderedDict()
+    entries = collections.OrderedDict()
     for i in range(coco.num_images):
         filename = osp.basename(coco.image_path_at(i)).split('.')[0]
         h, w, objects = coco.annotation_at(i)
-        gt_recs[filename] = objects
+        entries[filename] = objects
     with open(osp.join('build',
-                       'coco_' + year +
-                       '_' + split + '_mask.pkl'), 'wb') as f:
-        pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
-    with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
+                       'coco_' + year + '_' + split +
+                       '.db.pkl'), 'wb') as f:
+        pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)
+    with open(osp.join(coco._data_path, 'splits',
+                       split + year + '.txt'), 'w') as f:
         for i in range(coco.num_images):
             filename = str(osp.basename(coco.image_path_at(i)).split('.')[0])
             if i != coco.num_images - 1:
@@ -167,16 +169,16 @@ def make_mask(split, year, data_dir):
                 f.write(filename)
-def merge_mask(split, year, mask_files):
-    gt_recs = collections.OrderedDict()
-    data_path = os.path.dirname(mask_files[0])
-    for mask_file in mask_files:
-        with open(mask_file, 'rb') as f:
-            recs = pickle.load(f)
-        gt_recs.update(recs)
+def merge_database(split, year, db_files):
+    entries = collections.OrderedDict()
+    data_path = os.path.dirname(db_files[0])
+    for db_file in db_files:
+        with open(db_file, 'rb') as f:
+            entries.update(pickle.load(f))
     with open(osp.join(data_path,
-                       'coco_' + year +
-                       '_' + split + '_mask.pkl'), 'wb') as f:
-        pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
+                       'coco_' + year + '_' + split +
+                       '.db.pkl'), 'wb') as f:
+        pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)
@@ -27,6 +27,8 @@ from seetadet.utils import image as image_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scales = cfg.TRAIN.SCALES
@@ -43,7 +45,7 @@ class DataTransformer(multiprocessing.Process):
         self.q_in = self.q_out = None
         self.daemon = True
-    def get_boxes(self, example, im_scale):
+    def get_boxes(self, example, im_scale, flipped):
         objects, num_objects = example.objects, 0
         height, width = example.height, example.width
         if not self._use_diff:
@@ -56,7 +58,7 @@ class DataTransformer(multiprocessing.Process):
         boxes = np.zeros((num_objects, 4), 'float32')
         gt_classes = np.zeros((num_objects,), 'float32')
-        # Filter the difficult instances
+        # Filter the difficult instances.
         object_idx = 0
         for obj in objects:
             if not self._use_diff and obj.get('difficult', 0) > 0:
@@ -69,10 +71,14 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1
-        # Scale the boxes to the detecting scale
+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
+        # Scale the boxes to the detecting scale.
         boxes *= im_scale
-        # Attach the classes
+        # Attach the classes.
         gt_boxes = np.empty((num_objects, 5), dtype=np.float32)
         gt_boxes[:, :4], gt_boxes[:, 4] = boxes, gt_classes
@@ -81,7 +87,7 @@ class DataTransformer(multiprocessing.Process):
     def get(self, example):
         example = Example(example)
-        # Resize
+        # Resize.
        img, im_scale = image_util.resize_image_with_target_size(
             example.image,
             target_size=npr.choice(self._scales),
@@ -89,22 +95,18 @@ class DataTransformer(multiprocessing.Process):
             random_scales=self._random_scales,
         )
-        # Flip
+        # Flip.
         flipped = False
         if self._use_flipped and npr.randint(2) > 0:
             img = img[:, ::-1]
             flipped = True
-        # Distort
+        # Distort.
         if self._use_distort:
             img = image_util.distort_image(img)
-        # Boxes
-        boxes = self.get_boxes(example, im_scale)
-        # Flip the boxes if necessary
-        if flipped:
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
+        # Boxes.
+        boxes = self.get_boxes(example, im_scale, flipped)
         # Standard outputs.
         outputs = {'image': img,
...
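
This hunk carries the substance of the fix: boxes are now mirrored in the original image coordinates (using the example's true width) before being multiplied by im_scale, instead of after scaling with the rounded img.shape[1]. With the usual x1' = width - x2 - 1 flip convention (an assumption about box_util.flip_boxes), flip-after-scale differs from flip-before-scale by (width * im_scale - resized_width) + (1 - im_scale): the one-pixel inset gets applied at the wrong scale, plus whatever the resized width lost to rounding. A minimal sketch of the two orders:

import numpy as np

def flip_boxes(boxes, width):
    """Mirror boxes horizontally inside an image of the given width."""
    flipped = boxes.copy()
    flipped[:, 0] = width - boxes[:, 2] - 1
    flipped[:, 2] = width - boxes[:, 0] - 1
    return flipped

width, im_scale = 640, 1.3
boxes = np.array([[100., 80., 220., 180.]], 'float32')

# Fixed order: flip in original coordinates, then scale.
good = flip_boxes(boxes, width) * im_scale

# Old order: scale first, then flip with the resized image width.
resized_w = int(np.round(width * im_scale))
bad = flip_boxes(boxes * im_scale, resized_w)

print(good - bad)  # [[-0.3, 0., -0.3, 0.]]: the flipped box drifts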
@@ -28,6 +28,8 @@ from seetadet.utils import image as image_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scales = cfg.TRAIN.SCALES
@@ -81,6 +83,10 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1
+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
         # Scale the boxes to the detecting scale.
         boxes *= im_scale
@@ -115,10 +121,6 @@ class DataTransformer(multiprocessing.Process):
         # Boxes and segmentations.
         boxes, segms = self.get_boxes_and_segms(example, im_scale, flipped)
-        # Flip the boxes if necessary.
-        if flipped:
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
         # Standard outputs.
         outputs = {'image': img,
                    'boxes': boxes,
...
@@ -124,37 +124,37 @@ class ProposalTarget(object):
 def compute_targets(
-        ex_rois,
-        gt_rois,
+        rois,
+        gt_boxes,
         gt_labels,
-        gt_segms,
-        mask_flags,
+        fg_segms,
+        fg_segms_flag,
         mask_size,
         im_scale,
     ):
     """Compute the bounding-box regression targets."""
-    assert ex_rois.shape[0] == gt_rois.shape[0]
-    assert ex_rois.shape[1] == 4
-    assert gt_rois.shape[1] == 4
+    assert rois.shape[0] == gt_boxes.shape[0]
+    assert rois.shape[1] == 4
+    assert gt_boxes.shape[1] == 4
     # Compute bbox regression targets
     fg_inds = np.where(gt_labels > 0)[0]
-    bbox_targets = box_util.bbox_transform(
-        ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
+    bbox_targets = box_util.bbox_transform(rois, gt_boxes, cfg.BBOX_REG_WEIGHTS)
     # Compute mask classification targets
     mask_shape = [mask_size] * 2
-    ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
-    mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
-    for i in fg_inds:
-        if mask_flags[i] > 0:
-            if isinstance(gt_segms[i], list):
-                ret = mask_util.warp_mask_via_polygons(
-                    gt_segms[i], ex_rois_ori[i], mask_shape)
-            else:
-                gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
-                ret = mask_util.warp_mask_via_intersection(
-                    gt_segms[i], ex_rois_ori[i], gt_rois_ori[i], mask_shape)
-            if ret is not None:
-                mask_targets[i] = ret.astype('float32')
+    mask_targets = -np.ones([len(rois)] + mask_shape, 'float32')
+    rois_ori = rois / im_scale
+    rois_ori_int = np.round(rois_ori).astype(int)
+    gt_boxes_ori_int = np.round(gt_boxes / im_scale).astype(int)
+    for i, fg_idx in enumerate(fg_inds):
+        if fg_segms_flag[i] > 0:
+            if isinstance(fg_segms[i], list):
+                target = mask_util.warp_mask_via_polygons(
+                    fg_segms[i], rois_ori[i], mask_shape)
+            else:
+                target = mask_util.warp_mask_via_intersection(
+                    fg_segms[i], rois_ori_int[i], gt_boxes_ori_int[i], mask_shape)
+            if target is not None:
+                mask_targets[fg_idx] = target.astype(mask_targets.dtype)
     return bbox_targets, mask_targets
...
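
For context, box_util.bbox_transform computes the regression targets consumed above. A sketch of the standard R-CNN parameterization it presumably follows — the exact weighting lives in cfg.BBOX_REG_WEIGHTS, so treat this as an assumption rather than the project's verbatim code:

import numpy as np

def bbox_transform(rois, gt_boxes, weights=(1., 1., 1., 1.)):
    """Standard R-CNN box-regression targets: (dx, dy, dw, dh)."""
    pw = rois[:, 2] - rois[:, 0] + 1.0
    ph = rois[:, 3] - rois[:, 1] + 1.0
    px = rois[:, 0] + 0.5 * pw
    py = rois[:, 1] + 0.5 * ph
    gw = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
    gh = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
    gx = gt_boxes[:, 0] + 0.5 * gw
    gy = gt_boxes[:, 1] + 0.5 * gh
    wx, wy, ww, wh = weights
    # Center offsets are normalized by the proposal size; sizes are log-ratios.
    return np.stack([wx * (gx - px) / pw,
                     wy * (gy - py) / ph,
                     ww * np.log(gw / pw),
                     wh * np.log(gh / ph)], axis=1)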
@@ -27,6 +27,8 @@ from seetadet.utils import boxes as box_util
 class DataTransformer(multiprocessing.Process):
+    """DataTransformer."""
     def __init__(self, **kwargs):
         super(DataTransformer, self).__init__()
         self._scale = cfg.TRAIN.SCALES[0]
@@ -44,7 +46,7 @@ class DataTransformer(multiprocessing.Process):
         self.q_in = self.q_out = None
         self.daemon = True
-    def get_boxes(self, example):
+    def get_boxes(self, example, flipped):
         objects, num_objects = example.objects, 0
         height, width = example.height, example.width
         if not self._use_diff:
@@ -70,6 +72,10 @@ class DataTransformer(multiprocessing.Process):
             gt_classes[object_idx] = self._class_to_ind[obj['name']]
             object_idx += 1
+        # Flip the boxes if necessary.
+        if flipped:
+            boxes = box_util.flip_boxes(boxes, width)
         # Normalize.
         boxes[:, 0::2] /= width
         boxes[:, 1::2] /= height
@@ -82,25 +88,31 @@ class DataTransformer(multiprocessing.Process):
     def get(self, example):
         example = Example(example)
+        img = example.image
+        # Flip.
+        flipped = False
+        if self._use_flipped and npr.randint(2) > 0:
+            img = img[:, ::-1]
+            flipped = True
         # Boxes.
-        boxes = self.get_boxes(example)
+        boxes = self.get_boxes(example, flipped)
+        # Return to avoid the invalid transforms.
         if len(boxes) == 0:
             return {'boxes': boxes}
         # Distort => Expand => Sample => Resize
-        img, boxes = self._apply_transform(example.image, boxes)
+        img, boxes = self._apply_transform(img, boxes)
         # Restore to the blob scale.
         boxes[:, :4] *= self._scale
-        # Flip.
-        if self._use_flipped and npr.randint(2) > 0:
-            img = img[:, ::-1]
-            boxes = box_util.flip_boxes(boxes, img.shape[1])
         # Standard outputs.
-        outputs = {'image': img, 'boxes': boxes, 'im_info': img.shape[:2]}
+        outputs = {'image': img,
+                   'boxes': boxes,
+                   'im_info': img.shape[:2]}
         # Attach precomputed targets.
         if len(boxes) > 0:
...
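
The SSD path needed the same reordering for a stronger reason: get_boxes normalizes coordinates by the original width and height, and _apply_transform (expand/sample/resize) then changes the geometry, so a flip applied afterwards with the transformed img.shape[1] no longer corresponds to the original mirror. Flipping the raw image and the raw boxes together, before any other transform, keeps the pair consistent. A tiny sketch of the equivalent mirror in normalized coordinates (hypothetical values):

import numpy as np

# Mirror in normalized [0, 1] coordinates: x -> 1 - x, applied before
# expand/sample so the later transforms see a consistent image/box pair.
boxes = np.array([[0.10, 0.20, 0.45, 0.60]])  # x1, y1, x2, y2 in [0, 1]
flipped = boxes.copy()
flipped[:, 0], flipped[:, 2] = 1.0 - boxes[:, 2], 1.0 - boxes[:, 0]
print(flipped)  # [[0.55 0.2  0.9  0.6 ]]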
@@ -333,10 +333,12 @@ __C.FRCNN.NEGATIVE_OVERLAP_HI = 0.5
 __C.FRCNN.NEGATIVE_OVERLAP_LO = 0.0
 # RoI transform function
-# Values supported: 'RoIAlign', 'RoIAlign'
+# Values supported: 'RoIAlign', 'RoIPool'
 __C.FRCNN.ROI_XFORM_METHOD = 'RoIAlign'
 # RoI transform output resolution
 __C.FRCNN.ROI_XFORM_RESOLUTION = 7
 # Resampling window size for RoI transformation
 __C.FRCNN.ROI_XFORM_SAMPLING_RATIO = 0
@@ -362,10 +364,12 @@ __C.MRCNN = AttrDict()
 __C.MRCNN.RESOLUTION = 28
 # RoI transform function
-# Values supported: 'RoIAlign', 'RoIAlign'
+# Values supported: 'RoIAlign', 'RoIPool'
 __C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
 # RoI transform output resolution
 __C.MRCNN.ROI_XFORM_RESOLUTION = 14
 # Resampling window size for RoI transformation
 __C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0
@@ -438,6 +442,7 @@ __C.SOLVER.DISPLAY = 20
 # The interval to snapshot a model
 __C.SOLVER.SNAPSHOT_EVERY = 5000
+# Prefix to yield the path: <prefix>_iter_XYZ.pkl
 __C.SOLVER.SNAPSHOT_PREFIX = ''
@@ -451,25 +456,34 @@ __C.SOLVER.MAX_STEPS = 40000
 # Base learning rate for the specified schedule
 __C.SOLVER.BASE_LR = 0.001
+# The uniform interval for LRScheduler
 __C.SOLVER.DECAY_STEP = 1
+# The custom intervals for LRScheduler
 __C.SOLVER.DECAY_STEPS = []
+# The decay factor for exponential LRScheduler
 __C.SOLVER.DECAY_GAMMA = 0.1
+# Warm up to ``BASE_LR`` over this number of steps
 __C.SOLVER.WARM_UP_STEPS = 500
+# Start the warm up from ``BASE_LR`` * ``FACTOR``
 __C.SOLVER.WARM_UP_FACTOR = 0.333
+# The type of LRScheduler
 __C.SOLVER.LR_POLICY = 'steps_with_decay'
+# Momentum to use with SGD
 __C.SOLVER.MOMENTUM = 0.9
+# L2 regularization for weight parameters
 __C.SOLVER.WEIGHT_DECAY = 0.0001
+# L2 regularization for legacy bias parameters
 __C.SOLVER.WEIGHT_DECAY_BIAS = 0.0
+# L2 norm factor for clipping gradients
 __C.SOLVER.CLIP_NORM = 0.0
...
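
Putting the newly documented solver options together: with LR_POLICY 'steps_with_decay' the learning rate warms up from BASE_LR * WARM_UP_FACTOR to BASE_LR over WARM_UP_STEPS, then is multiplied by DECAY_GAMMA at each milestone in DECAY_STEPS. A sketch of that schedule as commonly implemented — an assumption about this project's exact LRScheduler, not a copy of it:

def lr_at(step, base_lr=0.001, warm_up_steps=500, warm_up_factor=0.333,
          decay_steps=(60000, 80000), decay_gamma=0.1):
    """Learning rate under 'steps_with_decay' with linear warm-up."""
    if step < warm_up_steps:
        alpha = step / warm_up_steps
        return base_lr * (warm_up_factor * (1 - alpha) + alpha)
    num_decays = sum(step >= s for s in decay_steps)
    return base_lr * decay_gamma ** num_decays

print(lr_at(0), lr_at(500), lr_at(60000), lr_at(80000))
# 0.000333  0.001  0.0001  1e-05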
@@ -14,6 +14,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+import copy
 import cv2
 import numpy as np
 import PIL.Image
@@ -37,32 +39,37 @@ def warp_mask_via_intersection(mask, box1, box2, size):
     inter_mask = mask[y1:y2 + 1, x1:x2 + 1]
     target_h = box1[3] - box1[1] + 1
     target_w = box1[2] - box1[0] + 1
-    warped_mask = np.zeros((target_h, target_w), dtype=mask.dtype)
+    warped_mask = np.zeros((target_h, target_w), dtype='uint8')
     warped_mask[ex_start_y:ex_start_y + h,
                 ex_start_x:ex_start_x + w] = inter_mask
     if not isinstance(size, (tuple, list)):
         size = (size, size)
     mask = PIL.Image.fromarray(warped_mask)
-    return np.array(mask.resize((size[1], size[0]), PIL.Image.NEAREST))
+    mask = mask.resize((size[1], size[0]), PIL.Image.NEAREST)
+    return np.array(mask)
 def warp_mask_via_polygons(polygons, box, size):
     """Warp mask via polygons."""
-    w = np.maximum(box[2] - box[0], 1)
-    h = np.maximum(box[3] - box[1], 1)
+    w, h = box[2] - box[0], box[3] - box[1]
     if not isinstance(size, (tuple, list)):
         size = (size, size)
-    polygons_norm = []
-    for poly in polygons:
-        p = np.array(poly, dtype=np.float32)
-        p[0::2] = (p[0::2] - box[0]) * size[1] / w
-        p[1::2] = (p[1::2] - box[1]) * size[0] / h
-        polygons_norm.append(p)
-    rle = mask_tools.frPyObjects(polygons_norm, size[0], size[1])
-    mask = np.array(mask_tools.decode(rle))
-    mask = np.sum(mask, axis=2)
-    mask = np.array(mask > 0)
-    return mask
+    ratio_h = size[0] / max(h, 0.1)
+    ratio_w = size[1] / max(w, 0.1)
+    polygons = copy.deepcopy(polygons)
+    for p in polygons:
+        p[0::2] = p[0::2] - box[0]
+        p[1::2] = p[1::2] - box[1]
+    if ratio_h == ratio_w:
+        for p in polygons:
+            p *= ratio_h
+    else:
+        for p in polygons:
+            p[0::2] *= ratio_w
+            p[1::2] *= ratio_h
+    rle_objs = mask_tools.frPyObjects(polygons, size[0], size[1])
+    rle_objs = [mask_tools.merge(rle_objs)]
+    return mask_tools.decode(rle_objs)[:, :, 0]
 def mask_overlap(box1, box2, mask1, mask2):
@@ -148,7 +155,7 @@ def project_masks(
     w = np.maximum(w, 1)
     h = np.maximum(h, 1)
     mask = cv2.resize(padded_mask, (w, h))
-    mask = np.array(mask > thresh, 'uint8')
+    mask = np.array(mask >= thresh, 'uint8')
     x1 = max(ref_box[0], 0)
     y1 = max(ref_box[1], 0)
     x2 = min(ref_box[2] + 1, width)
...
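
The rewritten warp_mask_via_polygons leans on pycocotools: it shifts the polygons into the box frame, rescales them to the target resolution, rasterizes with frPyObjects, merges the per-polygon RLEs, and decodes a single binary mask. A minimal standalone sketch of that rasterization path, assuming mask_tools is pycocotools.mask:

from pycocotools import mask as mask_tools

size = (28, 28)  # target (h, w) of the mask
# One polygon, already shifted and scaled into the 28x28 box frame:
polygons = [[2., 2., 25., 2., 25., 25., 2., 25.]]

rle_objs = mask_tools.frPyObjects(polygons, size[0], size[1])
rle = [mask_tools.merge(rle_objs)]      # union of all polygon parts
target = mask_tools.decode(rle)[:, :, 0]  # uint8 mask of shape (28, 28)
print(target.shape, target.sum())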