Commit e3b9b641 by Ting PAN

Fix the bug of scaling flipped boxes

1 parent 9d12d142
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet50.fpn
TYPE: 'faster_rcnn'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -21,24 +21,23 @@ MODEL:
'teddy bear', 'hair drier', 'toothbrush']
SOLVER:
BASE_LR: 0.02
LR_POLICY: steps_with_decay
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_1x
SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_1x'
FRCNN:
BATCH_SIZE: 512
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [800]
......
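Two changes recur throughout these config hunks: string-valued fields gain explicit quotes, and the 2014 trainval35k/minival splits are replaced by the official 2017 train/val splits. The quoting is purely stylistic; YAML parses faster_rcnn and 'faster_rcnn' to the same string. A quick parse check, assuming PyYAML and a hypothetical config filename:

    # Minimal sketch: confirm the config parses as expected (PyYAML assumed).
    import yaml

    with open('coco_faster_rcnn_R-50-FPN_800_1x.yml') as f:  # filename assumed
        cfg = yaml.safe_load(f)

    assert cfg['MODEL']['TYPE'] == 'faster_rcnn'  # quoted or not, same value
    assert len(cfg['MODEL']['CLASSES']) == 81     # 80 COCO classes + background
    print(cfg['SOLVER']['DECAY_STEPS'])           # [60000, 80000]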
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet50.fpn
TYPE: 'faster_rcnn'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -21,24 +21,23 @@ MODEL:
'teddy bear', 'hair drier', 'toothbrush']
SOLVER:
BASE_LR: 0.02
LR_POLICY: steps_with_decay
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_faster_rcnn_R-50-FPN_800_2x
SNAPSHOT_PREFIX: 'coco_faster_rcnn_R-50-FPN_800_2x'
FRCNN:
BATCH_SIZE: 512
ROI_XFORM_RESOLUTION: 7
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [800]
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: faster_rcnn
BACKBONE: resnet50.fpn
TYPE: 'faster_rcnn'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -18,7 +18,7 @@ SOLVER:
DECAY_STEPS: [80000, 100000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_faster_rcnn_R-50-FPN_640
SNAPSHOT_PREFIX: 'voc_faster_rcnn_R-50-FPN_640'
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/voc_0712_trainval'
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: mask_rcnn
BACKBONE: resnet50.fpn
TYPE: 'mask_rcnn'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,7 +24,7 @@ SOLVER:
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_1x
SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_1x'
FRCNN:
BATCH_SIZE: 512
ROI_XFORM_RESOLUTION: 7
......@@ -32,14 +32,14 @@ MRCNN:
ROI_XFORM_RESOLUTION: 14
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
SCALES: [800]
MAX_SIZE: 1333
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: mask_rcnn
BACKBONE: resnet50.fpn
TYPE: 'mask_rcnn'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,7 +24,7 @@ SOLVER:
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_mask_rcnn_R-50-FPN_800_2x
SNAPSHOT_PREFIX: 'coco_mask_rcnn_R-50-FPN_800_2x'
FRCNN:
BATCH_SIZE: 512
ROI_XFORM_RESOLUTION: 7
......@@ -32,14 +32,14 @@ MRCNN:
ROI_XFORM_RESOLUTION: 14
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
SCALES: [800]
MAX_SIZE: 1333
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,22 +24,21 @@ FPN:
RPN_MAX_LEVEL: 7
SOLVER:
BASE_LR: 0.01
LR_POLICY: steps_with_decay
DECAY_STEPS: [90000, 120000]
MAX_STEPS: 135000
SNAPSHOT_EVERY: 2500
SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_416_6x
SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_416_6x'
PIPELINE:
TYPE: 'ssd'
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 8
SCALES: [416]
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [416]
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,22 +24,21 @@ FPN:
RPN_MAX_LEVEL: 7
SOLVER:
BASE_LR: 0.01
LR_POLICY: steps_with_decay
DECAY_STEPS: [90000, 120000]
MAX_STEPS: 135000
SNAPSHOT_EVERY: 2500
SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_512_6x
SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_512_6x'
PIPELINE:
TYPE: 'ssd'
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 8
SCALES: [512]
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [512]
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,21 +24,20 @@ FPN:
RPN_MAX_LEVEL: 7
SOLVER:
BASE_LR: 0.01
LR_POLICY: steps_with_decay
DECAY_STEPS: [60000, 80000]
MAX_STEPS: 90000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_1x
SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_1x'
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [800]
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 8
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'person', 'bicycle', 'car', 'motorcycle', 'airplane',
'bus', 'train', 'truck', 'boat', 'traffic light',
......@@ -24,21 +24,20 @@ FPN:
RPN_MAX_LEVEL: 7
SOLVER:
BASE_LR: 0.01
LR_POLICY: steps_with_decay
DECAY_STEPS: [120000, 160000]
MAX_STEPS: 180000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: coco_retinanet_R-50-FPN_800_2x
SNAPSHOT_PREFIX: 'coco_retinanet_R-50-FPN_800_2x'
TRAIN:
WEIGHTS: '/model/R-50.pkl'
DATASET: '/data/coco_2014_trainval35k'
DATASET: '/data/coco_2017_train'
IMS_PER_BATCH: 2
SCALES: [640, 672, 704, 736, 768, 800]
MAX_SIZE: 1333
USE_DIFF: False # Do not use crowd objects
TEST:
DATASET: '/data/coco_2014_minival'
JSON_FILE: '/data/instances_minival2014.json'
DATASET: '/data/coco_2017_val'
JSON_FILE: '/data/instances_val2017.json'
PROTOCOL: 'coco'
IMS_PER_BATCH: 1
SCALES: [800]
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -20,7 +20,7 @@ SOLVER:
DECAY_STEPS: [80000, 100000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_416
SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_416'
PIPELINE:
TYPE: 'ssd'
TRAIN:
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 2
PIXEL_STDS: [57.375, 57.12, 58.395]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: retinanet
BACKBONE: resnet50.fpn
TYPE: 'retinanet'
BACKBONE: 'resnet50.fpn'
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -20,7 +20,7 @@ SOLVER:
DECAY_STEPS: [80000, 100000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_retinanet_R-50-FPN_512
SNAPSHOT_PREFIX: 'voc_retinanet_R-50-FPN_512'
PIPELINE:
TYPE: 'ssd'
TRAIN:
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 1
PIXEL_STDS: [1.0, 1.0, 1.0]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: ssd
BACKBONE: vgg16_reduced_300
TYPE: 'ssd'
BACKBONE: 'vgg16_reduced_300'
COARSEST_STRIDE: 0
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
......@@ -31,7 +31,7 @@ SOLVER:
DECAY_STEPS: [80000, 100000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_VGG-16_300
SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_300'
TRAIN:
WEIGHTS: '/model/VGG16.SSD.pkl'
DATASET: '/data/voc_0712_trainval'
......
......@@ -2,8 +2,8 @@ NUM_GPUS: 2
PIXEL_STDS: [1.0, 1.0, 1.0]
PIXEL_MEANS: [103.53, 116.28, 123.675]
MODEL:
TYPE: ssd
BACKBONE: vgg16_reduced_512
TYPE: 'ssd'
BACKBONE: 'vgg16_reduced_512'
CLASSES: ['__background__',
'aeroplane', 'bicycle', 'bird', 'boat',
'bottle', 'bus', 'car', 'cat', 'chair',
......@@ -32,7 +32,7 @@ SOLVER:
DECAY_STEPS: [80000, 100000]
MAX_STEPS: 120000
SNAPSHOT_EVERY: 5000
SNAPSHOT_PREFIX: voc_ssd_VGG-16_512
SNAPSHOT_PREFIX: 'voc_ssd_VGG-16_512'
TRAIN:
WEIGHTS: '/model/VGG16.SSD.pkl'
DATASET: '/data/voc_0712_trainval'
......
......@@ -18,7 +18,7 @@ import os
import shutil
from maker import make_record
from maskgen import make_mask, merge_mask
from roidb import make_database
if __name__ == '__main__':
......@@ -27,30 +27,25 @@ if __name__ == '__main__':
# Encode masks to RLE bytes
if not os.path.exists('build'):
os.makedirs('build')
make_mask('train', '2014', COCO_ROOT)
make_mask('valminusminival', '2014', COCO_ROOT)
make_mask('minival', '2014', COCO_ROOT)
merge_mask('trainval35k', '2014', ['build/coco_2014_train_mask.pkl',
'build/coco_2014_valminusminival_mask.pkl'])
make_database('train', '2017', COCO_ROOT)
make_database('val', '2017', COCO_ROOT)
# coco_2014_trainval35k
# coco_2017_train
make_record(
record_file=os.path.join(COCO_ROOT, 'coco_2014_trainval35k'),
images_path=[os.path.join(COCO_ROOT, 'images/train2014'),
os.path.join(COCO_ROOT, 'images/val2014')],
splits_path=[os.path.join(COCO_ROOT, 'splits'),
os.path.join(COCO_ROOT, 'splits')],
mask_file='build/coco_2014_trainval35k_mask.pkl',
splits=['train', 'valminusminival'],
db_file='build/coco_2017_train.db.pkl',
record_file=os.path.join(COCO_ROOT, 'coco_2017_train'),
images_path=[os.path.join(COCO_ROOT, 'images/train2017')],
splits_path=[os.path.join(COCO_ROOT, 'splits')],
splits=['train2017'],
)
# coco_2014_minival
# coco_2017_val
make_record(
record_file=os.path.join(COCO_ROOT, 'coco_2014_minival'),
images_path=os.path.join(COCO_ROOT, 'images/val2014'),
mask_file='build/coco_2014_minival_mask.pkl',
splits_path=os.path.join(COCO_ROOT, 'splits'),
splits=['minival'],
db_file='build/coco_2017_val.db.pkl',
record_file=os.path.join(COCO_ROOT, 'coco_2017_val'),
images_path=[os.path.join(COCO_ROOT, 'images/val2017')],
splits_path=[os.path.join(COCO_ROOT, 'splits')],
splits=['val2017'],
)
shutil.rmtree('build')
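With single 2017 train and val splits, the old make_mask/merge_mask steps for assembling trainval35k disappear; make_database now writes one .db.pkl per split plus a splits/<split><year>.txt listing. A pre-flight layout check (a sketch; the COCO_ROOT value is an assumption):

    import os

    COCO_ROOT = '/data/coco'  # assumption: adjust to your dataset root
    for split in ('train2017', 'val2017'):
        images = os.path.join(COCO_ROOT, 'images', split)
        assert os.path.isdir(images), 'missing ' + images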
......@@ -18,7 +18,7 @@ import dragon
import numpy as np
def make_example(image_file, mask_objects, im_scale=None):
def make_example(image_file, objects, im_scale=None):
filename = os.path.split(image_file)[-1]
example = {'id': filename.split('.')[0], 'object': []}
......@@ -39,7 +39,7 @@ def make_example(image_file, mask_objects, im_scale=None):
example['height'], example['width'], example['depth'] = img.shape
example['content'] = img_bytes
for ix, obj in enumerate(mask_objects):
for obj in objects:
x1, y1, x2, y2 = obj['bbox']
example['object'].append({
'name': obj['name'],
......@@ -58,7 +58,7 @@ def make_example(image_file, mask_objects, im_scale=None):
def make_record(
record_file,
images_path,
mask_file,
db_file,
splits_path,
splits,
ext='.jpg',
......@@ -75,11 +75,11 @@ def make_record(
assert len(splits) == len(splits_path)
assert len(splits) == len(images_path)
if mask_file is not None:
with open(mask_file, 'rb') as f:
all_masks = pickle.load(f)
if db_file is not None:
with open(db_file, 'rb') as f:
all_entries = pickle.load(f)
else:
all_masks = {}
all_entries = {}
print('Start Time:', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))
......@@ -133,8 +133,8 @@ def make_record(
count, total_line, now_time - start_time))
filename = line.strip()
image_file = os.path.join(images_path[db_idx], filename + ext)
mask_objects = all_masks[filename] if filename in all_masks else {}
writer.write(make_example(image_file, mask_objects, im_scale))
objects = all_entries[filename] if filename in all_entries else {}
writer.write(make_example(image_file, objects, im_scale))
now_time = time.time()
print('{} / {} in {:.2f} sec'.format(count, total_line, now_time - start_time))
......
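The mask_file to db_file rename reflects that the pickle now stores full per-image object entries rather than masks alone. The format make_example consumes is a dict keyed by the image's filename stem; a minimal hand-built example (field names beyond 'name' and 'bbox' are assumptions taken from this diff):

    import pickle

    entries = {
        '000000119993': [
            {'name': 'person', 'bbox': (12.0, 34.0, 120.0, 210.0)},
        ],
    }
    with open('build/coco_2017_train.db.pkl', 'wb') as f:
        pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)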
......@@ -75,10 +75,12 @@ class COCOWrapper(object):
"""Construct an image path from the image's "index" identifier."""
# Example image path for index=119993:
# images/train2014/COCO_train2014_000000119993.jpg
file_name = ('COCO_' + self._data_name + '_' +
str(index).zfill(12) + '.jpg')
# images/train2017/000000119993.jpg
filename = str(index).zfill(12) + '.jpg'
if '2014' in self._data_name:
filename = 'COCO_{}_{}'.format(self._data_name, filename)
image_path = osp.join(self._data_path, 'images',
self._data_name, file_name)
self._data_name, filename)
assert osp.exists(image_path), \
'Path does not exist: {}'.format(image_path)
return image_path
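The branch exists because the two releases name files differently; for index 119993:

    # 2014: images/train2014/COCO_train2014_000000119993.jpg
    # 2017: images/train2017/000000119993.jpg
    print(str(119993).zfill(12) + '.jpg')  # -> '000000119993.jpg'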
......@@ -99,19 +101,18 @@ class COCOWrapper(object):
objects = self._COCO.loadAnns(ann_ids)
# Sanitize boxes -- some are invalid
valid_objects = []
mask, polygons = b'', []
for obj in objects:
x1 = float(max(0, obj['bbox'][0]))
y1 = float(max(0, obj['bbox'][1]))
x2 = float(min(width - 1, x1 + max(0, obj['bbox'][2] - 1)))
y2 = float(min(height - 1, y1 + max(0, obj['bbox'][3] - 1)))
mask, polygons = b'', []
if isinstance(obj['segmentation'], list):
for p in obj['segmentation']:
if len(p) < 6:
print('Remove Invalid segm.')
# Valid polygons have >= 3 points, so require >= 6 coordinates
polygons = [p for p in obj['segmentation'] if len(p) >= 6]
# mask_bytes = mask_utils.poly2bytes(poly, height, width)
else:
# Crowd masks
# Some are encoded with height or width
......@@ -141,25 +142,26 @@ class COCOWrapper(object):
return len(self._classes)
def make_mask(split, year, data_dir):
def make_database(split, year, data_dir):
coco = COCOWrapper(split, year, data_dir)
print('Preparing to make split: {}, total {} images'
.format(split, coco.num_images))
if not osp.exists(osp.join(coco._data_path, 'splits')):
os.makedirs(osp.join(coco._data_path, 'splits'))
gt_recs = collections.OrderedDict()
entries = collections.OrderedDict()
for i in range(coco.num_images):
filename = osp.basename(coco.image_path_at(i)).split('.')[0]
h, w, objects = coco.annotation_at(i)
gt_recs[filename] = objects
entries[filename] = objects
with open(osp.join('build',
'coco_' + year +
'_' + split + '_mask.pkl'), 'wb') as f:
pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
'coco_' + year + '_' + split +
'.db.pkl'), 'wb') as f:
pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)
with open(osp.join(coco._data_path, 'splits', split + '.txt'), 'w') as f:
with open(osp.join(coco._data_path, 'splits',
split + year + '.txt'), 'w') as f:
for i in range(coco.num_images):
filename = str(osp.basename(coco.image_path_at(i)).split('.')[0])
if i != coco.num_images - 1:
......@@ -167,16 +169,16 @@ def make_mask(split, year, data_dir):
f.write(filename)
def merge_mask(split, year, mask_files):
gt_recs = collections.OrderedDict()
data_path = os.path.dirname(mask_files[0])
def merge_database(split, year, db_files):
entries = collections.OrderedDict()
data_path = os.path.dirname(db_files[0])
for mask_file in mask_files:
with open(mask_file, 'rb') as f:
recs = pickle.load(f)
gt_recs.update(recs)
for db_file in db_files:
with open(db_file, 'rb') as f:
db_entries = pickle.load(f)
entries.update(db_entries)
with open(osp.join(data_path,
'coco_' + year +
'_' + split + '_mask.pkl'), 'wb') as f:
pickle.dump(gt_recs, f, pickle.HIGHEST_PROTOCOL)
'coco_' + year + '_' + split +
'.db.pkl'), 'wb') as f:
pickle.dump(entries, f, pickle.HIGHEST_PROTOCOL)
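merge_database is no longer called by the driver, but it still supports splits assembled from several files; a hypothetical invocation mirroring the old trainval combination:

    merge_database('trainval', '2017', ['build/coco_2017_train.db.pkl',
                                        'build/coco_2017_val.db.pkl'])
    # -> writes build/coco_2017_trainval.db.pkl with entries from both files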
......@@ -27,6 +27,8 @@ from seetadet.utils import image as image_util
class DataTransformer(multiprocessing.Process):
"""DataTransformer."""
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
......@@ -43,7 +45,7 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None
self.daemon = True
def get_boxes(self, example, im_scale):
def get_boxes(self, example, im_scale, flipped):
objects, num_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
......@@ -56,7 +58,7 @@ class DataTransformer(multiprocessing.Process):
boxes = np.zeros((num_objects, 4), 'float32')
gt_classes = np.zeros((num_objects,), 'float32')
# Filter the difficult instances
# Filter the difficult instances.
object_idx = 0
for obj in objects:
if not self._use_diff and obj.get('difficult', 0) > 0:
......@@ -69,10 +71,14 @@ class DataTransformer(multiprocessing.Process):
gt_classes[object_idx] = self._class_to_ind[obj['name']]
object_idx += 1
# Scale the boxes to the detecting scale
# Flip the boxes if necessary.
if flipped:
boxes = box_util.flip_boxes(boxes, width)
# Scale the boxes to the detecting scale.
boxes *= im_scale
# Attach the classes
# Attach the classes.
gt_boxes = np.empty((num_objects, 5), dtype=np.float32)
gt_boxes[:, :4], gt_boxes[:, 4] = boxes, gt_classes
......@@ -81,7 +87,7 @@ class DataTransformer(multiprocessing.Process):
def get(self, example):
example = Example(example)
# Resize
# Resize.
img, im_scale = image_util.resize_image_with_target_size(
example.image,
target_size=npr.choice(self._scales),
......@@ -89,22 +95,18 @@ class DataTransformer(multiprocessing.Process):
random_scales=self._random_scales,
)
# Flip
# Flip.
flipped = False
if self._use_flipped and npr.randint(2) > 0:
img = img[:, ::-1]
flipped = True
# Distort
# Distort.
if self._use_distort:
img = image_util.distort_image(img)
# Boxes
boxes = self.get_boxes(example, im_scale)
# Flip the boxes if necessary
if flipped:
boxes = box_util.flip_boxes(boxes, img.shape[1])
# Boxes.
boxes = self.get_boxes(example, im_scale, flipped)
# Standard outputs.
outputs = {'image': img,
......
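This hunk is the actual bug named in the commit title. Flipping and scaling do not commute when the flip uses the pixel convention x -> width - 1 - x: applied after scaling, the "- 1" offset is expressed in resized pixels (and img.shape[1] carries the resize rounding), whereas it should be expressed in original pixels and then scaled along with the box. A minimal sketch of the discrepancy, with flip_boxes written out under that assumed convention:

    import numpy as np

    def flip_boxes(boxes, width):
        """Assumed convention: x -> width - 1 - x."""
        flipped = boxes.copy()
        flipped[:, 0] = width - 1 - boxes[:, 2]  # new x1 from old x2
        flipped[:, 2] = width - 1 - boxes[:, 0]  # new x2 from old x1
        return flipped

    width, im_scale = 640, 1.25
    boxes = np.array([[10., 20., 110., 220.]], 'float32')

    buggy = flip_boxes(boxes * im_scale, int(round(width * im_scale)))
    fixed = flip_boxes(boxes, width) * im_scale
    print(buggy[0, 0], fixed[0, 0])  # 661.5 vs 661.25: the offset didn't scale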
......@@ -28,6 +28,8 @@ from seetadet.utils import image as image_util
class DataTransformer(multiprocessing.Process):
"""DataTransformer."""
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scales = cfg.TRAIN.SCALES
......@@ -81,6 +83,10 @@ class DataTransformer(multiprocessing.Process):
gt_classes[object_idx] = self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary.
if flipped:
boxes = box_util.flip_boxes(boxes, width)
# Scale the boxes to the detecting scale.
boxes *= im_scale
......@@ -115,10 +121,6 @@ class DataTransformer(multiprocessing.Process):
# Boxes and segmentations.
boxes, segms = self.get_boxes_and_segms(example, im_scale, flipped)
# Flip the boxes if necessary.
if flipped:
boxes = box_util.flip_boxes(boxes, img.shape[1])
# Standard outputs.
outputs = {'image': img,
'boxes': boxes,
......
......@@ -124,37 +124,37 @@ class ProposalTarget(object):
def compute_targets(
ex_rois,
gt_rois,
rois,
gt_boxes,
gt_labels,
gt_segms,
mask_flags,
fg_segms,
fg_segms_flag,
mask_size,
im_scale,
):
"""Compute the bounding-box regression targets."""
assert ex_rois.shape[0] == gt_rois.shape[0]
assert ex_rois.shape[1] == 4
assert gt_rois.shape[1] == 4
assert rois.shape[0] == gt_boxes.shape[0]
assert rois.shape[1] == 4
assert gt_boxes.shape[1] == 4
# Compute bbox regression targets
fg_inds = np.where(gt_labels > 0)[0]
bbox_targets = box_util.bbox_transform(
ex_rois, gt_rois, cfg.BBOX_REG_WEIGHTS)
bbox_targets = box_util.bbox_transform(rois, gt_boxes, cfg.BBOX_REG_WEIGHTS)
# Compute mask classification targets
mask_shape = [mask_size] * 2
ex_rois_ori = np.round(ex_rois / im_scale).astype(int)
mask_targets = -np.ones([len(gt_labels)] + mask_shape, 'float32')
for i in fg_inds:
if mask_flags[i] > 0:
if isinstance(gt_segms[i], list):
ret = mask_util.warp_mask_via_polygons(
gt_segms[i], ex_rois_ori[i], mask_shape)
mask_targets = -np.ones([len(rois)] + mask_shape, 'float32')
rois_ori = rois / im_scale
rois_ori_int = np.round(rois_ori).astype(int)
gt_boxes_ori_int = np.round(gt_boxes / im_scale).astype(int)
for i, fg_idx in enumerate(fg_inds):
if fg_segms_flag[i] > 0:
if isinstance(fg_segms[i], list):
target = mask_util.warp_mask_via_polygons(
fg_segms[i], rois_ori[i], mask_shape)
else:
gt_rois_ori = np.round(gt_rois / im_scale).astype(int)
ret = mask_util.warp_mask_via_intersection(
gt_segms[i], ex_rois_ori[i], gt_rois_ori[i], mask_shape)
if ret is not None:
mask_targets[i] = ret.astype('float32')
target = mask_util.warp_mask_via_intersection(
fg_segms[i], rois_ori_int[i], gt_boxes_ori_int[i], mask_shape)
if target is not None:
mask_targets[fg_idx] = target.astype(mask_targets.dtype)
return bbox_targets, mask_targets
......
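Beyond the ex_rois/gt_rois to rois/gt_boxes renames, note the two index spaces in the rewritten loop: fg_segms and fg_segms_flag are packed per foreground RoI (position i), while mask_targets spans all sampled RoIs (position fg_idx). A toy illustration of that mapping (array contents are made up):

    import numpy as np

    gt_labels = np.array([2, 0, 1, 0])     # class label per sampled RoI
    fg_inds = np.where(gt_labels > 0)[0]   # -> [0, 2]
    # fg_segms[i] belongs to rois[fg_inds[i]], so a target computed from
    # fg_segms[1] must land at mask_targets[fg_inds[1]], i.e. index 2.
    for i, fg_idx in enumerate(fg_inds):
        print(i, '->', fg_idx)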
......@@ -27,6 +27,8 @@ from seetadet.utils import boxes as box_util
class DataTransformer(multiprocessing.Process):
"""DataTransformer."""
def __init__(self, **kwargs):
super(DataTransformer, self).__init__()
self._scale = cfg.TRAIN.SCALES[0]
......@@ -44,7 +46,7 @@ class DataTransformer(multiprocessing.Process):
self.q_in = self.q_out = None
self.daemon = True
def get_boxes(self, example):
def get_boxes(self, example, flipped):
objects, num_objects = example.objects, 0
height, width = example.height, example.width
if not self._use_diff:
......@@ -70,6 +72,10 @@ class DataTransformer(multiprocessing.Process):
gt_classes[object_idx] = self._class_to_ind[obj['name']]
object_idx += 1
# Flip the boxes if necessary.
if flipped:
boxes = box_util.flip_boxes(boxes, width)
# Normalize.
boxes[:, 0::2] /= width
boxes[:, 1::2] /= height
......@@ -82,25 +88,31 @@ class DataTransformer(multiprocessing.Process):
def get(self, example):
example = Example(example)
img = example.image
# Flip.
flipped = False
if self._use_flipped and npr.randint(2) > 0:
img = img[:, ::-1]
flipped = True
# Boxes.
boxes = self.get_boxes(example)
boxes = self.get_boxes(example, flipped)
# Return early to avoid invalid transforms.
if len(boxes) == 0:
return {'boxes': boxes}
# Distort => Expand => Sample => Resize
img, boxes = self._apply_transform(example.image, boxes)
img, boxes = self._apply_transform(img, boxes)
# Restore to the blob scale.
boxes[:, :4] *= self._scale
# Flip.
if self._use_flipped and npr.randint(2) > 0:
img = img[:, ::-1]
boxes = box_util.flip_boxes(boxes, img.shape[1])
# Standard outputs.
outputs = {'image': img, 'boxes': boxes, 'im_info': img.shape[:2]}
outputs = {'image': img,
'boxes': boxes,
'im_info': img.shape[:2]}
# Attach precomputed targets.
if len(boxes) > 0:
......
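The SSD path needed the same reordering for a second reason: get_boxes normalizes boxes to [0, 1], and after normalization the pixel-convention flip no longer applies. Flipping the image and the pixel-space boxes up front, before normalization and the distort/expand/sample chain, keeps everything in one coordinate frame. A sketch of the two frames (same assumed convention as above):

    import numpy as np

    width = height = 300
    boxes = np.array([[30., 60., 120., 150.]], 'float32')

    # Fixed order: flip in the pixel frame, then normalize.
    flipped = boxes.copy()
    flipped[:, 0] = width - 1 - boxes[:, 2]
    flipped[:, 2] = width - 1 - boxes[:, 0]
    flipped[:, 0::2] /= width
    flipped[:, 1::2] /= height
    print(flipped[0])

    # After normalization the correct flip would be x -> 1 - x; the old
    # post-transform flip applied the pixel rule 'width - 1 - x' to
    # coordinates that had been normalized and rescaled, re-introducing
    # the unscaled '- 1' offset.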
......@@ -333,10 +333,12 @@ __C.FRCNN.NEGATIVE_OVERLAP_HI = 0.5
__C.FRCNN.NEGATIVE_OVERLAP_LO = 0.0
# RoI transform function
# Values supported: 'RoIAlign', 'RoIAlign'
# Values supported: 'RoIAlign', 'RoIPool'
__C.FRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transform output resolution
__C.FRCNN.ROI_XFORM_RESOLUTION = 7
# Resampling window size for RoI transformation
__C.FRCNN.ROI_XFORM_SAMPLING_RATIO = 0
......@@ -362,10 +364,12 @@ __C.MRCNN = AttrDict()
__C.MRCNN.RESOLUTION = 28
# RoI transform function
# Values supported: 'RoIAlign', 'RoIAlign'
# Values supported: 'RoIAlign', 'RoIPool'
__C.MRCNN.ROI_XFORM_METHOD = 'RoIAlign'
# RoI transform output resolution
__C.MRCNN.ROI_XFORM_RESOLUTION = 14
# Resampling window size for RoI transformation
__C.MRCNN.ROI_XFORM_SAMPLING_RATIO = 0
......@@ -438,6 +442,7 @@ __C.SOLVER.DISPLAY = 20
# The interval to snapshot a model
__C.SOLVER.SNAPSHOT_EVERY = 5000
# Prefix to yield the path: <prefix>_iter_XYZ.pkl
__C.SOLVER.SNAPSHOT_PREFIX = ''
......@@ -451,25 +456,34 @@ __C.SOLVER.MAX_STEPS = 40000
# Base learning rate for the specified schedule
__C.SOLVER.BASE_LR = 0.001
# The uniform interval for LRScheduler
__C.SOLVER.DECAY_STEP = 1
# The custom intervals for LRScheduler
__C.SOLVER.DECAY_STEPS = []
# The decay factor for exponential LRScheduler
__C.SOLVER.DECAY_GAMMA = 0.1
# Warm up to ``BASE_LR`` over this number of steps
__C.SOLVER.WARM_UP_STEPS = 500
# Start the warm up from ``BASE_LR`` * ``FACTOR``
__C.SOLVER.WARM_UP_FACTOR = 0.333
# The type of LRScheduler
__C.SOLVER.LR_POLICY = 'steps_with_decay'
# Momentum to use with SGD
__C.SOLVER.MOMENTUM = 0.9
# L2 regularization for weight parameters
__C.SOLVER.WEIGHT_DECAY = 0.0001
# L2 regularization for legacy bias parameters
__C.SOLVER.WEIGHT_DECAY_BIAS = 0.0
# L2 norm factor for clipping gradients
__C.SOLVER.CLIP_NORM = 0.0
......
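The new solver defaults describe a Detectron-style schedule: 'steps_with_decay' multiplies BASE_LR by DECAY_GAMMA at each entry of DECAY_STEPS, after a warm-up from BASE_LR * WARM_UP_FACTOR over WARM_UP_STEPS iterations. A sketch of that rule (the linear warm-up shape is an assumption from the option names):

    def lr_at(step, base_lr=0.02, decay_steps=(60000, 80000), gamma=0.1,
              warm_up_steps=500, warm_up_factor=0.333):
        """Learning rate at a given step under 'steps_with_decay'."""
        if step < warm_up_steps:
            alpha = step / warm_up_steps
            return base_lr * (warm_up_factor * (1.0 - alpha) + alpha)
        return base_lr * gamma ** sum(step >= s for s in decay_steps)

    print(lr_at(0), lr_at(60000), lr_at(80000))  # 0.00666, 0.002, 0.0002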
......@@ -14,6 +14,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import cv2
import numpy as np
import PIL.Image
......@@ -37,32 +39,37 @@ def warp_mask_via_intersection(mask, box1, box2, size):
inter_mask = mask[y1:y2 + 1, x1:x2 + 1]
target_h = box1[3] - box1[1] + 1
target_w = box1[2] - box1[0] + 1
warped_mask = np.zeros((target_h, target_w), dtype=mask.dtype)
warped_mask = np.zeros((target_h, target_w), dtype='uint8')
warped_mask[ex_start_y:ex_start_y + h,
ex_start_x:ex_start_x + w] = inter_mask
if not isinstance(size, (tuple, list)):
size = (size, size)
mask = PIL.Image.fromarray(warped_mask)
return np.array(mask.resize((size[1], size[0]), PIL.Image.NEAREST))
mask = mask.resize((size[1], size[0]), PIL.Image.NEAREST)
return np.array(mask)
def warp_mask_via_polygons(polygons, box, size):
"""Warp mask via polygons."""
w = np.maximum(box[2] - box[0], 1)
h = np.maximum(box[3] - box[1], 1)
w, h = box[2] - box[0], box[3] - box[1]
if not isinstance(size, (tuple, list)):
size = (size, size)
polygons_norm = []
for poly in polygons:
p = np.array(poly, dtype=np.float32)
p[0::2] = (p[0::2] - box[0]) * size[1] / w
p[1::2] = (p[1::2] - box[1]) * size[0] / h
polygons_norm.append(p)
rle = mask_tools.frPyObjects(polygons_norm, size[0], size[1])
mask = np.array(mask_tools.decode(rle))
mask = np.sum(mask, axis=2)
mask = np.array(mask > 0)
return mask
ratio_h = size[0] / max(h, 0.1)
ratio_w = size[1] / max(w, 0.1)
polygons = copy.deepcopy(polygons)
for p in polygons:
p[0::2] = p[0::2] - box[0]
p[1::2] = p[1::2] - box[1]
if ratio_h == ratio_w:
for p in polygons:
p *= ratio_h
else:
for p in polygons:
p[0::2] *= ratio_w
p[1::2] *= ratio_h
rle_objs = mask_tools.frPyObjects(polygons, size[0], size[1])
rle_objs = [mask_tools.merge(rle_objs)]
return mask_tools.decode(rle_objs)[:, :, 0]
def mask_overlap(box1, box2, mask1, mask2):
......@@ -148,7 +155,7 @@ def project_masks(
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > thresh, 'uint8')
mask = np.array(mask >= thresh, 'uint8')
x1 = max(ref_box[0], 0)
y1 = max(ref_box[1], 0)
x2 = min(ref_box[2] + 1, width)
......
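A hypothetical end-to-end use of the rewritten warp_mask_via_polygons: crop a COCO-style polygon list to a box and rasterize it at the mask-head resolution (28 matches MRCNN.RESOLUTION above; the polygon and box values are made up, and the function is assumed importable from this module):

    import numpy as np

    polygons = [np.array([10., 10., 60., 10., 60., 40., 10., 40.], 'float64')]
    box = np.array([5., 5., 70., 50.], 'float64')  # x1, y1, x2, y2
    target = warp_mask_via_polygons(polygons, box, 28)
    print(target.shape, target.dtype)  # (28, 28) uint8, from the RLE decode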