-
Notifications
You must be signed in to change notification settings - Fork 82
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit dfb0eca
Showing
8 changed files
with
945 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
# for airbus_rle_to_coco | ||
|
||
# Data files and directories common in repo root | ||
datasets/ | ||
dataset/ | ||
logs/ | ||
*.h5 | ||
results/ | ||
temp/ | ||
tmp/ | ||
test/ | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# Distribution / packaging | ||
.Python | ||
env/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Visual Studio Code | ||
.vscode | ||
|
||
# PyCharm | ||
.idea/ | ||
|
||
# Dropbox | ||
.dropbox.attr | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# dotenv | ||
.env | ||
|
||
# virtualenv | ||
.venv | ||
venv/ | ||
ENV/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import os | ||
import pandas as pd | ||
import numpy as np | ||
from PIL import Image | ||
|
||
# Input locations: the training image folder and the mask CSV.
dataset_train = '../datasets/ships_train2018'
csv_train = '../datasets/train_ship_segmentations_v2.csv'

if __name__ == '__main__':
    # Load the annotation CSV.
    df = pd.read_csv(csv_train)
    print("Dataframe lines : ", df.shape[0])

    # Rows containing a missing value carry no ship annotation; drop them.
    df = df.dropna(axis=0)
    num_of_ships = df.shape[0]
    print("Inastances : ", num_of_ships)

    # Unique values of the first CSV column for the rows that are left,
    # i.e. the names of the images that still have an annotation.
    images = set(df.iloc[:, 0])
    print("Images with ship: ", len(images))

    # Remove every file in the training folder that has no annotation left.
    # The original joined against an undefined name (`im_path`), which raised
    # NameError on the first deletion; the listed files live in
    # `dataset_train`, so that is the directory to join against.
    count = 0
    for im in os.listdir(dataset_train):
        if im not in images:
            os.remove(os.path.join(dataset_train, im))
            count += 1
    print('%d images is deleted.' % (count))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import numpy as np | ||
import pandas as pd | ||
from skimage.data import imread | ||
import matplotlib.pyplot as plt | ||
import os | ||
# Print NumPy arrays in full instead of abbreviating large ones with "...".
np.set_printoptions(threshold=np.inf)
|
||
def rle_decode(mask_rle, shape=(768, 768)):
    """Decode a run-length-encoded mask string into a binary mask array.

    Ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode

    Args:
        mask_rle: Whitespace-separated ``start length`` pairs, where each
            start is a 1-based index into the flattened mask. ``None`` or a
            float NaN (how pandas reports an empty CSV cell) is treated as
            "no mask" and decodes to an all-background array.
        shape: Shape the flat buffer is reshaped to before it is transposed.

    Returns:
        ``np.ndarray`` of dtype ``np.uint8`` with 1 for mask pixels and 0 for
        background; its shape is the transpose of ``shape``.

    Raises:
        ValueError: If the string holds an odd number of tokens, i.e. a start
            without a matching length.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # An image without ships has an empty EncodedPixels cell; previously that
    # value reached ``.split()`` and raised AttributeError.
    is_missing = mask_rle is None or (
        isinstance(mask_rle, float) and np.isnan(mask_rle)
    )
    if not is_missing:
        tokens = mask_rle.split()
        if len(tokens) % 2:
            # Unequal start/length counts could otherwise broadcast silently
            # and paint the wrong runs.
            raise ValueError(
                "RLE string must contain an even number of values, got %d"
                % len(tokens)
            )
        starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
        ends = starts + np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1

    return img.reshape(shape).T  # Needed to align to RLE direction
|
||
def csv_show_rle(ImageId, dataset_dir, df):
    """Display an image, its combined mask, and the mask overlaid on the image.

    Args:
        ImageId: File name of the image; also the value looked up in the
            ``ImageId`` column of ``df``.
        dataset_dir: Directory the image file is read from.
        df: DataFrame with ``ImageId`` and ``EncodedPixels`` columns.
    """
    # NOTE(review): `imread` is imported from `skimage.data` at file level;
    # newer scikit-image releases appear to provide it only as
    # `skimage.io.imread` - confirm against the installed version.
    picture = imread(os.path.join(dataset_dir, ImageId))
    encoded_masks = df.loc[df['ImageId'] == ImageId, 'EncodedPixels'].tolist()

    # Sum the per-row masks into one array covering every annotated object.
    combined = np.zeros((768, 768))
    for encoded in encoded_masks:
        combined += rle_decode(encoded)

    # Three panels side by side: image, mask, image with translucent mask.
    _, panels = plt.subplots(1, 3)
    for panel in panels:
        panel.axis('off')
    image_panel, mask_panel, overlay_panel = panels
    image_panel.imshow(picture)
    mask_panel.imshow(combined)
    overlay_panel.imshow(picture)
    overlay_panel.imshow(combined, alpha=0.4)

    plt.tight_layout(h_pad=0.1, w_pad=0.1)
    plt.show()
|
||
if __name__ == "__main__":
    # Annotation / submission CSV files.
    csv_train = '../datasets/train_ship_segmentations_v2.csv'
    csv_test = '../submit/rle_submit.csv'

    # Image folders.
    dataset_train = '../datasets/ships_train2018'
    dataset_test = '../datasets/ships_test2018'

    # Preview a single training image together with its masks.
    ImageId = '0ba29cbcf.jpg'
    df = pd.read_csv(csv_train)
    csv_show_rle(ImageId, dataset_dir=dataset_train, df=df)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
import datetime | ||
import json | ||
import os | ||
import re | ||
import fnmatch | ||
from PIL import Image | ||
import numpy as np | ||
from pycococreatortools import pycococreatortools | ||
import pandas as pd | ||
|
||
from skimage.data import imread | ||
import matplotlib.pyplot as plt | ||
|
||
# Input locations: the mask CSV and the folder holding the training images.
csv_train = '../datasets/train_ship_segmentations_v2.csv'
dataset_train = '../datasets/ships_train2018'
IMAGE_DIR = dataset_train  # folder walked by main() and read by save_bad_ann()

# The annotation table is loaded once, when the module is executed.
df = pd.read_csv(csv_train)
|
||
# "info" section written to the COCO JSON file.
INFO = {
    "description": "Kaggle Dataset",
    # NOTE(review): the leading "/~" looks like a copy artefact - confirm the
    # intended URL before changing this runtime value.
    "url": "/~https://github.com/pascal1129",
    "version": "0.1.0",
    "year": 2018,
    "contributor": "pascal1129",
    # Naive UTC timestamp, formatted "YYYY-MM-DD HH:MM:SS[.ffffff]" exactly as
    # utcnow() produced it; utcnow() itself is deprecated since Python 3.12.
    "date_created": (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat(' ')
    ),
}

# "licenses" section written to the COCO JSON file.
LICENSES = [
    {
        "id": 1,
        "name": "Attribution-NonCommercial-ShareAlike License",
        "url": "http://creativecommons.org/licenses/by-nc-sa/2.0/",
    }
]

# "categories" section: a single category, used for every annotation.
CATEGORIES = [
    {
        'id': 1,
        'name': 'ship',
        'supercategory': 'ship',
    },
]
|
||
def rle_decode(mask_rle, shape=(768, 768)):
    """Decode a run-length-encoded mask string into a binary mask array.

    Ref: https://www.kaggle.com/paulorzp/run-length-encode-and-decode

    Args:
        mask_rle: Whitespace-separated ``start length`` pairs, where each
            start is a 1-based index into the flattened mask. ``None`` or a
            float NaN (how pandas reports an empty CSV cell) is treated as
            "no mask" and decodes to an all-background array.
        shape: Shape the flat buffer is reshaped to before it is transposed.

    Returns:
        ``np.ndarray`` of dtype ``np.uint8`` with 1 for mask pixels and 0 for
        background; its shape is the transpose of ``shape``.

    Raises:
        ValueError: If the string holds an odd number of tokens, i.e. a start
            without a matching length.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # An image without ships has an empty EncodedPixels cell; previously that
    # value reached ``.split()`` and raised AttributeError.
    is_missing = mask_rle is None or (
        isinstance(mask_rle, float) and np.isnan(mask_rle)
    )
    if not is_missing:
        tokens = mask_rle.split()
        if len(tokens) % 2:
            # Unequal start/length counts could otherwise broadcast silently
            # and paint the wrong runs.
            raise ValueError(
                "RLE string must contain an even number of values, got %d"
                % len(tokens)
            )
        starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
        ends = starts + np.asarray(tokens[1::2], dtype=int)
        for lo, hi in zip(starts, ends):
            img[lo:hi] = 1

    return img.reshape(shape).T  # Needed to align to RLE direction
|
||
|
||
def filter_for_jpeg(root, files):
    """Return the paths under ``root`` whose names end in ``.jpeg`` or ``.jpg``.

    Args:
        root: Directory that is joined onto every entry of ``files``.
        files: File names inside ``root``.

    Returns:
        List of joined paths, in input order, that match one of the two glob
        patterns. The match is case-sensitive.
    """
    # One regular expression built from both glob patterns.
    jpeg_regex = r'|'.join(
        fnmatch.translate(glob) for glob in ('*.jpeg', '*.jpg')
    )
    full_paths = (os.path.join(root, name) for name in files)
    return [path for path in full_paths if re.match(jpeg_regex, path)]
|
||
def save_bad_ann(image_name, mask, segmentation_id):
    """Save a three-panel figure (image, mask, overlay) for one annotation.

    The figure is written to ``./tmp/<image stem>_<segmentation_id>.png``;
    the ``tmp`` directory is created when it does not exist yet.

    Args:
        image_name: File name of the image inside ``IMAGE_DIR``.
        mask: Mask array drawn next to, and on top of, the image.
        segmentation_id: Annotation id appended to the output file name.
    """
    # NOTE(review): `imread` is imported from `skimage.data` at file level;
    # newer scikit-image releases appear to provide it only as
    # `skimage.io.imread` - confirm against the installed version.
    picture = imread(os.path.join(IMAGE_DIR, image_name))

    _, panels = plt.subplots(1, 3)
    for panel in panels:
        panel.axis('off')
    image_panel, mask_panel, overlay_panel = panels
    image_panel.imshow(picture)
    mask_panel.imshow(mask)
    overlay_panel.imshow(picture)
    overlay_panel.imshow(mask, alpha=0.4)
    plt.tight_layout(h_pad=0.1, w_pad=0.1)

    if not os.path.exists('tmp'):
        os.makedirs('tmp')
    # Text before the first dot of the file name, plus the annotation id.
    out_name = image_name.split('.')[0] + '_' + str(segmentation_id) + '.png'
    plt.savefig(os.path.join('./tmp', out_name))
    plt.close()
|
||
def main():
    """Convert the RLE annotations in ``df`` into a COCO-style JSON file.

    Walks ``IMAGE_DIR`` for JPEG files, registers every image, decodes each
    of its masks from the CSV and collects the resulting annotations. The
    result is written to ``annotations/instances_ships_train2018.json``.
    Masks that ``create_annotation_info`` does not convert (it returns
    ``None``) are rendered with ``save_bad_ann`` for inspection.
    """
    # Dictionary that is finally dumped into the JSON file.
    coco_output = {
        "info": INFO,
        "licenses": LICENSES,
        "categories": CATEGORIES,
        "images": [],  # empty placeholders, appended to below
        "annotations": []
    }

    # Group the encoded masks by image once, instead of scanning the whole
    # table for every image. Rows with an empty EncodedPixels cell are
    # dropped so they never reach rle_decode(). Row order within an image is
    # preserved.
    annotated = df.dropna(subset=['EncodedPixels'])
    masks_by_image = {
        name: group.tolist()
        for name, group in annotated.groupby('ImageId', sort=False)['EncodedPixels']
    }

    image_id = 1
    segmentation_id = 1

    # The outer loop runs over images because the image information is shared
    # by all of its annotations. Find every image below IMAGE_DIR.
    for root, _, files in os.walk(IMAGE_DIR):
        image_paths = filter_for_jpeg(root, files)  # image file paths
        num_of_image_files = len(image_paths)       # number of images

        for image_path in image_paths:
            # Only the size is needed, so close the file handle right away
            # rather than leaving one open per image.
            with Image.open(image_path) as image:
                image_size = image.size
            # The CSV stores bare file names, not full paths.
            image_name = os.path.basename(image_path)
            image_info = pycococreatortools.create_image_info(
                image_id, image_name, image_size)
            coco_output["images"].append(image_info)

            # The inner loop runs over the masks that belong to this image.
            for mask_rle in masks_by_image.get(image_name, []):
                binary_mask = rle_decode(mask_rle)
                class_id = 1  # every annotation has category 1, ship
                category_info = {'id': class_id, 'is_crowd': 0}
                annotation_info = pycococreatortools.create_annotation_info(
                    segmentation_id, image_id, category_info, binary_mask,
                    image_size, tolerance=2)

                # Not every annotation is converted; low-quality ones are
                # filtered out. Good annotations join the dataset, rejected
                # ones are saved as figures for inspection.
                if annotation_info is not None:
                    coco_output["annotations"].append(annotation_info)
                else:
                    save_bad_ann(image_name, binary_mask, segmentation_id)

                # An id is consumed whether or not the annotation was kept.
                segmentation_id += 1

            print("%d of %d is done." % (image_id, num_of_image_files))
            image_id += 1

    # The original formatted the path with an undefined name (ROOT_DIR) and
    # so failed with NameError after all images had been processed.
    # NOTE(review): the output is placed in an "annotations" folder next to
    # IMAGE_DIR - confirm this is the intended dataset root.
    output_dir = os.path.join(os.path.dirname(IMAGE_DIR), 'annotations')
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, 'instances_ships_train2018.json')
    with open(output_path, 'w') as output_json_file:
        json.dump(coco_output, output_json_file, indent=4)


if __name__ == "__main__":
    main()
Oops, something went wrong.