# jpg2nii_data_convert.py
# Process cases by case id and save a JSON file with the image paths, caption and modality of every image series.
import os
import cv2
import csv
import json
import subprocess
import pandas as pd
import numpy as np
import SimpleITK as sitk
from tqdm import tqdm
from collections import defaultdict
def get_image(single_image_dir, single_image_filenames, target_size=(512, 512)):
    """Stack the slice images of one series into a single grayscale array.

    The files are ordered by the integer that precedes the first ``.`` in
    their name, read as single-channel images and resized to ``target_size``
    so that all slices share one shape.  Files that ``cv2.imread`` cannot
    decode (it returns ``None``) are skipped.

    Args:
        single_image_dir: Directory that contains the slice files.
        single_image_filenames: File names inside ``single_image_dir``.
            The list is left untouched; a sorted copy is used internally.
        target_size: ``dsize`` handed to ``cv2.resize`` for every slice.

    Returns:
        A ``numpy.ndarray`` built from the list of resized slices.  For a
        3-D stack whose first axis (number of slices) is smaller than its
        second axis, the slice axis is moved to the end.  When no slice could
        be read the result is an empty array of shape ``(0,)``.

    Raises:
        ValueError: If the text before the first ``.`` of a file name is not
            an integer.
    """
    # sorted() instead of list.sort(): do not reorder the caller's list.
    ordered_names = sorted(
        single_image_filenames,
        key=lambda name: int(name.split('.')[0]),
    )

    slices = []
    for name in ordered_names:
        slice_array = cv2.imread(
            os.path.join(single_image_dir, name), cv2.IMREAD_GRAYSCALE
        )
        if slice_array is None:
            # Unreadable / non-image file: skip it.
            continue
        slices.append(
            cv2.resize(slice_array, target_size, interpolation=cv2.INTER_LINEAR)
        )

    image_array = np.array(slices)  # slice axis first
    if image_array.ndim == 3 and image_array.shape[0] < image_array.shape[1]:
        # Fewer slices than rows: put the slice axis last.
        image_array = image_array.transpose((1, 2, 0))
    return image_array
# Modality labels.  Presumably the modalities treated as grayscale (guess based
# on the name; the list is not used in the visible part of this file) -- TODO confirm.
gray_list = [
    'CT',
    'MRI',
    'X-ray',
    'Ultrasound',
    'Mammography',
]
def convert_case(case_id, image_root_dir, json_root_dir, save_case_dict, save_root_dir=None):
    """Convert every image series of one case and record the written files.

    Loads ``<json_root_dir>/<case_id>.json`` and, for each series number ``n``
    (starting at 1), converts every sub-directory of
    ``<image_root_dir>/<case_id>/<n>`` into one file inside
    ``<save_root_dir>/<case_id>/<n>``:

    * a sub-directory holding exactly one file is read with ``cv2.imread``
      (default flags);
    * any other sub-directory is stacked with ``get_image``.

    Arrays with 2 to 5 dimensions are written as ``<name>.nii.gz`` through
    SimpleITK, all other arrays as ``<name>.npy``.  Sub-directories whose
    single file cannot be decoded are skipped.

    Args:
        case_id: Case identifier as a string; used as directory name and as
            the JSON file stem.
        image_root_dir: Root directory of the per-case image folders.
        json_root_dir: Directory holding the per-case JSON files.
        save_case_dict: List that is extended in place with one mapping per
            series, with the keys ``'image'`` (list of written paths),
            ``'image_caption'`` and ``'image_modality'``.
        save_root_dir: Root directory for the converted files.  Although the
            default is ``None``, a real path is required as soon as the case
            has at least one series (``os.path.join`` raises ``TypeError``
            otherwise).

    Returns:
        None.  Results are delivered through ``save_case_dict``.
    """
    case_images_dir = os.path.join(image_root_dir, case_id)
    case_json_path = os.path.join(json_root_dir, case_id + '.json')
    # The JSON uses non-ASCII keys; read it as UTF-8 instead of relying on the
    # locale's default encoding.
    with open(case_json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The count assumes one extra key plus two keys per series
    # ('<n>' and '<n>详情').
    image_nums = (len(data) - 1) // 2
    for image_num in range(1, image_nums + 1):
        series_key = str(image_num)
        image_dir = os.path.join(case_images_dir, series_key)  # e.g. ./images/1/1
        image_caption = data[series_key + '详情']
        image_modality = data[series_key][0]['modality']

        # Sub-directory names are ordered by the integer after the first '_'.
        single_image_names = sorted(
            os.listdir(image_dir),
            key=lambda name: int(name.split('_')[1]),
        )
        # Same output directory for every sub-directory of this series.
        save_npy_dir = os.path.join(save_root_dir, str(case_id), series_key)

        save_image_series = []
        for single_image_name in single_image_names:
            single_image_dir = os.path.join(image_dir, single_image_name)
            single_image_filenames = os.listdir(single_image_dir)
            if len(single_image_filenames) == 1:
                # Single file: plain 2-D image.
                image_file = os.path.join(single_image_dir, single_image_filenames[0])
                save_image_array = cv2.imread(image_file)
            else:
                save_image_array = get_image(single_image_dir, single_image_filenames)

            # exist_ok avoids the check-then-create race between parallel jobs.
            os.makedirs(save_npy_dir, exist_ok=True)

            if save_image_array is None:
                # cv2.imread could not decode the file.
                continue

            if 2 <= save_image_array.ndim <= 5:
                save_path = os.path.join(save_npy_dir, single_image_name + '.nii.gz')
                sitk.WriteImage(sitk.GetImageFromArray(save_image_array), save_path)
            else:
                save_path = os.path.join(save_npy_dir, single_image_name + '.npy')
                np.save(save_path, save_image_array)
            save_image_series.append(save_path)

        case_dict = defaultdict(list)
        case_dict['image'] = save_image_series
        case_dict['image_caption'] = image_caption
        case_dict['image_modality'] = image_modality
        save_case_dict.append(case_dict)
if __name__ == "__main__":
    # Batch driver: converts a contiguous range of numeric case ids and writes
    # one JSON file that lists the converted series of that range.
    import argparse

    parser = argparse.ArgumentParser(
        description='Convert image series of a range of cases and index them in a JSON file.'
    )
    parser.add_argument('--index', default=0, type=int,
                        help='batch number; batch i covers case ids i*1000+1 .. (i+1)*1000')
    parser.add_argument('--add_index', default=0, type=int,
                        help='offset added to the derived start id of the batch')
    # Default None: derive the value from --index unless given explicitly.
    # (Previously these two options were parsed and then always overwritten.)
    parser.add_argument('--start_index', default=None, type=int,
                        help='first case id (inclusive); default: index*1000+1+add_index')
    parser.add_argument('--end_index', default=None, type=int,
                        help='last case id (exclusive); default: (index+1)*1000+1')
    parser.add_argument(
        '--image_root_dir',
        default='/mnt/petrelfs/share_data/zhangxiaoman/DATA/Radio_VQA/processed_file/images')
    parser.add_argument(
        '--json_root_dir',
        default='/mnt/petrelfs/share_data/zhangxiaoman/DATA/Radio_VQA/processed_file/jsons')
    parser.add_argument(
        '--save_root_dir',
        default='/mnt/petrelfs/share_data/zhangxiaoman/DATA/Radio_VQA/processed_file/npys')
    parser.add_argument(
        '--processed_json_dir',
        default='/mnt/petrelfs/share_data/zhangxiaoman/DATA/Radio_VQA/processed_file/processed_jsons')
    args = parser.parse_args()

    if args.start_index is None:
        args.start_index = args.index * 1000 + 1 + args.add_index
    if args.end_index is None:
        args.end_index = (args.index + 1) * 1000 + 1

    # Create the output directory first so a missing directory cannot make the
    # final dump fail after the whole range has been converted.
    os.makedirs(args.processed_json_dir, exist_ok=True)
    save_json_file = os.path.join(
        args.processed_json_dir, 'processed_json_' + str(args.index) + '.json')

    # NOTE(review): the records are only written once the whole range is done;
    # an exception in any case loses them, and a resumed run (--add_index)
    # rewrites the file with the later cases only.
    save_case_dict = []
    for case_id in tqdm(range(args.start_index, args.end_index)):
        convert_case(str(case_id), args.image_root_dir, args.json_root_dir,
                     save_case_dict, args.save_root_dir)
        # (original note) CT_0 (200, 630, 630, 3)

    # Save the collected records as JSON.
    with open(save_json_file, 'w', encoding='utf-8') as f:
        json.dump(save_case_dict, f, ensure_ascii=False, indent=4)
# B, S, T, W, H, Z
# srun --partition=medai --mpi=pmi2 --quotatype=auto --gres=gpu:0 -n1 --ntasks-per-node=1 python data_convert.py --index 2 --add_index 24
# cd /mnt/petrelfs/share_data/zhangxiaoman/DATA/Radio_VQA/jpeg2npy