-
Notifications
You must be signed in to change notification settings - Fork 0
/
split-data-into-train-test.py
33 lines (30 loc) · 1.13 KB
/
split-data-into-train-test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import xml.etree.ElementTree as ET
import os
import cv2
import argparse
import shutil
import random
# Command-line interface: three mandatory options naming the input dataset
# listing and the two output files. Options are registered in the order
# they should appear in the generated help text.
parser = argparse.ArgumentParser()
for _option, _description in (
    ('--file_name', 'give name of the dataset file'),
    ('--train_name', 'give name of the train file'),
    ('--test_name', 'give name of the test file'),
):
    parser.add_argument(_option, help=_description, required=True)
# Do not leave the loop variables behind in the module namespace.
del _option, _description
def split(file_name, train_name, test_name, *, train_ratio=0.8, seed=None):
    """Randomly split the lines of a dataset listing into train and test files.

    Each non-blank line of ``file_name`` is treated as one sample. The
    samples are shuffled, the first ``train_ratio`` share is written to
    ``train_name`` and the remainder to ``test_name``, one sample per line.
    A summary ``total split_index train_count test_count`` is printed.

    Args:
        file_name: Path of the text file listing the samples, one per line.
        train_name: Path of the train listing to create (overwritten).
        test_name: Path of the test listing to create (overwritten).
        train_ratio: Fraction of samples assigned to the train file; the
            count is truncated toward zero. Defaults to 0.8.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If ``train_ratio`` is outside the range [0, 1].
        OSError: If the input cannot be read or an output cannot be written.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            f'train_ratio must be between 0 and 1, got {train_ratio!r}'
        )

    # Read the input before opening the outputs, so that a missing or
    # unreadable dataset file does not leave truncated train/test files.
    with open(file_name, 'r') as f:
        lines = [line.rstrip('\n') for line in f]

    # Blank lines (notably the one implied by a trailing newline) are not
    # samples; keeping them would skew the counts and emit empty entries.
    items = [line for line in lines if line.strip()]

    if seed is None:
        random.shuffle(items)
    else:
        # A private generator keeps a seeded run from disturbing the
        # global random state used elsewhere.
        random.Random(seed).shuffle(items)

    div = int(len(items) * train_ratio)
    train_file = items[:div]
    test_file = items[div:]

    # Context managers guarantee both outputs are flushed and closed.
    with open(train_name, 'w') as train, open(test_name, 'w') as test:
        train.writelines(f'{item}\n' for item in train_file)
        test.writelines(f'{item}\n' for item in test_file)

    print(len(items), div, len(train_file), len(test_file))
if __name__ == "__main__":
    # Script entry point: read the three file names from the command line,
    # echo them, then split the dataset listing into train and test files.
    cli_args = parser.parse_args()
    dataset_path = cli_args.file_name
    train_path = cli_args.train_name
    test_path = cli_args.test_name
    print(dataset_path, train_path, test_path)
    split(dataset_path, train_path, test_path)