add script to separate dataset based on cells

This commit is contained in:
Carl Philipp Klemm 2024-10-28 16:20:41 +01:00
parent 96b143dc24
commit 72b4b21c99

View File

@ -0,0 +1,37 @@
#!/bin/python
import argparse
import contextlib
import os
import tarfile
def _parse_cells(spec):
    """Parse a comma separated list of cell ids into a set of ints.

    Args:
        spec: The raw ``--extract_cells`` value, e.g. ``"1,4,7"``.

    Returns:
        The set of integer cell ids named in *spec*.

    Raises:
        ValueError: If any token is not a valid integer.
    """
    cells = set()
    for token in spec.split(','):
        cells.add(int(token))
        print("will extract cell " + token)
    return cells


def _cell_id(name):
    """Return the cell id encoded in a tar member name.

    Member names are expected to consist of exactly three "-" separated
    fields, the middle one being the integer cell id.

    Returns:
        The cell id, or None when the name does not follow that pattern.
    """
    fields = name.split("-")
    if len(fields) != 3:
        return None
    try:
        return int(fields[1])
    except ValueError:
        # A non-numeric middle field is just another unparsable name; report
        # it to the caller instead of aborting the whole split.
        return None


def _inverse_path(out_path):
    """Return *out_path* with "inverse_" prefixed to its file name only."""
    # Prefixing the whole string would corrupt paths that contain a directory
    # ("dir/out.tar" -> "inverse_dir/out.tar").
    directory, filename = os.path.split(out_path)
    return os.path.join(directory, "inverse_" + filename)


def main(argv=None):
    """Split a dataset tar file by the cell id encoded in its member names.

    Regular files whose cell id is listed in ``--extract_cells`` are copied
    to ``--out``. With ``--create_inverse`` every other parsable regular file
    is copied to a second archive named like ``--out`` with an ``inverse_``
    file name prefix. Members whose names cannot be parsed are reported and
    skipped.

    Args:
        argv: Argument list to parse; None makes argparse use ``sys.argv``.
    """
    parser = argparse.ArgumentParser("KissSplitDataset")
    parser.add_argument('--data', '-d', required=True, help="Data input tar file")
    parser.add_argument('--out', '-o', required=True, help="output file")
    parser.add_argument('--extract_cells', '-e', required=True,
                        help="The cells to extract from the input")
    parser.add_argument('--create_inverse', '-i', action="store_true",
                        help="Also write the files of all other cells to inverse_<out>")
    args = parser.parse_args(argv)

    cells = _parse_cells(args.extract_cells)

    # ExitStack closes every archive that was opened, including the optional
    # inverse archive, even when copying fails part way through.
    with contextlib.ExitStack() as stack:
        # Open the input first so that a bad input path does not leave an
        # empty output archive behind.
        tar = stack.enter_context(tarfile.open(args.data, mode="r"))
        outfile = stack.enter_context(tarfile.open(args.out, mode="w"))
        inverseoutfile = None
        if args.create_inverse:
            inverseoutfile = stack.enter_context(
                tarfile.open(_inverse_path(args.out), mode="w"))

        for file_info in tar:
            if not file_info.isfile():
                continue
            cell = _cell_id(file_info.name)
            if cell is None:
                print("Could not parse filename " + file_info.name)
                continue
            if cell in cells:
                print("Adding " + file_info.name)
                outfile.addfile(file_info, tar.extractfile(file_info))
            elif inverseoutfile is not None:
                print("Inverse " + file_info.name)
                inverseoutfile.addfile(file_info, tar.extractfile(file_info))


if __name__ == "__main__":
    main()