#!/bin/python
"""Split a tar dataset into the files belonging to selected cells.

Member names are expected to look like ``<prefix>-<cell>-<suffix>``: exactly
three ``-``-separated fields whose middle field is an integer cell id.  Files
whose cell id was requested are copied to the output archive; with
``--create_inverse`` every other parsable file is copied to a second archive
named ``inverse_<output file name>`` next to the output archive.
"""
import argparse
import contextlib
import os
import tarfile


def _parse_cells(spec):
    """Return the set of integer cell ids in the comma separated *spec*.

    Raises:
        ValueError: if one of the comma separated tokens is not an integer.
    """
    cells = set()
    for token in spec.split(','):
        cells.add(int(token))
        print("will extract cell " + token)
    return cells


def _cell_id(member_name):
    """Return the cell id encoded in *member_name*, or None if it has none.

    A name only parses when it has exactly three ``-``-separated fields and
    the middle one is an integer.
    """
    fields = member_name.split("-")
    if len(fields) != 3:
        return None
    try:
        return int(fields[1])
    except ValueError:
        # A non-numeric cell field is just another unparsable name; skipping
        # it is better than aborting half-way through writing the archives.
        return None


def _inverse_path(out_path):
    """Return *out_path* with ``inverse_`` prefixed to its file name.

    Only the last path component is prefixed, so the inverse archive is
    written into the same directory as the main output.
    """
    directory, filename = os.path.split(out_path)
    return os.path.join(directory, "inverse_" + filename)


def split_dataset(data_path, out_path, cells, create_inverse=False):
    """Copy the files of the requested *cells* from one tar archive to another.

    Args:
        data_path: Path of the input tar archive.
        out_path: Path of the archive that receives the selected files.
        cells: Container of integer cell ids to extract.
        create_inverse: When true, parsable files of all other cells are
            written to the archive named by ``_inverse_path(out_path)``.

    Non-regular members (directories, links, ...) are ignored, and members
    whose name does not parse are reported and skipped.
    """
    # ExitStack closes every archive that was opened, including the optional
    # inverse one, even when copying fails part-way through.
    with contextlib.ExitStack() as stack:
        # Open the input first so that a bad input path does not leave empty
        # output archives behind.
        source = stack.enter_context(tarfile.open(data_path, mode="r"))
        selected = stack.enter_context(tarfile.open(out_path, mode="w"))
        inverse = None
        if create_inverse:
            inverse = stack.enter_context(
                tarfile.open(_inverse_path(out_path), mode="w"))

        for member in source:
            if not member.isfile():
                continue
            cell = _cell_id(member.name)
            if cell is None:
                print("Could not parse filename " + member.name)
                continue
            if cell in cells:
                print("Adding " + member.name)
                selected.addfile(member, source.extractfile(member))
            elif inverse is not None:
                print("Inverse " + member.name)
                inverse.addfile(member, source.extractfile(member))


def main(argv=None):
    """Command line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser("KissSplitDataset")
    parser.add_argument('--data', '-d', required=True,
                        help="Data input tar file")
    parser.add_argument('--out', '-o', required=True, help="output file")
    parser.add_argument('--extract_cells', '-e', required=True,
                        help="The cells to extract from the input")
    parser.add_argument('--create_inverse', '-i', action="store_true",
                        help="Also write the files of all other cells to "
                             "inverse_<output file name>")
    args = parser.parse_args(argv)

    cells = _parse_cells(args.extract_cells)
    split_dataset(args.data, args.out, cells, args.create_inverse)


if __name__ == "__main__":
    main()