38 lines
1.4 KiB
Python
38 lines
1.4 KiB
Python
#!/bin/python
|
|
import tarfile
|
|
import argparse
|
|
|
|
def parse_cells(spec: str) -> list[int]:
    """Parse a comma-separated list of cell indices.

    Args:
        spec: Text such as ``"1,2,5"``. Whitespace around each number is
            ignored (``int`` tolerates it).

    Returns:
        The cell indices, in the order they were given.

    Raises:
        ValueError: If any entry is not an integer, including an empty
            entry produced by a stray comma.
    """
    cells = []
    for token in spec.split(','):
        try:
            cells.append(int(token))
        except ValueError:
            # Re-raise with a message that names the offending entry.
            raise ValueError(
                f"invalid cell index {token!r} in {spec!r}") from None
    return cells


def cell_of(name: str):
    """Return the cell index encoded in an archive member name.

    Member names are expected to consist of exactly three ``-``-separated
    fields, the middle one being the integer cell index.

    Args:
        name: The member name as stored in the archive.

    Returns:
        The cell index as an ``int``, or ``None`` when the name does not
        have three fields or the middle field is not an integer.
    """
    fields = name.split("-")
    if len(fields) != 3:
        return None
    try:
        return int(fields[1])
    except ValueError:
        return None


def inverse_path(out_path: str) -> str:
    """Return the path of the inverse archive that belongs to *out_path*.

    The ``inverse_`` prefix is applied to the file name only, so the inverse
    archive is created next to the output archive even when *out_path*
    contains a directory component.
    """
    import os  # local import: the file-level import block is left untouched

    directory, filename = os.path.split(out_path)
    return os.path.join(directory, "inverse_" + filename)


def split_dataset(data_path, out_path, cells, create_inverse=False):
    """Copy the members of the selected cells from one tar archive to another.

    Only regular-file members are considered. Members whose name cannot be
    parsed by ``cell_of`` are reported on stdout and skipped.

    Args:
        data_path: Path of the input tar archive.
        out_path: Path of the archive receiving the selected members.
        cells: Iterable of integer cell indices to extract.
        create_inverse: When true, every parseable member that is *not* in
            ``cells`` is written to ``inverse_path(out_path)``.
    """
    wanted = frozenset(cells)  # O(1) membership test per archive member
    with tarfile.open(out_path, mode="w") as outfile, \
            tarfile.open(data_path, mode="r") as tar:
        inverse = None
        if create_inverse:
            inverse = tarfile.open(inverse_path(out_path), mode="w")
        try:
            for file_info in tar:
                if not file_info.isfile():
                    continue
                cell = cell_of(file_info.name)
                if cell is None:
                    print("Could not parse filename " + file_info.name)
                    continue
                if cell in wanted:
                    print("Adding " + file_info.name)
                    outfile.addfile(file_info, tar.extractfile(file_info))
                elif inverse is not None:
                    print("Inverse " + file_info.name)
                    inverse.addfile(file_info, tar.extractfile(file_info))
        finally:
            # Close the inverse archive even if copying fails part-way, so
            # the file handle is not leaked.
            if inverse is not None:
                inverse.close()


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser("KissSplitDataset")
    parser.add_argument('--data', '-d', required=True,
                        help="Data input tar file")
    parser.add_argument('--out', '-o', required=True, help="output file")
    parser.add_argument('--extract_cells', '-e', required=True,
                        help="The cells to extract from the input")
    parser.add_argument('--create_inverse', '-i', action="store_true",
                        help="Also write all cells that are not extracted "
                             "to inverse_<out>")
    args = parser.parse_args(argv)

    try:
        cells = parse_cells(args.extract_cells)
    except ValueError as err:
        # Report bad input as a usage error instead of a traceback.
        parser.error(str(err))
    for cell in cells:
        print("will extract cell " + str(cell))

    split_dataset(args.data, args.out, cells, args.create_inverse)


if __name__ == "__main__":
    main()
|
|
|