Move data formatting scripts into this repo
This commit is contained in:
35
scripts/extractmeta.py
Normal file
35
scripts/extractmeta.py
Normal file
@ -0,0 +1,35 @@
|
||||
#!/bin/python
|
||||
|
||||
import tarfile
|
||||
from tqdm import tqdm
|
||||
from eisgenerator import EisSpectra
|
||||
import csv
|
||||
import argparse
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read every regular file stored in a tar
    # archive, parse it with EisSpectra.loadFromString and write its `labels`
    # to a CSV file, one row per file. The first parsed file supplies the
    # header row (its `labelNames`); every later file must carry the same
    # `labelNames`, otherwise the script aborts with exit status 1.
    parser = argparse.ArgumentParser("KissExpiramentExtractMeta")
    parser.add_argument('--data', '-d', required=True, help="Data input tar file")
    parser.add_argument('--out', '-o', required=True, help="output file")
    args = parser.parse_args()

    # newline='' lets the csv module control line endings itself. Both
    # resources are released by the with statement, so no explicit close()
    # call is needed afterwards.
    with open(args.out, 'w', newline='') as outfile, \
            tarfile.open(args.data, mode="r") as tar:
        csvwriter = csv.writer(outfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # Label names of the first parsed file; None until one was seen.
        master_labels = None

        # The member list is fetched up front so the progress bar knows its
        # total. Directories and other non-file members are counted too, since
        # the loop visits them as well before skipping them.
        members = tar.getmembers()
        for file_info in tqdm(members, desc="Extracting Metadata", total=len(members)):
            if not file_info.isfile():
                continue

            # Close the extracted member as soon as its content has been read.
            with tar.extractfile(file_info) as member:
                filestr = member.read()
            spectra = EisSpectra.loadFromString(filestr)
            label_names = spectra.labelNames

            if master_labels is None:
                master_labels = label_names
                header = []
                for name in master_labels:
                    # Echo the raw label name to stdout, then store it for the
                    # header with surrounding spaces and double quotes removed.
                    print(name)
                    header.append(name.strip(' "'))
                csvwriter.writerow(header)
            elif master_labels != label_names:
                # SystemExit with a string argument prints it to stderr and
                # exits with status 1; unlike the site-provided exit() helper
                # it is always available.
                raise SystemExit(f"Error: not all files in {args.data} have the same labelNames")

            csvwriter.writerow(spectra.labels)
|
Reference in New Issue
Block a user