move data formatting scripts into this repo
This commit is contained in:
		
							parent
							
								
									a1cfee1955
								
							
						
					
					
						commit
						31444f919e
					
				
					 7 changed files with 490 additions and 0 deletions
				
			
		
							
								
								
									
										35
									
								
								scripts/extractmeta.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								scripts/extractmeta.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,35 @@ | |||
| #!/bin/python | ||||
| 
 | ||||
| import tarfile | ||||
| from tqdm import tqdm | ||||
| from eisgenerator import EisSpectra | ||||
| import csv | ||||
| import argparse | ||||
| 
 | ||||
def main():
	"""Extract the label metadata of every spectrum in a tar archive into a CSV file.

	Command line arguments:
		--data / -d: path of the input tar file containing the spectra.
		--out / -o: path of the CSV file to write.

	The label names of the first regular file in the archive are printed and
	then written (with surrounding spaces and double quotes stripped) as the
	CSV header row. One row with that file's labels is written per regular
	file in the archive.

	Raises:
		SystemExit: with status 1 when a file's label names differ from the
			label names of the first file.
	"""
	parser = argparse.ArgumentParser("KissExpiramentExtractMeta")
	parser.add_argument('--data', '-d', required=True, help="Data input tar file")
	parser.add_argument('--out', '-o', required=True, help="output file")
	args = parser.parse_args()

	# The archive is opened first so that an unreadable input does not
	# truncate an already existing output file.
	with tarfile.open(args.data, mode="r") as tar, open(args.out, 'w', newline='') as outfile:
		csvwriter = csv.writer(outfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
		master_labels = None
		# Read the member list once; it provides both the progress bar total
		# and the sequence to iterate over.
		members = tar.getmembers()
		for file_info in tqdm(members, desc="Extracting Metadata", total=len(members)):
			if not file_info.isfile():
				continue
			# Close the member's file object as soon as its content is read.
			with tar.extractfile(file_info) as member_file:
				filestr = member_file.read()
			spectra = EisSpectra.loadFromString(filestr)
			if master_labels is None:
				# The first file defines the reference label names and the CSV header.
				master_labels = spectra.labelNames
				for label in master_labels:
					print(label)
				# The unmodified names are kept in master_labels for the
				# comparison below; only the header row is cleaned up.
				csvwriter.writerow([label.strip(' "') for label in master_labels])
			elif master_labels != spectra.labelNames:
				print(f"Error: not all files in {args.data} have the same labelNames")
				# SystemExit instead of exit(): the latter is only injected by
				# the site module and is not guaranteed to exist.
				raise SystemExit(1)
			csvwriter.writerow(spectra.labels)


if __name__ == "__main__":
	main()
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Carl Philipp Klemm
						Carl Philipp Klemm