Move data formatting scripts into this repo
This commit is contained in:
		
							parent
							
								
									a1cfee1955
								
							
						
					
					
						commit
						31444f919e
					
				
					 7 changed files with 490 additions and 0 deletions
				
			
		
							
								
								
									
										65
									
								
								scripts/createdataset.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								scripts/createdataset.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,65 @@
 | 
			
		|||
import argparse
 | 
			
		||||
import os
 | 
			
		||||
from tqdm import tqdm
 | 
			
		||||
import tarfile
 | 
			
		||||
 | 
			
		||||
from chargefile import ChargeFile
 | 
			
		||||
from spectrafile import SpectraFile
 | 
			
		||||
from soc_estimation import add_soc_estimate
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
	# Command-line entry point: collect the charge and spectra files found in
	# the --data directory, build one SpectraFile per spectrum, write them to
	# a temporary directory and finally pack that directory into "<out>.tar".
	parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
	parser.add_argument('--data', '-d', required=True, help="Data input directory")
	parser.add_argument('--out', '-o', required=True, help="output directory")
	args = parser.parse_args()

	# Staging directory for the per-spectrum output and the final archive path.
	tmp_dir = args.out + ".tmp"
	tar_path = f"{args.out}.tar"

	# Only regular files directly inside the data directory are considered.
	filenames = [f for f in os.listdir(args.data) if os.path.isfile(os.path.join(args.data, f))]

	# Files are classified purely by name prefix: charge/discharge logs start
	# with one of charge_prefixes; everything else that is neither a voltage
	# equalization file nor the experiment log is treated as a spectrum.
	charge_prefixes = ("charge", "single_cell_")
	non_spectra_prefixes = charge_prefixes + ("voltage_equlaization_",)
	charge_filenames = [f for f in filenames if f.startswith(charge_prefixes)]
	spectra_filenames = [
		f for f in filenames
		if not f.startswith(non_spectra_prefixes) and f != "expiramentlog.csv"
	]

	print(f"found {len(spectra_filenames)} spectra")
	print(f"found {len(charge_filenames)} charge/discharge sequences")

	# exist_ok avoids the check-then-create race of a separate exists() test.
	os.makedirs(tmp_dir, exist_ok=True)

	charge_files = [ChargeFile(os.path.join(args.data, filename)) for filename in charge_filenames]

	# Spectra file names have the form "<step>-<cellid>-<substep>.<ext>".
	# First pass: count the distinct cells, because every SpectraFile is
	# constructed with the total number of cells.
	cells = set()
	for filename in tqdm(spectra_filenames, desc="Finding cells"):
		tokens = filename.split('.')[0].split('-')
		cells.add(int(tokens[1]))

	print(f"{len(cells)} cells where involved")

	# Second pass: build a SpectraFile for every spectrum.
	spectras = list()
	for filename in tqdm(spectra_filenames, desc="Resolveing data"):
		tokens = filename.split('.')[0].split('-')
		step = int(tokens[0])
		cellid = int(tokens[1])
		substep = int(tokens[2])
		sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, len(cells))
		spectras.append(sf)

	# NOTE(review): presumably annotates each spectrum with a state-of-charge
	# estimate in place - confirm against soc_estimation.add_soc_estimate.
	add_soc_estimate(spectras)

	for spectra in spectras:
		spectra.write(tmp_dir)

	# Mode "x" refuses to overwrite, so drop any archive left by a previous run.
	try:
		os.remove(tar_path)
	except FileNotFoundError:
		pass

	# The context manager closes the archive even if adding a member fails.
	with tarfile.open(tar_path, mode="x") as tar:
		for filename in tqdm(os.listdir(tmp_dir), desc="Saveing data"):
			path = os.path.join(tmp_dir, filename)
			# Members are stored flat, under their bare file name.
			tar.add(path, arcname=os.path.split(path)[-1])
			# Each staged file is deleted once it has been added to the archive.
			os.remove(path)

	# The staging directory is empty by now and can be removed.
	os.rmdir(tmp_dir)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue