move data formatting scripts into this repo
This commit is contained in:
65
scripts/createdataset.py
Normal file
65
scripts/createdataset.py
Normal file
@ -0,0 +1,65 @@
|
||||
import argparse
|
||||
import os
|
||||
from tqdm import tqdm
|
||||
import tarfile
|
||||
|
||||
from chargefile import ChargeFile
|
||||
from spectrafile import SpectraFile
|
||||
from soc_estimation import add_soc_estimate
|
||||
|
||||
|
||||
def classify_filenames(filenames):
    """Split directory entries into charge-log names and spectra names.

    Names starting with ``charge`` or ``single_cell_`` are charge/discharge
    files.  Everything else is treated as a spectra file, except names
    starting with ``voltage_equlaization_`` and the file ``expiramentlog.csv``,
    which are ignored.  (The misspellings are the literal on-disk names this
    script filters on and must not be "corrected".)

    Args:
        filenames: iterable of bare file names (no directory component).

    Returns:
        A ``(charge_filenames, spectra_filenames)`` tuple of lists, each
        preserving the input order.
    """
    filenames = list(filenames)
    charge_prefixes = ("charge", "single_cell_")
    non_spectra_prefixes = charge_prefixes + ("voltage_equlaization_",)

    charge_filenames = [f for f in filenames if f.startswith(charge_prefixes)]
    spectra_filenames = [
        f for f in filenames
        if not f.startswith(non_spectra_prefixes) and f != "expiramentlog.csv"
    ]
    return charge_filenames, spectra_filenames


def parse_spectra_filename(filename):
    """Parse a spectra file name of the form ``<step>-<cellid>-<substep>.<ext>``.

    Only the part before the first ``.`` is considered; it is split on ``-``
    and the first three tokens are converted to integers.

    Args:
        filename: bare spectra file name.

    Returns:
        A ``(step, cellid, substep)`` tuple of ints.

    Raises:
        ValueError: if fewer than three tokens are present or a token is not
            an integer.  The message names the offending file.
    """
    tokens = filename.split('.')[0].split('-')
    try:
        # Unpacking fewer than three tokens also raises ValueError, so one
        # handler covers both "too short" and "not a number".
        step, cellid, substep = (int(token) for token in tokens[:3])
    except ValueError as err:
        raise ValueError(
            f"spectra file name {filename!r} does not match "
            "<step>-<cellid>-<substep>.<ext>"
        ) from err
    return step, cellid, substep


def main(argv=None):
    """Convert a directory of raw measurement files into ``<out>.tar``.

    Reads every regular file in ``--data``, builds ``ChargeFile`` and
    ``SpectraFile`` objects from them, runs ``add_soc_estimate`` over the
    spectra, writes each spectra into the scratch directory ``<out>.tmp`` and
    finally packs that directory's contents into ``<out>.tar`` (replacing any
    existing archive) before removing the scratch directory.

    Args:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
    parser.add_argument('--data', '-d', required=True, help="Data input directory")
    parser.add_argument('--out', '-o', required=True, help="output directory")
    args = parser.parse_args(argv)

    tmp_dir = args.out + ".tmp"
    tar_path = f"{args.out}.tar"

    filenames = [f for f in os.listdir(args.data) if os.path.isfile(os.path.join(args.data, f))]
    charge_filenames, spectra_filenames = classify_filenames(filenames)

    print(f"found {len(spectra_filenames)} spectra")
    print(f"found {len(charge_filenames)} charge/discharge sequences")

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(tmp_dir, exist_ok=True)

    charge_files = [ChargeFile(os.path.join(args.data, filename)) for filename in charge_filenames]

    # The number of distinct cells must be known before any SpectraFile is
    # constructed, hence the separate first pass over the names.
    cells = set()
    for filename in tqdm(spectra_filenames, desc="Finding cells"):
        _, cellid, _ = parse_spectra_filename(filename)
        cells.add(cellid)

    print(f"{len(cells)} cells where involved")

    spectras = []
    for filename in tqdm(spectra_filenames, desc="Resolveing data"):
        step, cellid, substep = parse_spectra_filename(filename)
        sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, len(cells))
        spectras.append(sf)

    add_soc_estimate(spectras)

    for spectra in spectras:
        spectra.write(tmp_dir)

    # Mode "x" refuses to overwrite, so drop any archive from a previous run.
    try:
        os.remove(tar_path)
    except FileNotFoundError:
        pass

    # The context manager finalizes and closes the archive even if adding a
    # member fails part-way through.
    with tarfile.open(tar_path, mode="x") as tar:
        for filename in tqdm(os.listdir(tmp_dir), desc="Saveing data"):
            path = os.path.join(tmp_dir, filename)
            # Store members flat, under their bare file name.
            tar.add(path, arcname=filename)
            os.remove(path)

    os.rmdir(tmp_dir)


if __name__ == "__main__":
    main()
|
||||
|
Reference in New Issue
Block a user