Move data formatting scripts into this repo

This commit is contained in:
Carl Philipp Klemm 2024-08-13 16:27:41 +02:00
parent a1cfee1955
commit 31444f919e
7 changed files with 490 additions and 0 deletions

138
scripts/cellmeta.py Normal file
View File

@ -0,0 +1,138 @@
from chargefile import ChargeFile, SearchDirection
# Cycle count the *_in_step helpers below return for a step that runs a full set of cycles.
CYCLES_PER_STEP = 4
# Period of the step schedule: the *_in_step helpers reduce globalstep modulo this value.
STEP_COUNT = 12
def charge_cylces_in_step(globalstep: int):
    """Return the number of charge cycles scheduled for step ``globalstep``.

    The schedule repeats every STEP_COUNT steps:
      * position 11 runs CYCLES_PER_STEP cycles,
      * position 9 runs a single cycle,
      * position 0 runs a single cycle, but only in every tenth period
        (counting the very first period),
      * every other position runs none.
    """
    phase = globalstep % STEP_COUNT
    if phase == 11:
        return CYCLES_PER_STEP
    if phase == 9:
        return 1
    if phase != 0:
        return 0
    # phase == 0: only every tenth period contains a charge cycle here
    in_tenth_period = (globalstep / STEP_COUNT) % 10 == 0
    return 1 if in_tenth_period else 0
def charge_cycles_at_step(globalstep: int):
    """Return the total number of charge cycles scheduled in steps 0 .. globalstep - 1.

    The step ``globalstep`` itself is not included; the result is 0 for
    ``globalstep <= 0``.
    """
    return sum(charge_cylces_in_step(step) for step in range(globalstep))
def thermal_cylces_in_step(globalstep: int, substep: int = -1):
    """Return the number of thermal cycles scheduled for step ``globalstep``.

    The schedule repeats every STEP_COUNT steps:
      * positions 2, 4, 6, 8 and 10 run CYCLES_PER_STEP cycles,
      * position 0 runs CYCLES_PER_STEP cycles, except in every tenth period
        (counting the very first period) where it runs none,
      * position 11 runs a single cycle,
      * every other position runs none.

    ``substep`` is accepted for interface compatibility but is not used.
    """
    phase = globalstep % STEP_COUNT
    if phase == 0:
        in_tenth_period = (globalstep / STEP_COUNT) % 10 == 0
        return 0 if in_tenth_period else CYCLES_PER_STEP
    if phase in (2, 4, 6, 8, 10):
        return CYCLES_PER_STEP
    if phase == 11:
        return 1
    return 0
def thermal_cycles_at_step(globalstep: int, substep: int):
    """Return the number of thermal cycles scheduled up to and including ``globalstep``.

    Sums thermal_cylces_in_step() over every earlier step (0 .. globalstep - 1)
    and then adds the cycles of ``globalstep`` itself, passing ``substep``
    through for that last step.

    Two defects of the previous implementation are fixed here:
      * the loop ignored its index and added the cycle count of ``globalstep``
        on every iteration instead of the count of the step being visited;
      * the loop stopped at ``globalstep - 2``, so step ``globalstep - 1`` was
        never counted although the current step was.
    """
    count = sum(thermal_cylces_in_step(step) for step in range(globalstep))
    count += thermal_cylces_in_step(globalstep, substep)
    return count
# Cell ids that are not charge cycled: CellMeta reports 0 charge cycles and
# -1 for soc / capacity estimate for these cells.
non_charge_cycle_cell = list(range(4, 7))
# Cell ids that are not thermally cycled: CellMeta reports 0 thermal cycles for them.
# NOTE(review): cells 17-20 are listed here but carry a [35, 55] entry in
# cell_thermal_range below -- confirm that both tables are as intended.
non_thermal_cycle_cell = list(range(11, 21))
# Pair of temperatures per cell id, copied unchanged into CellMeta.thermal_range.
# Presumably [lower, upper] in degrees Celsius -- TODO confirm.
cell_thermal_range = {
    0: [35, 55],
    1: [35, 55],
    2: [35, 55],
    3: [35, 55],
    4: [35, 55],
    5: [35, 55],
    6: [35, 55],
    7: [35, 45],
    8: [35, 45],
    9: [35, 45],
    10: [35, 45],
    11: [35, 35],
    12: [35, 35],
    13: [35, 35],
    14: [45, 45],
    15: [45, 45],
    16: [45, 45],
    17: [35, 55],
    18: [35, 55],
    19: [35, 55],
    20: [35, 55],
}
# Maps cell id -> group number, exposed as CellMeta.cell_group.
cell_group_table = {
    0: 0,
    1: 0,
    2: 0,
    3: 0,
    4: 1,
    5: 1,
    6: 1,
    7: 2,
    8: 2,
    9: 2,
    10: 2,
    11: 3,
    12: 3,
    13: 3,
    14: 4,
    15: 4,
    16: 4,
    17: 5,
    18: 5,
    19: 5,
    20: 5,
}
class CellMeta:
    """Metadata describing one cell at a given step of the experiment.

    Attributes set by the constructor:
      cell_group         group number from cell_group_table
      charge_cycles      charge_cycles_at_step(globalstep), 0 for cells in non_charge_cycle_cell
      thermal_cycles     thermal_cycles_at_step(globalstep, substep), 0 for cells in non_thermal_cycle_cell
      last_avg_cap       |capacity| of the closest full-cycle file with cell == -1,
                         divided by the number of charge cycled cells, or -1
      last_avg_cap_step  step of that file, or -1
      last_cap           |capacity| of the closest full-cycle file of this cell, or -1
      last_cap_step      step of that file, or -1
      thermal_range      entry of cell_thermal_range for this cell
      soc                ChargeFile.GetSoc(...) result, -1 for cells that are not charge cycled
      cap_esitmate       ChargeFile.GetCapacityEsitmate(...) result, -1 for cells that are not charge cycled
      soc_estimate       always initialised to -1 here
    """

    def __init__(self, cellid: int, globalstep: int, substep: int, charge_files: list[ChargeFile], total_cells: int):
        charge_cycled = cellid not in non_charge_cycle_cell
        thermal_cycled = cellid not in non_thermal_cycle_cell

        # number of cell ids below total_cells that take part in charge cycling
        total_charge_cells = sum(1 for cell in range(total_cells) if cell not in non_charge_cycle_cell)

        closest_avg = None
        closest_charge = None
        if charge_cycled:
            closest_avg = ChargeFile.FindClosest(charge_files, globalstep, -1)
            closest_charge = ChargeFile.FindClosest(charge_files, globalstep, cellid)
            if closest_charge is not None:
                assert closest_charge.cell == cellid

        self.cell_group = cell_group_table[cellid]
        self.charge_cycles = charge_cycles_at_step(globalstep) if charge_cycled else 0
        self.thermal_cycles = thermal_cycles_at_step(globalstep, substep) if thermal_cycled else 0

        if closest_avg is None:
            self.last_avg_cap = -1
            self.last_avg_cap_step = -1
        else:
            self.last_avg_cap = abs(closest_avg.capacity) / total_charge_cells
            self.last_avg_cap_step = closest_avg.step

        if closest_charge is None:
            self.last_cap = -1
            self.last_cap_step = -1
        else:
            self.last_cap = abs(closest_charge.capacity)
            self.last_cap_step = closest_charge.step

        self.thermal_range = cell_thermal_range[cellid]

        if charge_cycled:
            self.soc = ChargeFile.GetSoc(charge_files, globalstep, cellid, total_charge_cells)
            self.cap_esitmate = ChargeFile.GetCapacityEsitmate(charge_files, globalstep, cellid, total_charge_cells)
        else:
            self.soc = -1
            self.cap_esitmate = -1
        self.soc_estimate = -1

177
scripts/chargefile.py Normal file
View File

@ -0,0 +1,177 @@
import csv
from parseerror import ParseError
import os
import enum
class SearchDirection(enum.Enum):
    """Restricts which files ChargeFile.FindClosest considers relative to the requested step."""
    # no restriction: files on either side of the requested step are candidates
    CLOSEST = 0
    # files whose step is greater than the requested step are skipped
    PREVIOUS_ONLY = 1
    # files whose step is less than the requested step are skipped
    FORWARD_ONLY = 2
def calc_capacity(charge_curve: list[dict]):
    """Integrate the current of a charge curve over time using the trapezoidal rule.

    Args:
        charge_curve: samples in chronological order, each a dict with at
            least the keys 'time' and 'current'.

    Returns:
        The integral of current over time, scaled by 1/3600 (i.e. in
        current-unit * hours if 'time' is in seconds -- the unit of 'time' is
        not visible here, confirm against the log format). The sign follows
        the sign of the current. 0.0 for fewer than two samples.

    Every pair of consecutive samples contributes one interval. The previous
    implementation used ``prev_time > 0`` as its "have a previous sample"
    test and therefore silently dropped the first interval of any curve whose
    first time stamp is 0 (or negative).
    """
    capacity = 0.0
    for previous, entry in zip(charge_curve, charge_curve[1:]):
        delta_s = entry['time'] - previous['time']
        mean_current = (entry['current'] + previous['current']) / 2
        capacity += mean_current * (delta_s / (60.0 * 60.0))
    return capacity
class ChargeFile:
    """One charge or discharge sequence parsed from a CSV log file.

    File naming (only the base name is inspected):
      * ``single_cell_charge*_<step>_<cell>.*`` / ``single_cell_discharge*_<step>_<cell>.*``
        describe a single cell,
      * ``charge_for*_<step>.*`` carries no cell id; ``cell`` stays -1.

    File contents: the first CSV line is skipped, the second line must start
    with the field ``time``, every following row is ``time, voltage, current``.

    Attributes:
      step           step number parsed from the file name
      cell           cell id parsed from the file name, -1 if the name has none
      start_voltage  voltage of the first sample
      end_voltage    voltage of the last sample
      current        current of the sample in the middle of the curve
      discharge      True when ``current`` is negative
      capacity       calc_capacity() of the curve (signed)
      full_cycle     True when the curve starts above 4.05 and ends below 3.15,
                     or starts below 3.15 and ends above 4.05
    """

    def __init__(self, filename: str):
        """Parse ``filename``.

        Raises:
            ParseError: the file name does not follow a known scheme, the
                header is missing or wrong, or the file holds no samples.
            ValueError: a step, cell id or sample field is not a number.
        """
        self.start_voltage = 0.0
        self.end_voltage = 0.0
        self.capacity = 0.0
        self.cell = -1
        self.discharge = False
        self.current = 0.0
        self.full_cycle = False
        self.step = 0

        # Only the base name is parsed: splitting the whole path at '.' broke
        # for inputs such as "./data/charge_for_3.csv" or directories with '_'.
        basename = os.path.split(filename)[1]
        tokens = basename.split('.')[0].split('_')
        if basename.startswith(("single_cell_charge", "single_cell_discharge")):
            self.step = int(tokens[-2])
            self.cell = int(tokens[-1])
        elif basename.startswith("charge_for"):
            self.step = int(tokens[-1])
        else:
            raise ParseError(f"File name {basename} not in the expected sheme for ChargeFile")

        with open(filename, newline='') as csvfile:
            reader = csv.reader(csvfile, delimiter=',', quotechar='"')
            try:
                next(reader)  # first line is skipped without being interpreted
                column_row = next(reader)
            except StopIteration:
                raise ParseError(f"File {basename} ends before the column header line") from None
            timestr = column_row[0] if column_row else ""
            if timestr != "time":
                raise ParseError(f"Expected time got {timestr}")
            charge_curve = [
                {'time': int(row[0]), 'voltage': float(row[1]), 'current': float(row[2])}
                for row in reader
            ]

        if not charge_curve:
            # previously surfaced as a bare IndexError from the indexing below
            raise ParseError(f"File {basename} contains no samples")

        self.current = charge_curve[len(charge_curve) // 2]['current']
        self.discharge = self.current < 0
        self.start_voltage = charge_curve[0]['voltage']
        self.end_voltage = charge_curve[-1]['voltage']
        self.capacity = calc_capacity(charge_curve)
        self.full_cycle = (self.start_voltage > 4.05 and self.end_voltage < 3.15) or \
                          (self.start_voltage < 3.15 and self.end_voltage > 4.05)

    @staticmethod
    def FindClosest(charge_files: list, step: int, cellid: int = -1, full_cycle=True, direction=SearchDirection.CLOSEST):
        """Return the file of ``cellid`` whose step is closest to ``step``, or None.

        Args:
            charge_files: candidates to search.
            step: step to search around.
            cellid: only files with this ``cell`` are considered (-1 selects
                the files without a cell id).
            full_cycle: when True only files with ``full_cycle`` set are considered.
            direction: optionally restricts the search to one side of ``step``.

        On equal distance the earlier candidate in ``charge_files`` is kept,
        unless it lies before ``step`` and is not a discharge, in which case
        the later candidate replaces it.
        """
        closest_file = None
        for charge_file in charge_files:
            if charge_file.cell != cellid:
                continue
            if direction == SearchDirection.PREVIOUS_ONLY and charge_file.step > step:
                continue
            if direction == SearchDirection.FORWARD_ONLY and charge_file.step < step:
                continue
            if full_cycle and not charge_file.full_cycle:
                continue
            if closest_file is None:
                closest_file = charge_file
                continue
            best_distance = abs(step - closest_file.step)
            distance = abs(step - charge_file.step)
            if distance < best_distance:
                closest_file = charge_file
            elif distance == best_distance and step > closest_file.step and not closest_file.discharge:
                # NOTE(review): the original nested a second test here,
                # "(before step and charge) or (after step and discharge)",
                # whose second half could never be reached because of the
                # condition above. Behaviour is kept as it was; confirm
                # whether the "after step and discharge" case was meant to
                # replace the candidate as well.
                closest_file = charge_file
        return closest_file

    @staticmethod
    def GetSoc(charge_files: list, step: int, cellid: int, cell_count: int) -> float:
        """Estimate the state of charge of ``cellid`` at ``step`` by charge counting.

        The most recent full cycle at or before ``step`` (of this cell or of
        the files without cell id, whichever is closer) is the reference:
        after a full discharge the counter starts at 0, otherwise at the
        reference capacity. The capacity of every file with a step in
        (reference step, step] is then added; capacities of files without
        cell id are divided by ``cell_count``. A file ending above 4.15 resets
        the counter to the reference capacity, one ending below 3.15 resets it
        to 0.

        Returns:
            counter / |reference capacity|, or -1 when no reference exists,
            the reference capacity is 0, or the result is outside
            [-0.05, 1.05].

        Raises:
            AssertionError: the result contradicts the last end voltage
                (below 3.4 with soc > 0.8, or above 4.0 with soc < 0.6).
        """
        common_closest_full = ChargeFile.FindClosest(charge_files, step, -1, True, SearchDirection.PREVIOUS_ONLY)
        specific_closest_full = ChargeFile.FindClosest(charge_files, step, cellid, True, SearchDirection.PREVIOUS_ONLY)
        if specific_closest_full is None and common_closest_full is None:
            return -1.0

        if common_closest_full is None:
            closest_full = specific_closest_full
        elif specific_closest_full is None:
            closest_full = common_closest_full
        elif step - specific_closest_full.step < step - common_closest_full.step:
            closest_full = specific_closest_full
        else:
            closest_full = common_closest_full

        full_cap = closest_full.capacity
        if closest_full.cell == -1:
            full_cap = full_cap / cell_count
        if full_cap == 0:
            # no usable reference; previously a ZeroDivisionError below
            return -1.0

        charge_counter = 0.0 if closest_full.discharge else full_cap

        end_voltage = closest_full.end_voltage
        # NOTE(review): files of *other* cells are counted too, the result
        # depends on the order of charge_files (not sorted by step here), and
        # the reset below uses the signed full_cap, which is negative when the
        # reference is a discharge. All three are kept as found -- confirm.
        for charge_file in charge_files:
            if closest_full.step < charge_file.step <= step:
                if charge_file.cell == -1:
                    charge_counter += charge_file.capacity / cell_count
                else:
                    charge_counter += charge_file.capacity
                end_voltage = charge_file.end_voltage
                if end_voltage > 4.15:
                    charge_counter = full_cap
                elif end_voltage < 3.15:
                    charge_counter = 0

        soc = charge_counter / abs(full_cap)
        if soc > 1.05 or soc < -0.05:
            return -1.0
        assert not (end_voltage < 3.4 and soc > 0.8)
        assert not (end_voltage > 4.0 and soc < 0.6)
        assert not (soc < -0.1 or soc > 1.1)
        return soc

    @staticmethod
    def _interpolate_capacity(prev_charge, next_charge, step: int) -> float:
        """Linearly interpolate |capacity| between two files at ``step``.

        When both files share one step (e.g. the same file was found in both
        search directions) that capacity is returned directly instead of
        dividing by a zero step distance.
        """
        span = next_charge.step - prev_charge.step
        if span == 0:
            return abs(prev_charge.capacity)
        slope = (abs(next_charge.capacity) - abs(prev_charge.capacity)) * ((step - prev_charge.step) / span)
        return slope + abs(prev_charge.capacity)

    @staticmethod
    def GetCommonCapacityEstimate(charge_files: list, step: int) -> tuple[float, int] | None:
        """Estimate the capacity of the files without cell id (cell == -1) at ``step``.

        Returns:
            ``(capacity, distance)`` where ``capacity`` is interpolated
            linearly between the closest full cycle before and after ``step``
            (or taken from the only one available) and ``distance`` is the
            step distance to the nearest of them; None if no such file exists.
        """
        prev_charge = ChargeFile.FindClosest(charge_files, step, -1, True, SearchDirection.PREVIOUS_ONLY)
        next_charge = ChargeFile.FindClosest(charge_files, step, -1, True, SearchDirection.FORWARD_ONLY)
        if prev_charge is None and next_charge is None:
            return None
        if prev_charge is None:
            return (abs(next_charge.capacity), next_charge.step - step)
        if next_charge is None:
            return (abs(prev_charge.capacity), step - prev_charge.step)
        return (ChargeFile._interpolate_capacity(prev_charge, next_charge, step),
                min(step - prev_charge.step, next_charge.step - step))

    @staticmethod
    def GetCapacityEsitmate(charge_files: list, step: int, cellid: int, cell_count: int) -> float:
        """Estimate the capacity of ``cellid`` at ``step``.

        Strategy, in order:
          * no full cycle of this cell exists: the common estimate divided by
            ``cell_count`` (-1 if there is no common estimate either);
          * full cycles of this cell exist on both sides: the interpolation
            between them if they are nearer than the common measurements,
            otherwise the per-cell share of the common estimate shifted by
            the cell's average offset from it at both of its own measurements;
          * a full cycle on one side only: the per-cell share of the common
            estimate shifted by the cell's offset at that measurement, or the
            measurement itself when no common estimate exists.
        """
        prev_charge = ChargeFile.FindClosest(charge_files, step, cellid, True, SearchDirection.PREVIOUS_ONLY)
        next_charge = ChargeFile.FindClosest(charge_files, step, cellid, True, SearchDirection.FORWARD_ONLY)
        common_cap = ChargeFile.GetCommonCapacityEstimate(charge_files, step)

        if prev_charge is None and next_charge is None:
            if common_cap is None:
                return -1.0
            return common_cap[0] / cell_count

        if prev_charge is not None and next_charge is not None:
            single_charge_estimate = ChargeFile._interpolate_capacity(prev_charge, next_charge, step)
            if common_cap is None or min(step - prev_charge.step, next_charge.step - step) < common_cap[1]:
                return single_charge_estimate
            # common_cap is not None here, so the two lookups below cannot return None
            common_cap_at_prev = ChargeFile.GetCommonCapacityEstimate(charge_files, prev_charge.step)
            common_cap_at_next = ChargeFile.GetCommonCapacityEstimate(charge_files, next_charge.step)
            avg_delta = ((abs(prev_charge.capacity) - common_cap_at_prev[0] / cell_count) +
                         (abs(next_charge.capacity) - common_cap_at_next[0] / cell_count)) / 2.0
            return (common_cap[0] / cell_count) + avg_delta

        singe_charge = prev_charge if prev_charge is not None else next_charge
        if common_cap is None:
            # previously a TypeError from subscripting None
            return abs(singe_charge.capacity)
        common_cap_at_single = ChargeFile.GetCommonCapacityEstimate(charge_files, singe_charge.step)
        return (common_cap[0] / cell_count) + (abs(singe_charge.capacity) - common_cap_at_single[0] / cell_count)

65
scripts/createdataset.py Normal file
View File

@ -0,0 +1,65 @@
import argparse
import os
from tqdm import tqdm
import tarfile
from chargefile import ChargeFile
from spectrafile import SpectraFile
from soc_estimation import add_soc_estimate
if __name__ == "__main__":
    # Builds <out>.tar from a directory of measurement files: charge/discharge
    # logs are parsed into ChargeFile objects, every remaining file (except
    # voltage equalization logs and expiramentlog.csv) is loaded as a spectrum
    # named "<step>-<cell>-<substep>.*", annotated with metadata and written
    # to a temporary directory that is then packed and removed.
    parser = argparse.ArgumentParser("KissExpiramentCreateDataset")
    parser.add_argument('--data', '-d', required=True, help="Data input directory")
    parser.add_argument('--out', '-o', required=True, help="output directory")
    args = parser.parse_args()

    tmp_dir = args.out + ".tmp"
    tar_path = f"{args.out}.tar"
    charge_prefixes = ("charge", "single_cell_")
    non_spectra_prefixes = charge_prefixes + ("voltage_equlaization_",)

    filenames = [f for f in os.listdir(args.data) if os.path.isfile(os.path.join(args.data, f))]
    charge_filenames = [f for f in filenames if f.startswith(charge_prefixes)]
    spectra_filenames = [f for f in filenames if not f.startswith(non_spectra_prefixes) and f != "expiramentlog.csv"]

    print(f"found {len(spectra_filenames)} spectra")
    print(f"found {len(charge_filenames)} charge/discharge sequences")

    os.makedirs(tmp_dir, exist_ok=True)

    charge_files = [ChargeFile(os.path.join(args.data, filename)) for filename in charge_filenames]

    # first pass: the number of distinct cells is needed to build the metadata
    cells = set()
    for filename in tqdm(spectra_filenames, desc="Finding cells"):
        tokens = filename.split('.')[0].split('-')
        cells.add(int(tokens[1]))
    print(f"{len(cells)} cells where involved")

    spectras = list()
    for filename in tqdm(spectra_filenames, desc="Resolveing data"):
        tokens = filename.split('.')[0].split('-')
        step = int(tokens[0])
        cellid = int(tokens[1])
        substep = int(tokens[2])
        sf = SpectraFile(os.path.join(args.data, filename), cellid, step, substep, charge_files, len(cells))
        spectras.append(sf)

    add_soc_estimate(spectras)

    for spectra in spectras:
        spectra.write(tmp_dir)

    # mode "x" refuses to overwrite, so an archive from an earlier run is removed first
    try:
        os.remove(tar_path)
    except FileNotFoundError:
        pass

    # the context manager finalizes and closes the archive even if adding a file fails
    with tarfile.open(tar_path, mode="x") as tar:
        for filename in tqdm(os.listdir(tmp_dir), desc="Saveing data"):
            path = os.path.join(tmp_dir, filename)
            tar.add(path, arcname=filename)
            os.remove(path)
    os.rmdir(tmp_dir)

35
scripts/extractmeta.py Normal file
View File

@ -0,0 +1,35 @@
#!/bin/python
import tarfile
from tqdm import tqdm
from eisgenerator import EisSpectra
import csv
import argparse
if __name__ == "__main__":
    # Reads every regular file of a tar archive as an EisSpectra and writes
    # one CSV row of labels per spectrum, preceded by a single header row
    # with the label names of the first spectrum. Aborts with exit status 1
    # when a later spectrum carries different label names.
    parser = argparse.ArgumentParser("KissExpiramentExtractMeta")
    parser.add_argument('--data', '-d', required=True, help="Data input tar file")
    parser.add_argument('--out', '-o', required=True, help="output file")
    args = parser.parse_args()

    with open(args.out, 'w', newline='') as outfile:
        csvwriter = csv.writer(outfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        with tarfile.open(args.data, mode="r") as tar:
            master_labels = None
            member_count = len(tar.getmembers())
            for file_info in tqdm(tar, desc="Extracting Metadata", total=member_count):
                if not file_info.isfile():
                    continue
                spectra = EisSpectra.loadFromString(tar.extractfile(file_info).read())
                if master_labels is None:
                    master_labels = spectra.labelNames
                    header_row = master_labels.copy()
                    for index, raw_label in enumerate(header_row):
                        print(raw_label)
                        # drop surrounding blanks and double quotes for the CSV header
                        header_row[index] = raw_label.strip(' "')
                    csvwriter.writerow(header_row)
                elif master_labels != spectra.labelNames:
                    print(f"Error: not all files in {args.data} have the same labelNames")
                    exit(1)
                csvwriter.writerow(spectra.labels)

3
scripts/parseerror.py Normal file
View File

@ -0,0 +1,3 @@
class ParseError(Exception):
    """Raised when an input file's name or contents do not have the expected form.

    The text passed to the constructor is available as ``message`` and is
    also what ``str()`` of the exception yields.
    """

    def __init__(self, message):
        super().__init__(message)
        self.message = message

32
scripts/soc_estimation.py Normal file
View File

@ -0,0 +1,32 @@
from scipy.optimize import curve_fit
from scipy.interpolate import splrep, splev
import csv
import argparse
import numpy
import matplotlib.pyplot as plt
from eisgenerator import EisSpectra
import io
import tarfile
from tqdm import tqdm
from spectrafile import SpectraFile
def add_soc_estimate(spectras: list[SpectraFile]):
    """Fit soc as a function of ocv and store the fitted value on every spectrum.

    All spectra whose ``meta.soc`` is not <= 0 serve as (ocv, soc) samples for
    a cubic least-squares spline with 7 interior knots placed at evenly spaced
    quantiles of the sample ocv values. The spline is then evaluated at the
    ocv of *every* spectrum and the result written to ``meta.soc_estimate``.

    If no spectrum has a usable soc nothing is fitted and ``soc_estimate`` is
    left untouched on all spectra.
    """
    samples = [(spectra.ocv, spectra.meta.soc) for spectra in spectras if not spectra.meta.soc <= 0]
    if not samples:
        return

    # row 0: ocv, row 1: soc
    ndata = numpy.asarray(samples, dtype=float).T
    # Reorder whole (ocv, soc) columns by ascending ocv. The previous
    # ndata.sort(1) sorted each row on its own and thereby paired the i-th
    # smallest ocv with the i-th smallest soc instead of its measured soc.
    ndata = ndata[:, numpy.argsort(ndata[0], kind="stable")]

    knot_count = 9
    interior_quantiles = numpy.linspace(0, 1, knot_count)[1:-1]
    knots = numpy.quantile(ndata[0], interior_quantiles)
    tck = splrep(ndata[0], ndata[1], t=knots, k=3)

    for spectra in spectras:
        spectra.meta.soc_estimate = splev(spectra.ocv, tck)

40
scripts/spectrafile.py Normal file
View File

@ -0,0 +1,40 @@
import os
from cellmeta import CellMeta
from eisgenerator import EisSpectra
from parseerror import ParseError
from chargefile import ChargeFile
class SpectraFile:
    """A spectrum loaded from disk together with the metadata derived for it.

    Attributes:
      cellid, step, substep  values passed in by the caller
      filename               base name of the loaded file
      meta                   CellMeta built for (cellid, step, substep)
      spectra                the EisSpectra loaded from the file
      temperature            third field of the quoted header section
      ocv                    fourth field of the quoted header section
    """

    def __init__(self, filename: str, cellid: int, step: int, substep: int, charge_files: list[ChargeFile], total_cells: int):
        """Load ``filename`` and check it against ``step`` and ``cellid``.

        Raises:
            ParseError: the step or cell id stored in the file header differs
                from the values passed in.
        """
        self.cellid = cellid
        self.step = step
        self.substep = substep
        self.temperature = -1.0
        self.ocv = -1.0
        self.meta = CellMeta(cellid, step, substep, charge_files, total_cells)
        self.filename = os.path.basename(filename)
        self.spectra = EisSpectra.loadFromDisk(filename)

        # the header fields sit between the first pair of double quotes, comma separated:
        # step, cell id, temperature, ocv
        quoted_section = self.spectra.header.split('"')[1]
        fields = quoted_section.split(',')
        self.temperature = float(fields[2])
        self.ocv = float(fields[3])

        header_matches = int(fields[0]) == step and int(fields[1]) == cellid
        if not header_matches:
            raise ParseError(f"file name and file content of SpectraFile {filename} do not match")

    def write(self, directory: str):
        """Store the metadata as labels on the spectrum and save it into ``directory``.

        The file keeps its original base name. Header, header description and
        model of the spectrum are overwritten with fixed strings.
        """
        label_values = (
            self.step, self.substep, self.cellid, self.meta.cell_group, self.temperature, self.ocv,
            self.meta.charge_cycles, self.meta.thermal_cycles, self.meta.last_avg_cap, self.meta.last_avg_cap_step,
            self.meta.last_cap, self.meta.last_cap_step, self.meta.cap_esitmate, self.meta.soc, self.meta.soc_estimate,
        )
        # label names, in the same order as label_values
        label_names = [
            "step", "substep", "cellid", "cell_group", "temparature", "ocv", "charge_cycles", "thermal_cycles",
            "last_avg_cap", "last_avg_step", "last_cap", "last_cap_step", "cap_estimate", "soc", "soc_estimate",
        ]

        self.spectra.setLabels([float(value) for value in label_values])
        self.spectra.model = "Unkown"
        self.spectra.headerDescription = "File origin"
        self.spectra.header = "CoinCellHell mesurement file"
        self.spectra.labelNames = label_names
        self.spectra.saveToDisk(os.path.join(directory, self.filename))