Compare commits

...

1 Commits

Author SHA1 Message Date
e43a235c12 add eistomatlabarray 2024-10-29 15:28:58 +01:00
8 changed files with 790 additions and 1 deletions

View File

@ -44,10 +44,15 @@ set_property(TARGET rlxpassfail PROPERTY CXX_STANDARD 17)
# Install the rlxpassfail executable into the bin directory of the install prefix.
install(TARGETS rlxpassfail RUNTIME DESTINATION bin)
# matlabarraytoeis: built from the common sources plus its own main file.
add_executable(matlabarraytoeis ${COMMON_SRC_FILES} src/matlabarraytoeis.cpp)
# NOTE(review): the next two lines both set link libraries for matlabarraytoeis;
# this looks like the removed/added pair of a diff (dropping -lrelaxisloader) —
# confirm that only one of them is meant to remain.
target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES} -lrelaxisloader)
target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES})
set_property(TARGET matlabarraytoeis PROPERTY CXX_STANDARD 17)
install(TARGETS matlabarraytoeis RUNTIME DESTINATION bin)
# eistomatlabarray: common sources, its own main file, the bundled microtar
# C source and the tar dataset loader.
add_executable(eistomatlabarray ${COMMON_SRC_FILES} src/eistomatlabarray.cpp src/microtar.c src/tarloader.cpp src/eisdataset.cpp)
target_link_libraries(eistomatlabarray ${COMMON_LINK_LIBRARIES})
set_property(TARGET eistomatlabarray PROPERTY CXX_STANDARD 17)
install(TARGETS eistomatlabarray RUNTIME DESTINATION bin)
# Appends -s to the release C++ flags (presumably to strip symbols — confirm).
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")

7
src/eisdataset.cpp Normal file
View File

@ -0,0 +1,7 @@
#include "eisdataset.h"
// Public accessor for a single spectrum: forwards the request to the
// getImpl() override supplied by the concrete dataset and returns its result.
eis::Spectra EisDataset::get(size_t index)
{
	return getImpl(index);
}

18
src/eisdataset.h Normal file
View File

@ -0,0 +1,18 @@
#pragma once
#include <cstdint>
#include <string>
#include <kisstype/spectra.h>
/**
 * Abstract interface for an indexable collection of EIS spectra.
 *
 * Concrete datasets implement getImpl(), size() and classForIndex();
 * modelStringForClass() and getDescription() have placeholder defaults.
 */
class EisDataset
{
private:
	/// Loads the spectrum stored at index; supplied by each concrete dataset.
	virtual eis::Spectra getImpl(size_t index) = 0;

public:
	/// Returns the spectrum at index by forwarding to getImpl().
	eis::Spectra get(size_t index);

	/// Number of spectra available in the dataset.
	virtual size_t size() const = 0;

	/// Class number associated with the spectrum at index.
	virtual size_t classForIndex(size_t index) = 0;

	/// Model string for a class number; the default knows no models.
	virtual std::string modelStringForClass(size_t classNum)
	{
		static_cast<void>(classNum);
		return "Unkown";
	}

	/// Free-form description of the dataset; empty by default.
	virtual std::string getDescription()
	{
		return std::string();
	}

	virtual ~EisDataset() = default;
};

77
src/eistomatlabarray.cpp Normal file
View File

@ -0,0 +1,77 @@
#include <iostream>
#include <fstream>
#include <sstream>
#include "tarloader.h"
#include "common.h"
// Serialises a spectrum as one line of tab-terminated values in scientific
// notation: first the real part of every point, then the negated imaginary
// part of every point. No trailing newline is added.
std::string dataToTsv(const std::vector<eis::DataPoint>& data)
{
	std::ostringstream line;
	line<<std::scientific;

	for(size_t i = 0; i < data.size(); ++i)
		line<<data[i].im.real()<<'\t';

	for(size_t i = 0; i < data.size(); ++i)
		line<<0-data[i].im.imag()<<'\t';

	return line.str();
}
int main(int argc, char** argv)
{
if(argc != 5)
{
std::cout<<"Usage: "<<argv[0]<<" [TAR_DATASET] [OUT_DIR]\n";
return 1;
}
TarDataset dataset(argv[1], 120);
if(dataset.size() == 0)
{
std::cout<<"could not open "<<argv[1]<<'\n';
return 2;
}
std::filesystem::path outDir(argv[1]);
if(!checkDir(outDir))
return 2;
eis::Spectra spectra = dataset.get(0);
std::vector<std::fstream> labelFiles;
for(const std::string& name : spectra.labelNames)
{
labelFiles.push_back(std::fstream(outDir/(name+".txt"), std::ios_base::out));
if(!labelFiles.back().is_open())
{
std::cerr<<"Unable to open "<<outDir/(name+".txt")<<" for writeing\n";
return 2;
}
}
std::fstream spectraFile(outDir/"spectras.txt", std::ios_base::out);
if(!spectraFile.is_open())
{
std::cerr<<"Unable to open "<<(outDir/"spectras.txt")<<" for writeing\n";
return 2;
}
for(size_t i = 0; i < dataset.size(); ++i)
{
eis::Spectra spectra = dataset.get(i);
spectraFile<<dataToTsv(spectra.data)<<'\n';
if(spectra.labels.size() != labelFiles.size())
{
std::cerr<<"not all spectra have the same number of labels\n";
return 3;
}
for(size_t j = 0; j < spectra.labels.size(); ++i)
labelFiles[j]<<spectra.labels[j]<<'\n';
}
spectraFile.close();
for(std::fstream& file : labelFiles)
file.close();
return 0;
}

376
src/microtar.c Normal file
View File

@ -0,0 +1,376 @@
/*
* Copyright (c) 2017 rxi
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include "microtar.h"
/* Raw header record exactly as it is read from / written to the archive.
 * Every member is a char array (or a single char), so the struct has no
 * padding and occupies 100+8+8+8+12+12+8+1+100+255 = 512 bytes.
 * The numeric members hold octal text; they are converted by raw_to_header()
 * and header_to_raw(). */
typedef struct {
/* Entry name. */
char name[100];
/* Octal text of the mode. */
char mode[8];
/* Octal text of the owner id. */
char owner[8];
/* Present in the record but not copied into mtar_header_t. */
char group[8];
/* Octal text of the payload size. */
char size[12];
/* Octal text of the modification time. */
char mtime[12];
/* Octal text of the header checksum (see checksum()). */
char checksum[8];
/* Entry type, one of the MTAR_T* character values. */
char type;
/* Link target name. */
char linkname[100];
/* Remainder of the 512 byte record; not interpreted by this file. */
char _padding[255];
} mtar_raw_header_t;
/* Rounds n up to the next multiple of incr; a value that already is a
 * multiple is returned unchanged. incr must not be zero. */
static unsigned round_up(unsigned n, unsigned incr) {
  unsigned remainder = n % incr;
  if (remainder == 0) {
    return n;
  }
  return n + (incr - remainder);
}
/* Sums every byte of the raw header except the bytes between the start of the
 * checksum member and the start of the type member. The sum starts at 256,
 * which equals eight space characters (8 * 32), one per skipped checksum byte. */
static unsigned checksum(const mtar_raw_header_t* rh) {
  const unsigned char *bytes = (const unsigned char*) rh;
  const size_t skip_begin = offsetof(mtar_raw_header_t, checksum);
  const size_t skip_end = offsetof(mtar_raw_header_t, type);
  unsigned sum = 256;
  size_t idx;
  for (idx = 0; idx < sizeof(*rh); idx++) {
    if (idx >= skip_begin && idx < skip_end) {
      continue;
    }
    sum += bytes[idx];
  }
  return sum;
}
/* Reads size bytes through the archive's read callback and advances the
 * tracked position by size, whether or not the callback reported an error.
 * Returns the callback's status. */
static int tread(mtar_t *tar, void *data, size_t size) {
  const int status = tar->read(tar, data, size);
  tar->pos = tar->pos + size;
  return status;
}
/* Writes size bytes through the archive's write callback and advances the
 * tracked position by size, whether or not the callback reported an error.
 * Returns the callback's status. */
static int twrite(mtar_t *tar, const void *data, size_t size) {
  const int status = tar->write(tar, data, size);
  tar->pos = tar->pos + size;
  return status;
}
/* Writes n zero bytes to the archive, one byte per twrite() call.
 * Stops at and returns the first error; returns MTAR_ESUCCESS otherwise
 * (including when n is zero or negative). */
static int write_null_bytes(mtar_t *tar, int n) {
  const char zero = '\0';
  int remaining = n;
  while (remaining > 0) {
    int status = twrite(tar, &zero, 1);
    if (status) {
      return status;
    }
    remaining--;
  }
  return MTAR_ESUCCESS;
}
/* Parses the octal text stored in a fixed-width header field.
 * The field is copied into a local NUL-terminated buffer first, because
 * header fields are not guaranteed to contain a terminator themselves.
 * Returns 0 when the field holds no octal digits. */
static unsigned parse_octal_field(const char *field, size_t len) {
  char buf[16];
  if (len > sizeof(buf) - 1) {
    len = sizeof(buf) - 1;
  }
  memcpy(buf, field, len);
  buf[len] = '\0';
  return (unsigned) strtoul(buf, NULL, 8);
}

/* Converts a raw on-disk header into the parsed header struct.
 *
 * Returns MTAR_ENULLRECORD when the checksum field starts with a NUL byte,
 * MTAR_EBADCHKSUM when the stored checksum does not match the computed one,
 * and MTAR_ESUCCESS otherwise. h is only written on success.
 *
 * Names are copied with a bound: h->name and h->linkname are always
 * NUL-terminated, so a name that fills all 100 bytes of the raw field is cut
 * to 99 characters instead of being read past the end of the field. */
static int raw_to_header(mtar_header_t *h, const mtar_raw_header_t *rh) {
  unsigned computed, stored;

  /* If the checksum starts with a null byte we assume the record is NULL */
  if (*rh->checksum == '\0') {
    return MTAR_ENULLRECORD;
  }

  /* Build and compare checksum */
  computed = checksum(rh);
  stored = parse_octal_field(rh->checksum, sizeof(rh->checksum));
  if (computed != stored) {
    return MTAR_EBADCHKSUM;
  }

  /* Load raw header into header */
  h->mode = parse_octal_field(rh->mode, sizeof(rh->mode));
  h->owner = parse_octal_field(rh->owner, sizeof(rh->owner));
  h->size = parse_octal_field(rh->size, sizeof(rh->size));
  h->mtime = parse_octal_field(rh->mtime, sizeof(rh->mtime));
  h->type = rh->type;

  strncpy(h->name, rh->name, sizeof(h->name) - 1);
  h->name[sizeof(h->name) - 1] = '\0';
  strncpy(h->linkname, rh->linkname, sizeof(h->linkname) - 1);
  h->linkname[sizeof(h->linkname) - 1] = '\0';

  return MTAR_ESUCCESS;
}
/* Serialises a parsed header into the raw on-disk record.
 *
 * The record is zeroed first, the numeric members are written as octal text,
 * a zero type is replaced by MTAR_TREG, and the checksum is computed last over
 * the otherwise complete record.
 *
 * All writes are bounded by the size of the destination field. Returns
 * MTAR_EFAILURE when a numeric value does not fit into its field (the field
 * then holds a truncated value), MTAR_ESUCCESS otherwise. */
static int header_to_raw(mtar_raw_header_t *rh, const mtar_header_t *h) {
  unsigned chksum;
  int fits = 1;
  int written;

  /* Load header into raw header */
  memset(rh, 0, sizeof(*rh));

  written = snprintf(rh->mode, sizeof(rh->mode), "%o", h->mode);
  fits = fits && written >= 0 && (size_t) written < sizeof(rh->mode);
  written = snprintf(rh->owner, sizeof(rh->owner), "%o", h->owner);
  fits = fits && written >= 0 && (size_t) written < sizeof(rh->owner);
  written = snprintf(rh->size, sizeof(rh->size), "%o", h->size);
  fits = fits && written >= 0 && (size_t) written < sizeof(rh->size);
  written = snprintf(rh->mtime, sizeof(rh->mtime), "%o", h->mtime);
  fits = fits && written >= 0 && (size_t) written < sizeof(rh->mtime);

  rh->type = h->type ? h->type : MTAR_TREG;

  /* The last byte of each name stays zero from the memset above, so the raw
   * names are always NUL-terminated. */
  strncpy(rh->name, h->name, sizeof(rh->name) - 1);
  strncpy(rh->linkname, h->linkname, sizeof(rh->linkname) - 1);

  /* Calculate and write checksum */
  chksum = checksum(rh);
  snprintf(rh->checksum, sizeof(rh->checksum), "%06o", chksum);
  rh->checksum[7] = ' ';

  return fits ? MTAR_ESUCCESS : MTAR_EFAILURE;
}
/* Maps an MTAR_E* status code to a static, human readable message.
 * Codes that are not listed yield "unknown error". */
const char* mtar_strerror(int err) {
  static const struct {
    int code;
    const char *text;
  } messages[] = {
    { MTAR_ESUCCESS,    "success" },
    { MTAR_EFAILURE,    "failure" },
    { MTAR_EOPENFAIL,   "could not open" },
    { MTAR_EREADFAIL,   "could not read" },
    { MTAR_EWRITEFAIL,  "could not write" },
    { MTAR_ESEEKFAIL,   "could not seek" },
    { MTAR_EBADCHKSUM,  "bad checksum" },
    { MTAR_ENULLRECORD, "null record" },
    { MTAR_ENOTFOUND,   "file not found" }
  };
  size_t i;
  for (i = 0; i < sizeof(messages) / sizeof(messages[0]); i++) {
    if (messages[i].code == err) {
      return messages[i].text;
    }
  }
  return "unknown error";
}
/* Default write callback: writes size bytes to tar->stream with fwrite().
 * Returns MTAR_EWRITEFAIL unless all bytes were written. */
static int file_write(mtar_t *tar, const void *data, size_t size) {
  if (fwrite(data, 1, size, tar->stream) != size) {
    return MTAR_EWRITEFAIL;
  }
  return MTAR_ESUCCESS;
}
/* Default read callback: reads size bytes from tar->stream with fread().
 * Returns MTAR_EREADFAIL unless all bytes were read. */
static int file_read(mtar_t *tar, void *data, size_t size) {
  if (fread(data, 1, size, tar->stream) != size) {
    return MTAR_EREADFAIL;
  }
  return MTAR_ESUCCESS;
}
/* Default seek callback: moves tar->stream to the absolute offset with
 * fseek(SEEK_SET). Returns MTAR_ESEEKFAIL when fseek() reports an error. */
static int file_seek(mtar_t *tar, long offset) {
  if (fseek(tar->stream, offset, SEEK_SET) != 0) {
    return MTAR_ESEEKFAIL;
  }
  return MTAR_ESUCCESS;
}
/* Default close callback: closes tar->stream.
 *
 * A NULL stream (open failed, or the archive was already closed) is treated
 * as a successful no-op, and the stream pointer is cleared after closing, so
 * calling mtar_close() more than once is safe.
 * Returns MTAR_EFAILURE when fclose() reports an error. */
static int file_close(mtar_t *tar) {
  int res;
  if (!tar->stream) {
    return MTAR_ESUCCESS;
  }
  res = fclose(tar->stream);
  tar->stream = NULL;
  return (res == 0) ? MTAR_ESUCCESS : MTAR_EFAILURE;
}
/* Opens the archive file and installs the stdio based callbacks.
 *
 * The mode is forced to a binary one: a mode containing 'r' becomes "rb",
 * 'w' becomes "wb" and 'a' becomes "ab". In read mode the first header is
 * read once to validate the file.
 *
 * Returns MTAR_EOPENFAIL when the file cannot be opened, the header error
 * when validation fails, MTAR_ESUCCESS otherwise. After any failure
 * tar->stream is NULL, so no dangling FILE pointer is left in the struct. */
int mtar_open(mtar_t *tar, const char *filename, const char *mode) {
  int err;
  mtar_header_t h;

  /* Init tar struct and functions */
  memset(tar, 0, sizeof(*tar));
  tar->write = file_write;
  tar->read = file_read;
  tar->seek = file_seek;
  tar->close = file_close;

  /* Assure mode is always binary */
  if ( strchr(mode, 'r') ) mode = "rb";
  if ( strchr(mode, 'w') ) mode = "wb";
  if ( strchr(mode, 'a') ) mode = "ab";

  /* Open file */
  tar->stream = fopen(filename, mode);
  if (!tar->stream) {
    return MTAR_EOPENFAIL;
  }

  /* Read first header to check it is valid if mode is `r` */
  if (*mode == 'r') {
    err = mtar_read_header(tar, &h);
    if (err != MTAR_ESUCCESS) {
      mtar_close(tar);
      /* The stream is closed now; do not leave the stale pointer behind. */
      tar->stream = NULL;
      return err;
    }
  }

  /* Return ok */
  return MTAR_ESUCCESS;
}
/* Closes the archive by invoking its close callback and returns that
 * callback's status. */
int mtar_close(mtar_t *tar) {
  const int status = tar->close(tar);
  return status;
}
/* Moves the archive to the absolute offset pos via the seek callback.
 * The tracked position is set to pos even when the callback fails; the
 * callback's status is returned. */
int mtar_seek(mtar_t *tar, long pos) {
  const int status = tar->seek(tar, pos);
  tar->pos = pos;
  return status;
}
/* Resets the read state (no pending data, last header at offset 0) and seeks
 * back to the start of the archive. Returns the status of the seek. */
int mtar_rewind(mtar_t *tar) {
  tar->last_header = 0;
  tar->remaining_data = 0;
  return mtar_seek(tar, 0);
}
/* Advances from the current record to the following one.
 * The current header is read to learn the payload size; the archive is then
 * moved forward by the header size plus the payload rounded up to 512 bytes.
 * Returns the header error if the current header cannot be read, otherwise
 * the status of the seek. */
int mtar_next(mtar_t *tar) {
  mtar_header_t current;
  int skip;
  int status = mtar_read_header(tar, &current);
  if (status) {
    return status;
  }
  skip = round_up(current.size, 512) + sizeof(mtar_raw_header_t);
  return mtar_seek(tar, tar->pos + skip);
}
/* Searches the archive from the beginning for an entry called name.
 *
 * On success the archive is positioned at the matching header, which is
 * copied to *h when h is not NULL, and MTAR_ESUCCESS is returned.
 * Reaching the terminating null record yields MTAR_ENOTFOUND. Any other read
 * or seek error is returned as is; in particular a failure to advance to the
 * next record ends the search instead of re-reading the same header forever. */
int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h) {
  int err;
  mtar_header_t header;

  /* Start at beginning */
  err = mtar_rewind(tar);
  if (err) {
    return err;
  }

  /* Iterate all files until we hit an error or find the file */
  while ( (err = mtar_read_header(tar, &header)) == MTAR_ESUCCESS ) {
    if ( !strcmp(header.name, name) ) {
      if (h) {
        *h = header;
      }
      return MTAR_ESUCCESS;
    }
    err = mtar_next(tar);
    if (err) {
      break;
    }
  }

  /* Return error */
  if (err == MTAR_ENULLRECORD) {
    err = MTAR_ENOTFOUND;
  }
  return err;
}
/* Reads and parses the header at the current position without consuming it:
 * the position is remembered in tar->last_header, the raw record is read, and
 * the archive is moved back to the remembered position before the record is
 * converted. Returns the first read/seek error, otherwise the result of
 * raw_to_header(). */
int mtar_read_header(mtar_t *tar, mtar_header_t *h) {
  mtar_raw_header_t raw;
  int status;

  /* Remember where this header starts */
  tar->last_header = tar->pos;

  /* Fetch the raw record */
  status = tread(tar, &raw, sizeof(raw));
  if (status) {
    return status;
  }

  /* Return to the start of the header */
  status = mtar_seek(tar, tar->last_header);
  if (status) {
    return status;
  }

  /* Convert the raw record into the caller's header struct */
  return raw_to_header(h, &raw);
}
/* Reads size bytes of the current entry's payload into ptr.
 *
 * The function is stateful: tar->remaining_data == 0 means no read is in
 * progress, so the header at the current position is parsed, the archive is
 * moved past it and remaining_data is set to the entry size. Each call then
 * reads size bytes and subtracts size from remaining_data. When the counter
 * reaches zero the archive is moved back to the entry's header.
 *
 * Returns the first header/seek/read error, MTAR_ESUCCESS otherwise.
 *
 * NOTE(review): size is not checked against remaining_data; remaining_data is
 * an unsigned size_t, so asking for more than what remains would wrap the
 * counter instead of reaching zero — callers appear expected to stay within
 * the entry size; confirm. */
int mtar_read_data(mtar_t *tar, void *ptr, size_t size) {
int err;
/* If we have no remaining data then this is the first read, we get the size,
* set the remaining data and seek to the beginning of the data */
if (tar->remaining_data == 0) {
mtar_header_t h;
/* Read header */
err = mtar_read_header(tar, &h);
if (err) {
return err;
}
/* Seek past header and init remaining data */
/* mtar_read_header() left the position at the start of the header. */
err = mtar_seek(tar, tar->pos + sizeof(mtar_raw_header_t));
if (err) {
return err;
}
tar->remaining_data = h.size;
}
/* Read data */
err = tread(tar, ptr, size);
if (err) {
/* remaining_data is left untouched on a failed read. */
return err;
}
tar->remaining_data -= size;
/* If there is no remaining data we've finished reading and seek back to the
* header */
if (tar->remaining_data == 0) {
return mtar_seek(tar, tar->last_header);
}
return MTAR_ESUCCESS;
}
/* Serialises h into a raw header record and writes it to the archive.
 * tar->remaining_data is set to h->size so that mtar_write_data() knows when
 * the entry's payload is complete.
 * Returns the error of header_to_raw() without writing anything when the
 * header cannot be serialised, otherwise the status of the write. */
int mtar_write_header(mtar_t *tar, const mtar_header_t *h) {
  mtar_raw_header_t rh;
  int err;

  /* Build raw header and write */
  err = header_to_raw(&rh, h);
  if (err) {
    return err;
  }
  tar->remaining_data = h->size;
  return twrite(tar, &rh, sizeof(rh));
}
/* Writes the header for a regular file entry called name with a payload of
 * size bytes (type MTAR_TREG, mode 0664).
 * Returns MTAR_EFAILURE without writing anything when name, including its
 * terminator, does not fit into the header's name field; otherwise the status
 * of mtar_write_header(). */
int mtar_write_file_header(mtar_t *tar, const char *name, size_t size) {
  mtar_header_t h;
  size_t name_len = strlen(name);

  /* Refuse names that would overflow h.name. */
  if (name_len >= sizeof(h.name)) {
    return MTAR_EFAILURE;
  }

  /* Build header; the memset also provides the name's terminator */
  memset(&h, 0, sizeof(h));
  memcpy(h.name, name, name_len);
  h.size = size;
  h.type = MTAR_TREG;
  h.mode = 0664;

  /* Write header */
  return mtar_write_header(tar, &h);
}
/* Writes the header for a directory entry called name (type MTAR_TDIR,
 * mode 0775, size 0).
 * Returns MTAR_EFAILURE without writing anything when name, including its
 * terminator, does not fit into the header's name field; otherwise the status
 * of mtar_write_header(). */
int mtar_write_dir_header(mtar_t *tar, const char *name) {
  mtar_header_t h;
  size_t name_len = strlen(name);

  /* Refuse names that would overflow h.name. */
  if (name_len >= sizeof(h.name)) {
    return MTAR_EFAILURE;
  }

  /* Build header; the memset also provides the name's terminator */
  memset(&h, 0, sizeof(h));
  memcpy(h.name, name, name_len);
  h.type = MTAR_TDIR;
  h.mode = 0775;

  /* Write header */
  return mtar_write_header(tar, &h);
}
/* Writes size bytes of payload for the entry whose header was written last
 * and subtracts size from tar->remaining_data. Once the counter reaches zero
 * the entry is padded with zero bytes up to the next multiple of 512.
 * Returns the write error, or the status of the padding write, or
 * MTAR_ESUCCESS when more payload is still expected. */
int mtar_write_data(mtar_t *tar, const void *data, size_t size) {
  const int status = twrite(tar, data, size);
  if (status) {
    return status;
  }

  tar->remaining_data -= size;
  if (tar->remaining_data != 0) {
    /* More payload to come; padding is written with the final chunk. */
    return MTAR_ESUCCESS;
  }

  return write_null_bytes(tar, round_up(tar->pos, 512) - tar->pos);
}
/* Terminates the archive by appending two all-zero header-sized records.
 * Returns the status of the write. */
int mtar_finalize(mtar_t *tar) {
  const int trailer_bytes = sizeof(mtar_raw_header_t) * 2;
  return write_null_bytes(tar, trailer_bytes);
}

90
src/microtar.h Normal file
View File

@ -0,0 +1,90 @@
/**
* Copyright (c) 2017 rxi
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MIT license. See `microtar.c` for details.
*/
#ifndef MICROTAR_H
#define MICROTAR_H
#ifdef __cplusplus
extern "C"
{
#endif
#include <stdio.h>
#include <stdlib.h>
#define MTAR_VERSION "0.1.0"
/* Status codes returned by the mtar_* functions.
 * MTAR_ESUCCESS is zero and every failure is negative; mtar_strerror()
 * turns a code into a message. */
enum {
MTAR_ESUCCESS = 0,
MTAR_EFAILURE = -1,
MTAR_EOPENFAIL = -2,
MTAR_EREADFAIL = -3,
MTAR_EWRITEFAIL = -4,
MTAR_ESEEKFAIL = -5,
MTAR_EBADCHKSUM = -6,
MTAR_ENULLRECORD = -7,
MTAR_ENOTFOUND = -8
};
/* Entry type values as stored in the header's type member; each one is an
 * ASCII digit character. MTAR_TREG is used for regular files and MTAR_TDIR
 * for directories by the mtar_write_*_header() functions.
 * NOTE(review): the remaining names presumably denote hard link, symlink,
 * character device, block device and FIFO as in the tar format — confirm. */
enum {
MTAR_TREG = '0',
MTAR_TLNK = '1',
MTAR_TSYM = '2',
MTAR_TCHR = '3',
MTAR_TBLK = '4',
MTAR_TDIR = '5',
MTAR_TFIFO = '6'
};
/* Parsed form of one archive entry header, as filled by mtar_read_header()
 * and consumed by mtar_write_header(). */
typedef struct {
/* Numeric values decoded from the octal text of the raw header. */
unsigned mode;
unsigned owner;
/* Payload size of the entry in bytes. */
unsigned size;
unsigned mtime;
/* One of the MTAR_T* values. */
unsigned type;
/* Entry name and link target. */
char name[100];
char linkname[100];
} mtar_header_t;
/* State of one open archive. All I/O goes through the four callbacks, which
 * mtar_open() points at stdio based implementations operating on stream. */
typedef struct mtar_t mtar_t;
struct mtar_t {
/* I/O callbacks; each returns an MTAR_E* status code. */
int (*read)(mtar_t *tar, void *data, size_t size);
int (*write)(mtar_t *tar, const void *data, size_t size);
int (*seek)(mtar_t *tar, long pos);
int (*close)(mtar_t *tar);
/* File handle used by the default callbacks. */
FILE *stream;
/* Current offset in the archive as tracked by the library's reads, writes
 * and seeks. */
size_t pos;
/* Payload bytes still to be read or written for the current entry;
 * zero means no entry is in progress. */
size_t remaining_data;
/* Offset of the header most recently read by mtar_read_header(). */
size_t last_header;
};
/* Public API. Unless noted otherwise the functions return an MTAR_E* status
 * code, MTAR_ESUCCESS (0) on success. */
/* Returns a static message for a status code. */
const char* mtar_strerror(int err);
/* Opens the archive file filename with an fopen() style mode. */
int mtar_open(mtar_t *tar, const char *filename, const char *mode);
/* Closes the archive through its close callback. */
int mtar_close(mtar_t *tar);
/* Seeks to the absolute offset pos. */
int mtar_seek(mtar_t *tar, long pos);
/* Seeks back to the start of the archive and resets the read state. */
int mtar_rewind(mtar_t *tar);
/* Skips from the current record to the next one. */
int mtar_next(mtar_t *tar);
/* Searches from the start for the entry called name; h may be NULL. */
int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h);
/* Parses the header at the current position without moving past it. */
int mtar_read_header(mtar_t *tar, mtar_header_t *h);
/* Reads size bytes of the current entry's payload into ptr. */
int mtar_read_data(mtar_t *tar, void *ptr, size_t size);
/* Writes the header h. */
int mtar_write_header(mtar_t *tar, const mtar_header_t *h);
/* Writes a regular file header for name with a payload of size bytes. */
int mtar_write_file_header(mtar_t *tar, const char *name, size_t size);
/* Writes a directory header for name. */
int mtar_write_dir_header(mtar_t *tar, const char *name);
/* Writes size bytes of payload for the entry whose header was written last. */
int mtar_write_data(mtar_t *tar, const void *data, size_t size);
/* Appends the two zero records that terminate the archive. */
int mtar_finalize(mtar_t *tar);
#ifdef __cplusplus
}
#endif
#endif

169
src/tarloader.cpp Normal file
View File

@ -0,0 +1,169 @@
#include "tarloader.h"
#include <algorithm>
#include <assert.h>
#include <kisstype/type.h>
#include <eisgenerator/translators.h>
#include <iostream>
#include <eisgenerator/model.h>
#include <eisgenerator/basicmath.h>
/**
 * Opens the tar archive at path and indexes every regular file in it.
 *
 * Each regular file is parsed as a spectrum. Spectra that lack one of the
 * labels in selectLabels or extraInputs are discarded (with a message on
 * stdout). For every kept spectrum the archive offset, the payload size and a
 * class number derived from its normalised model string are recorded.
 *
 * If the archive cannot be opened the dataset stays empty (size() == 0) and
 * the tar handle is left with a null stream. Indexing stops at the archive's
 * terminating null record, and also at the first read or seek error, so a
 * truncated or corrupt archive cannot cause an endless loop.
 *
 * @param path archive to load
 * @param inputSize stored for later use when spectra are fetched
 * @param selectLabels labels every kept spectrum must have
 * @param extraInputs further labels every kept spectrum must have
 */
TarDataset::TarDataset(const std::filesystem::path& path, int64_t inputSize, std::vector<std::string> selectLabels, std::vector<std::string> extraInputs):
inputSize(inputSize), selectLabels(selectLabels), extraInputs(extraInputs), path(path)
{
	int ret = mtar_open(&tar, path.c_str(), "r");
	if(ret)
	{
		std::cerr<<"Unable to open tar at "<<path<<'\n';
		// Mark the handle as not open so nothing tries to close it later.
		tar.stream = nullptr;
		return;
	}

	mtar_header_t header;
	int status;
	while((status = mtar_read_header(&tar, &header)) == MTAR_ESUCCESS)
	{
		if(header.type == MTAR_TREG)
		{
			std::filesystem::path entryPath = header.name;
			// Offset of this entry's header; getImpl() seeks back to it.
			size_t pos = tar.pos;
			eis::Spectra spectra = loadSpectraAtCurrentPos(header.size);

			// Reports the first missing label of a set, if any.
			auto lacksLabel = [&spectra](const std::vector<std::string>& keys)
			{
				for(const std::string& key : keys)
				{
					if(!spectra.hasLabel(key))
					{
						std::cout<<"Dsicarding as it is missing: "<<key<<'\n';
						return true;
					}
				}
				return false;
			};
			const bool missingSelect = lacksLabel(selectLabels);
			const bool missingExtra = lacksLabel(extraInputs);

			if(!missingSelect && !missingExtra)
			{
				eis::purgeEisParamBrackets(spectra.model);
				eis::Model::removeSeriesResitance(spectra.model);
				if(spectra.model.length() < 2 && spectra.model != "r" && spectra.model != "c" && spectra.model != "w" && spectra.model != "p" && spectra.model != "l")
					spectra.model = "Union";

				// The class number is the index of the model string in modelStrs.
				auto search = std::find(modelStrs.begin(), modelStrs.end(), spectra.model);
				size_t index = search - modelStrs.begin();
				if(search == modelStrs.end())
					modelStrs.push_back(spectra.model);

				files.push_back(File{entryPath, index, pos, header.size});
			}
		}

		status = mtar_next(&tar);
		if(status != MTAR_ESUCCESS)
			break;
	}

	// Anything but the terminating null record means the archive ended unexpectedly.
	if(status != MTAR_ENULLRECORD)
		std::cerr<<"Stopped reading "<<path<<" early: "<<mtar_strerror(status)<<'\n';

	if(files.size() < 20)
		std::cout<<"found few valid files in "<<path<<'\n';
}
/**
 * Reads size bytes of payload from the entry at the archive's current
 * position and parses them as a spectrum.
 *
 * The buffer is owned by a vector, so it is released even when parsing
 * throws, and it is zero-initialised, so a failed read never leads to
 * uninitialised memory being parsed. The text handed to the parser ends at
 * the first NUL byte.
 */
eis::Spectra TarDataset::loadSpectraAtCurrentPos(size_t size)
{
	// One extra byte guarantees a terminator after the payload.
	std::vector<char> filebuffer(size+1, '\0');

	int ret = mtar_read_data(&tar, filebuffer.data(), size);
	if(ret != 0)
	{
		std::cerr<<"Unable to read from tar archive\n";
		assert(ret == 0);
	}

	std::stringstream ss(filebuffer.data());
	return eis::Spectra::loadFromStream(ss);
}
/**
 * Copy constructor: copies the index and settings of in and opens its own
 * handle on the same archive file.
 *
 * It does not go through operator=, because the assignment operator closes
 * the handle it already holds and a freshly constructed object has none.
 */
TarDataset::TarDataset(const TarDataset& in):
EisDataset(in), files(in.files), inputSize(in.inputSize), modelStrs(in.modelStrs),
selectLabels(in.selectLabels), extraInputs(in.extraInputs), path(in.path)
{
	int ret = mtar_open(&tar, path.c_str(), "r");
	if(ret != 0)
	{
		std::cerr<<"Unable to reopen tar file at "<<path<<'\n';
		// A null stream marks the handle as not open.
		tar.stream = nullptr;
		assert(ret == 0);
	}
}

/**
 * Copy assignment: releases the archive handle currently held, copies the
 * index and settings of in and opens a new handle on in's archive file.
 * Self-assignment is a no-op. A handle whose stream is null is treated as
 * not open and is not closed.
 */
TarDataset& TarDataset::operator=(const TarDataset& in)
{
	if(this == &in)
		return *this;

	// Close the old file first; otherwise every assignment leaks a FILE.
	if(tar.stream != nullptr)
		mtar_close(&tar);

	files = in.files;
	inputSize = in.inputSize;
	modelStrs = in.modelStrs;
	selectLabels = in.selectLabels;
	extraInputs = in.extraInputs;
	path = in.path;

	int ret = mtar_open(&tar, path.c_str(), "r");
	if(ret != 0)
	{
		std::cerr<<"Unable to reopen tar file at "<<path<<'\n';
		tar.stream = nullptr;
		assert(ret == 0);
	}
	return *this;
}
// Closes the archive handle. When the archive could not be opened the stream
// is null and there is nothing to close; closing it anyway would hand a null
// FILE pointer to fclose().
TarDataset::~TarDataset()
{
	if(tar.stream != nullptr)
		mtar_close(&tar);
}
/**
 * Loads the spectrum with the given index from the archive.
 *
 * The spectrum's data is rescaled to inputSize/2 points. When selectLabels or
 * extraInputs are set, the labels are reduced to exactly those: selected
 * labels keep their name, extra inputs get the prefix "exip_".
 *
 * An index that is not smaller than size() is reported on stderr and yields
 * an empty spectrum (after an assertion failure in debug builds).
 */
eis::Spectra TarDataset::getImpl(size_t index)
{
	// Valid indices are 0 .. size()-1, so index == size() is out of range too.
	if(index >= files.size())
	{
		std::cerr<<"index "<<index<<" out of range in "<<__func__<<'\n';
		assert(false);
		return {};
	}

	mtar_seek(&tar, files[index].pos);
	eis::Spectra spectra = loadSpectraAtCurrentPos(files[index].size);
	spectra.data = eis::rescale(spectra.data, inputSize/2);

	if(!selectLabels.empty() || !extraInputs.empty())
	{
		// Rebuild the label set from a copy holding the original labels.
		eis::Spectra copy = spectra;
		spectra.labelNames.clear();
		spectra.labels.clear();
		for(const std::string& key : selectLabels)
			spectra.addLabel(key, copy.getLabel(key));
		for(const std::string& key : extraInputs)
			spectra.addLabel("exip_" + key, copy.getLabel(key));
	}
	return spectra;
}
size_t TarDataset::classForIndex(size_t index)
{
return files[index].classNum;
}
// Number of spectra that were indexed from the archive.
size_t TarDataset::size() const
{
	const size_t count = files.size();
	return count;
}
// Model string that belongs to a class number, or "invalid" when the class
// number does not refer to a known model.
std::string TarDataset::modelStringForClass(size_t classNum)
{
	if(classNum < modelStrs.size())
		return modelStrs[classNum];
	return "invalid";
}

47
src/tarloader.h Normal file
View File

@ -0,0 +1,47 @@
#pragma once
#include <cstdint>
#include <vector>
#include <string>
#include <filesystem>
#include <kisstype/spectra.h>
#include "eisdataset.h"
#include "microtar.h"
class TarDataset : public EisDataset
{
private:
mtar_t tar;
struct File
{
std::filesystem::path path;
size_t classNum;
size_t pos;
size_t size;
};
std::vector<TarDataset::File> files;
size_t inputSize;
std::vector<std::string> modelStrs;
std::vector<std::string> selectLabels;
std::vector<std::string> extraInputs;
std::filesystem::path path;
virtual eis::Spectra getImpl(size_t index) override;
eis::Spectra loadSpectraAtCurrentPos(size_t size);
public:
explicit TarDataset(const std::filesystem::path& path, int64_t inputSize = 100, std::vector<std::string> selectLabels = {}, std::vector<std::string> extraInputs = {});
TarDataset(const TarDataset& in);
TarDataset& operator=(const TarDataset& in);
~TarDataset();
virtual size_t size() const override;
virtual size_t classForIndex(size_t index) override;
virtual std::string modelStringForClass(size_t classNum) override;
};