add eistomatlabarray
This commit is contained in:
parent
f1b0a33e71
commit
e43a235c12
@ -44,10 +44,15 @@ set_property(TARGET rlxpassfail PROPERTY CXX_STANDARD 17)
|
||||
install(TARGETS rlxpassfail RUNTIME DESTINATION bin)
|
||||
|
||||
add_executable(matlabarraytoeis ${COMMON_SRC_FILES} src/matlabarraytoeis.cpp)
|
||||
target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES} -lrelaxisloader)
|
||||
target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES})
|
||||
set_property(TARGET matlabarraytoeis PROPERTY CXX_STANDARD 17)
|
||||
install(TARGETS matlabarraytoeis RUNTIME DESTINATION bin)
|
||||
|
||||
add_executable(eistomatlabarray ${COMMON_SRC_FILES} src/eistomatlabarray.cpp src/microtar.c src/tarloader.cpp src/eisdataset.cpp)
|
||||
target_link_libraries(eistomatlabarray ${COMMON_LINK_LIBRARIES})
|
||||
set_property(TARGET eistomatlabarray PROPERTY CXX_STANDARD 17)
|
||||
install(TARGETS eistomatlabarray RUNTIME DESTINATION bin)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
|
||||
|
||||
|
||||
|
7
src/eisdataset.cpp
Normal file
7
src/eisdataset.cpp
Normal file
@ -0,0 +1,7 @@
|
||||
#include "eisdataset.h"
|
||||
|
||||
/**
 * Public accessor for a single spectra.
 *
 * Forwards @p index unchanged to the private virtual getImpl() and
 * returns whatever the concrete dataset produces.
 */
eis::Spectra EisDataset::get(size_t index)
{
	return getImpl(index);
}
|
18
src/eisdataset.h
Normal file
18
src/eisdataset.h
Normal file
@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <kisstype/spectra.h>
|
||||
|
||||
class EisDataset
|
||||
{
|
||||
private:
|
||||
virtual eis::Spectra getImpl(size_t index) = 0;
|
||||
|
||||
public:
|
||||
eis::Spectra get(size_t index);
|
||||
virtual size_t size() const = 0;
|
||||
virtual size_t classForIndex(size_t index) = 0;
|
||||
virtual std::string modelStringForClass(size_t classNum) {return std::string("Unkown");}
|
||||
virtual std::string getDescription() {return "";};
|
||||
virtual ~EisDataset(){}
|
||||
};
|
77
src/eistomatlabarray.cpp
Normal file
77
src/eistomatlabarray.cpp
Normal file
@ -0,0 +1,77 @@
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include "tarloader.h"
|
||||
#include "common.h"
|
||||
|
||||
std::string dataToTsv(const std::vector<eis::DataPoint>& data)
|
||||
{
|
||||
std::stringstream out;
|
||||
out<<std::scientific;
|
||||
for(const eis::DataPoint& dp : data)
|
||||
out<<dp.im.real()<<'\t';
|
||||
for(const eis::DataPoint& dp : data)
|
||||
out<<0-dp.im.imag()<<'\t';
|
||||
return out.str();
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if(argc != 5)
|
||||
{
|
||||
std::cout<<"Usage: "<<argv[0]<<" [TAR_DATASET] [OUT_DIR]\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
TarDataset dataset(argv[1], 120);
|
||||
if(dataset.size() == 0)
|
||||
{
|
||||
std::cout<<"could not open "<<argv[1]<<'\n';
|
||||
return 2;
|
||||
}
|
||||
|
||||
std::filesystem::path outDir(argv[1]);
|
||||
|
||||
if(!checkDir(outDir))
|
||||
return 2;
|
||||
|
||||
eis::Spectra spectra = dataset.get(0);
|
||||
std::vector<std::fstream> labelFiles;
|
||||
for(const std::string& name : spectra.labelNames)
|
||||
{
|
||||
labelFiles.push_back(std::fstream(outDir/(name+".txt"), std::ios_base::out));
|
||||
if(!labelFiles.back().is_open())
|
||||
{
|
||||
std::cerr<<"Unable to open "<<outDir/(name+".txt")<<" for writeing\n";
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
|
||||
std::fstream spectraFile(outDir/"spectras.txt", std::ios_base::out);
|
||||
if(!spectraFile.is_open())
|
||||
{
|
||||
std::cerr<<"Unable to open "<<(outDir/"spectras.txt")<<" for writeing\n";
|
||||
return 2;
|
||||
}
|
||||
|
||||
for(size_t i = 0; i < dataset.size(); ++i)
|
||||
{
|
||||
eis::Spectra spectra = dataset.get(i);
|
||||
spectraFile<<dataToTsv(spectra.data)<<'\n';
|
||||
if(spectra.labels.size() != labelFiles.size())
|
||||
{
|
||||
std::cerr<<"not all spectra have the same number of labels\n";
|
||||
return 3;
|
||||
}
|
||||
for(size_t j = 0; j < spectra.labels.size(); ++i)
|
||||
labelFiles[j]<<spectra.labels[j]<<'\n';
|
||||
|
||||
}
|
||||
|
||||
spectraFile.close();
|
||||
for(std::fstream& file : labelFiles)
|
||||
file.close();
|
||||
|
||||
return 0;
|
||||
}
|
376
src/microtar.c
Normal file
376
src/microtar.c
Normal file
@ -0,0 +1,376 @@
|
||||
/*
|
||||
* Copyright (c) 2017 rxi
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to
|
||||
* deal in the Software without restriction, including without limitation the
|
||||
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
* sell copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "microtar.h"
|
||||
|
||||
/* Raw header record as it is read from and written to the archive.
 * All members are plain char arrays, so the field sizes add up to
 * exactly 512 bytes. */
typedef struct {
  char name[100];      /* entry name */
  char mode[8];        /* written/parsed as octal text */
  char owner[8];       /* written/parsed as octal text */
  char group[8];       /* not touched by the conversion helpers */
  char size[12];       /* written/parsed as octal text */
  char mtime[12];      /* written/parsed as octal text */
  char checksum[8];    /* octal text, skipped when summing the record */
  char type;           /* one of the MTAR_T* characters */
  char linkname[100];  /* link target name */
  char _padding[255];  /* fills the record up to 512 bytes */
} mtar_raw_header_t;
|
||||
|
||||
|
||||
/* Rounds n up to the next multiple of incr; values that already are a
 * multiple are returned unchanged. incr must not be zero. */
static unsigned round_up(unsigned n, unsigned incr) {
  unsigned rest = n % incr;
  if (rest == 0) {
    return n;
  }
  return n + (incr - rest);
}
|
||||
|
||||
|
||||
/* Sums every byte of the raw header except the checksum field itself.
 * The sum starts at 256, which equals eight bytes of value 32 (presumably
 * the checksum field counted as spaces -- confirm against the tar format). */
static unsigned checksum(const mtar_raw_header_t* rh) {
  const unsigned char *bytes = (const unsigned char*) rh;
  const size_t skip_from = offsetof(mtar_raw_header_t, checksum);
  const size_t skip_to = offsetof(mtar_raw_header_t, type);
  unsigned sum = 256;
  size_t idx;
  for (idx = 0; idx < sizeof(*rh); idx++) {
    if (idx >= skip_from && idx < skip_to) {
      continue;
    }
    sum += bytes[idx];
  }
  return sum;
}
|
||||
|
||||
|
||||
/* Reads size bytes through the archive's read callback and advances the
 * tracked position by size, whether or not the read succeeded. */
static int tread(mtar_t *tar, void *data, size_t size) {
  const int status = tar->read(tar, data, size);
  tar->pos = tar->pos + size;
  return status;
}
|
||||
|
||||
|
||||
/* Writes size bytes through the archive's write callback and advances the
 * tracked position by size, whether or not the write succeeded. */
static int twrite(mtar_t *tar, const void *data, size_t size) {
  const int status = tar->write(tar, data, size);
  tar->pos = tar->pos + size;
  return status;
}
|
||||
|
||||
|
||||
/* Writes n zero bytes one at a time; stops at and returns the first
 * write error, otherwise MTAR_ESUCCESS. */
static int write_null_bytes(mtar_t *tar, int n) {
  const char zero = '\0';
  int left = n;
  while (left > 0) {
    int status = twrite(tar, &zero, 1);
    if (status) {
      return status;
    }
    left--;
  }
  return MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Parses an octal number from a fixed-width header field. Leading spaces
 * are skipped and parsing stops at the first non-octal character or at the
 * end of the field, so a field without terminator is never overrun.
 * Returns 0 when the field holds no digits. */
static unsigned parse_octal_field(const char *field, size_t len) {
  unsigned value = 0;
  size_t i = 0;
  while (i < len && field[i] == ' ') {
    i++;
  }
  while (i < len && field[i] >= '0' && field[i] <= '7') {
    value = (value << 3) | (unsigned) (field[i] - '0');
    i++;
  }
  return value;
}


/* Copies a fixed-width text field of len bytes into a destination of the
 * same size and always terminates it; a field that uses all len bytes is
 * truncated by one character instead of overflowing the destination. */
static void copy_text_field(char *dst, const char *src, size_t len) {
  memcpy(dst, src, len - 1);
  dst[len - 1] = '\0';
}


/* Converts a raw header record into the parsed header structure.
 * Returns MTAR_ENULLRECORD when the checksum field starts with a NUL byte,
 * MTAR_EBADCHKSUM when the stored and computed checksums differ and
 * MTAR_ESUCCESS otherwise. */
static int raw_to_header(mtar_header_t *h, const mtar_raw_header_t *rh) {
  unsigned chksum1, chksum2;

  /* If the checksum starts with a null byte we assume the record is NULL */
  if (*rh->checksum == '\0') {
    return MTAR_ENULLRECORD;
  }

  /* Build and compare checksum */
  chksum1 = checksum(rh);
  chksum2 = parse_octal_field(rh->checksum, sizeof(rh->checksum));
  if (chksum1 != chksum2) {
    return MTAR_EBADCHKSUM;
  }

  /* Load raw header into header; every read is bounded by its field size */
  h->mode = parse_octal_field(rh->mode, sizeof(rh->mode));
  h->owner = parse_octal_field(rh->owner, sizeof(rh->owner));
  h->size = parse_octal_field(rh->size, sizeof(rh->size));
  h->mtime = parse_octal_field(rh->mtime, sizeof(rh->mtime));
  h->type = rh->type;
  copy_text_field(h->name, rh->name, sizeof(h->name));
  copy_text_field(h->linkname, rh->linkname, sizeof(h->linkname));

  return MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Builds the raw header record for a parsed header: numeric members are
 * formatted as octal text, a zero type becomes MTAR_TREG and the checksum
 * is computed last over the finished record. Always returns MTAR_ESUCCESS. */
static int header_to_raw(mtar_raw_header_t *rh, const mtar_header_t *h) {
  unsigned sum;

  /* Start from an all-zero record */
  memset(rh, 0, sizeof(*rh));

  /* Numeric fields as octal text */
  sprintf(rh->mode, "%o", h->mode);
  sprintf(rh->owner, "%o", h->owner);
  sprintf(rh->size, "%o", h->size);
  sprintf(rh->mtime, "%o", h->mtime);

  if (h->type) {
    rh->type = h->type;
  } else {
    rh->type = MTAR_TREG;
  }

  strcpy(rh->name, h->name);
  strcpy(rh->linkname, h->linkname);

  /* Checksum goes in last; the final byte of the field is a space */
  sum = checksum(rh);
  sprintf(rh->checksum, "%06o", sum);
  rh->checksum[7] = ' ';

  return MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Maps an MTAR_E* status code to a static description; codes without an
 * entry yield "unknown error". */
const char* mtar_strerror(int err) {
  static const struct {
    int code;
    const char *text;
  } table[] = {
    { MTAR_ESUCCESS,    "success"         },
    { MTAR_EFAILURE,    "failure"         },
    { MTAR_EOPENFAIL,   "could not open"  },
    { MTAR_EREADFAIL,   "could not read"  },
    { MTAR_EWRITEFAIL,  "could not write" },
    { MTAR_ESEEKFAIL,   "could not seek"  },
    { MTAR_EBADCHKSUM,  "bad checksum"    },
    { MTAR_ENULLRECORD, "null record"     },
    { MTAR_ENOTFOUND,   "file not found"  }
  };
  size_t i;
  for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
    if (table[i].code == err) {
      return table[i].text;
    }
  }
  return "unknown error";
}
|
||||
|
||||
|
||||
/* Write callback for FILE-backed archives: succeeds only when all size
 * bytes were written. */
static int file_write(mtar_t *tar, const void *data, size_t size) {
  if (fwrite(data, 1, size, tar->stream) != size) {
    return MTAR_EWRITEFAIL;
  }
  return MTAR_ESUCCESS;
}
|
||||
|
||||
/* Read callback for FILE-backed archives: succeeds only when all size
 * bytes were read. */
static int file_read(mtar_t *tar, void *data, size_t size) {
  if (fread(data, 1, size, tar->stream) != size) {
    return MTAR_EREADFAIL;
  }
  return MTAR_ESUCCESS;
}
|
||||
|
||||
/* Seek callback for FILE-backed archives: positions the stream at the
 * absolute byte offset. */
static int file_seek(mtar_t *tar, long offset) {
  if (fseek(tar->stream, offset, SEEK_SET) != 0) {
    return MTAR_ESEEKFAIL;
  }
  return MTAR_ESUCCESS;
}
|
||||
|
||||
/* Close callback for FILE-backed archives. Closing is skipped when no
 * stream is open, and the handle is cleared afterwards so that a second
 * close (for example after mtar_open already closed the archive on a
 * failed header read) does not touch a stale FILE pointer.
 * Always returns MTAR_ESUCCESS. */
static int file_close(mtar_t *tar) {
  if (tar->stream) {
    fclose(tar->stream);
    tar->stream = NULL;
  }
  return MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Opens a FILE-backed archive. The tar struct is zeroed and wired to the
 * file_* callbacks, the mode is forced to its binary variant, and when
 * reading, the first header is parsed to validate the archive (the archive
 * is closed again if that fails). Returns MTAR_EOPENFAIL when fopen fails,
 * the header error when validation fails, MTAR_ESUCCESS otherwise. */
int mtar_open(mtar_t *tar, const char *filename, const char *mode) {
  mtar_header_t first;
  int status;

  /* Reset the struct and install the stdio callbacks */
  memset(tar, 0, sizeof(*tar));
  tar->close = file_close;
  tar->seek = file_seek;
  tar->read = file_read;
  tar->write = file_write;

  /* Force binary mode; 'r' wins over 'w', which wins over 'a' */
  if (strchr(mode, 'r')) {
    mode = "rb";
  } else if (strchr(mode, 'w')) {
    mode = "wb";
  } else if (strchr(mode, 'a')) {
    mode = "ab";
  }

  tar->stream = fopen(filename, mode);
  if (!tar->stream) {
    return MTAR_EOPENFAIL;
  }

  /* When reading, make sure the first record is a valid header */
  if (mode[0] == 'r') {
    status = mtar_read_header(tar, &first);
    if (status != MTAR_ESUCCESS) {
      mtar_close(tar);
      return status;
    }
  }

  return MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Closes the archive through its close callback and returns the
 * callback's result. */
int mtar_close(mtar_t *tar) {
  const int status = tar->close(tar);
  return status;
}
|
||||
|
||||
|
||||
/* Seeks through the archive's seek callback. The tracked position is set
 * to pos regardless of the callback's result, which is returned. */
int mtar_seek(mtar_t *tar, long pos) {
  const int status = tar->seek(tar, pos);
  tar->pos = pos;
  return status;
}
|
||||
|
||||
|
||||
/* Resets the per-record bookkeeping and seeks back to offset 0. */
int mtar_rewind(mtar_t *tar) {
  tar->last_header = 0;
  tar->remaining_data = 0;
  return mtar_seek(tar, 0);
}
|
||||
|
||||
|
||||
/* Advances to the record after the current one: the current header is
 * read to learn the payload size, then the position moves past the header
 * and the payload rounded up to 512 bytes. */
int mtar_next(mtar_t *tar) {
  mtar_header_t current;
  int skip;
  int status = mtar_read_header(tar, &current);
  if (status) {
    return status;
  }
  skip = round_up(current.size, 512) + sizeof(mtar_raw_header_t);
  return mtar_seek(tar, tar->pos + skip);
}
|
||||
|
||||
|
||||
/* Searches the archive from the start for an entry called name.
 * On a match the archive is left positioned at that entry's header, the
 * header is copied to *h when h is not NULL, and MTAR_ESUCCESS is
 * returned. Reaching the terminating null record yields MTAR_ENOTFOUND;
 * any other error from reading a header or advancing is returned as is. */
int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h) {
  int err;
  mtar_header_t header;

  /* Start at beginning */
  err = mtar_rewind(tar);
  if (err) {
    return err;
  }

  /* Iterate all files until we hit an error or find the file */
  for (;;) {
    err = mtar_read_header(tar, &header);
    if (err != MTAR_ESUCCESS) {
      break;
    }
    if (strcmp(header.name, name) == 0) {
      if (h) {
        *h = header;
      }
      return MTAR_ESUCCESS;
    }
    /* A failure to advance must end the search instead of being ignored */
    err = mtar_next(tar);
    if (err != MTAR_ESUCCESS) {
      break;
    }
  }

  /* Running into the end of the archive means the name does not exist */
  if (err == MTAR_ENULLRECORD) {
    err = MTAR_ENOTFOUND;
  }
  return err;
}
|
||||
|
||||
|
||||
/* Reads and parses the header at the current position. The position is
 * remembered in last_header and, after a successful read, restored so the
 * archive still points at the start of the header. Returns the read, seek
 * or parse status. */
int mtar_read_header(mtar_t *tar, mtar_header_t *h) {
  mtar_raw_header_t raw;
  int status;

  /* Remember where this header starts */
  tar->last_header = tar->pos;

  status = tread(tar, &raw, sizeof(raw));
  if (status) {
    return status;
  }

  /* Go back to the start of the header before parsing it */
  status = mtar_seek(tar, tar->last_header);
  if (status) {
    return status;
  }

  return raw_to_header(h, &raw);
}
|
||||
|
||||
|
||||
/* Reads size bytes of the current entry's payload into ptr.
 * When no payload is pending, the header is read first, the position is
 * moved past it and the pending byte count is taken from the header.
 * Once the pending count reaches zero the archive is positioned back at
 * the entry's header. */
int mtar_read_data(mtar_t *tar, void *ptr, size_t size) {
  int status;

  /* First read of this entry: set up the pending count */
  if (tar->remaining_data == 0) {
    mtar_header_t hdr;

    status = mtar_read_header(tar, &hdr);
    if (status) {
      return status;
    }

    status = mtar_seek(tar, tar->pos + sizeof(mtar_raw_header_t));
    if (status) {
      return status;
    }

    tar->remaining_data = hdr.size;
  }

  status = tread(tar, ptr, size);
  if (status) {
    return status;
  }
  tar->remaining_data -= size;

  /* Everything consumed: return to the header of this entry */
  return (tar->remaining_data == 0) ? mtar_seek(tar, tar->last_header)
                                    : MTAR_ESUCCESS;
}
|
||||
|
||||
|
||||
/* Converts h to its raw record, records h->size as the number of payload
 * bytes still to be written and writes the record. */
int mtar_write_header(mtar_t *tar, const mtar_header_t *h) {
  mtar_raw_header_t raw;
  header_to_raw(&raw, h);
  tar->remaining_data = h->size;
  return twrite(tar, &raw, sizeof(raw));
}
|
||||
|
||||
|
||||
/* Writes the header for a regular file entry called name with a payload
 * of size bytes (mode 0664). Names that do not fit into the header's name
 * field together with their terminator are rejected with MTAR_EFAILURE
 * instead of overflowing the field. */
int mtar_write_file_header(mtar_t *tar, const char *name, size_t size) {
  mtar_header_t h;
  /* Build header */
  memset(&h, 0, sizeof(h));
  if (strlen(name) >= sizeof(h.name)) {
    return MTAR_EFAILURE;
  }
  strcpy(h.name, name);
  h.size = size;
  h.type = MTAR_TREG;
  h.mode = 0664;
  /* Write header */
  return mtar_write_header(tar, &h);
}
|
||||
|
||||
|
||||
/* Writes the header for a directory entry called name (mode 0775).
 * Names that do not fit into the header's name field together with their
 * terminator are rejected with MTAR_EFAILURE instead of overflowing the
 * field. */
int mtar_write_dir_header(mtar_t *tar, const char *name) {
  mtar_header_t h;
  /* Build header */
  memset(&h, 0, sizeof(h));
  if (strlen(name) >= sizeof(h.name)) {
    return MTAR_EFAILURE;
  }
  strcpy(h.name, name);
  h.type = MTAR_TDIR;
  h.mode = 0775;
  /* Write header */
  return mtar_write_header(tar, &h);
}
|
||||
|
||||
|
||||
/* Writes size payload bytes for the current entry. When the pending byte
 * count reaches zero, zero bytes are appended until the position is a
 * multiple of 512. */
int mtar_write_data(mtar_t *tar, const void *data, size_t size) {
  int status = twrite(tar, data, size);
  if (status) {
    return status;
  }
  tar->remaining_data -= size;

  if (tar->remaining_data != 0) {
    return MTAR_ESUCCESS;
  }
  /* Entry complete: pad up to the next 512 byte boundary */
  return write_null_bytes(tar, round_up(tar->pos, 512) - tar->pos);
}
|
||||
|
||||
|
||||
/* Terminates the archive by writing two all-zero header-sized records. */
int mtar_finalize(mtar_t *tar) {
  const int trailer_bytes = sizeof(mtar_raw_header_t) * 2;
  return write_null_bytes(tar, trailer_bytes);
}
|
90
src/microtar.h
Normal file
90
src/microtar.h
Normal file
@ -0,0 +1,90 @@
|
||||
/**
|
||||
* Copyright (c) 2017 rxi
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the MIT license. See `microtar.c` for details.
|
||||
*/
|
||||
|
||||
#ifndef MICROTAR_H
|
||||
#define MICROTAR_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#define MTAR_VERSION "0.1.0"
|
||||
|
||||
/* Status codes returned by the mtar_* functions; zero is success, every
 * failure is negative. */
enum {
  MTAR_ESUCCESS    =  0,  /* success */
  MTAR_EFAILURE    = -1,  /* failure */
  MTAR_EOPENFAIL   = -2,  /* could not open */
  MTAR_EREADFAIL   = -3,  /* could not read */
  MTAR_EWRITEFAIL  = -4,  /* could not write */
  MTAR_ESEEKFAIL   = -5,  /* could not seek */
  MTAR_EBADCHKSUM  = -6,  /* bad checksum */
  MTAR_ENULLRECORD = -7,  /* null record */
  MTAR_ENOTFOUND   = -8   /* file not found */
};
|
||||
|
||||
/* Entry type characters stored in the header's type field. */
enum {
  MTAR_TREG  = '0',  /* used for regular file headers */
  MTAR_TLNK  = '1',
  MTAR_TSYM  = '2',
  MTAR_TCHR  = '3',
  MTAR_TBLK  = '4',
  MTAR_TDIR  = '5',  /* used for directory headers */
  MTAR_TFIFO = '6'
};
|
||||
|
||||
/* Parsed form of an archive header, with the numeric fields converted
 * from their octal text representation. */
typedef struct {
  unsigned mode;
  unsigned owner;
  unsigned size;       /* payload size in bytes */
  unsigned mtime;
  unsigned type;       /* one of the MTAR_T* values */
  char name[100];      /* entry name */
  char linkname[100];  /* link target name */
} mtar_header_t;
|
||||
|
||||
|
||||
typedef struct mtar_t mtar_t;

/* Archive handle: the I/O callbacks, the backing stream and the position
 * bookkeeping used by the mtar_* functions. */
struct mtar_t {
  /* I/O callbacks; mtar_open installs the stdio based ones */
  int (*read)(mtar_t *tar, void *data, size_t size);
  int (*write)(mtar_t *tar, const void *data, size_t size);
  int (*seek)(mtar_t *tar, long pos);
  int (*close)(mtar_t *tar);
  FILE *stream;           /* backing stream used by the stdio callbacks */
  size_t pos;             /* tracked byte offset in the archive */
  size_t remaining_data;  /* payload bytes left for the current entry */
  size_t last_header;     /* offset of the most recently read header */
};
|
||||
|
||||
|
||||
const char* mtar_strerror(int err);
|
||||
|
||||
int mtar_open(mtar_t *tar, const char *filename, const char *mode);
|
||||
int mtar_close(mtar_t *tar);
|
||||
|
||||
int mtar_seek(mtar_t *tar, long pos);
|
||||
int mtar_rewind(mtar_t *tar);
|
||||
int mtar_next(mtar_t *tar);
|
||||
int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h);
|
||||
int mtar_read_header(mtar_t *tar, mtar_header_t *h);
|
||||
int mtar_read_data(mtar_t *tar, void *ptr, size_t size);
|
||||
|
||||
int mtar_write_header(mtar_t *tar, const mtar_header_t *h);
|
||||
int mtar_write_file_header(mtar_t *tar, const char *name, size_t size);
|
||||
int mtar_write_dir_header(mtar_t *tar, const char *name);
|
||||
int mtar_write_data(mtar_t *tar, const void *data, size_t size);
|
||||
int mtar_finalize(mtar_t *tar);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
169
src/tarloader.cpp
Normal file
169
src/tarloader.cpp
Normal file
@ -0,0 +1,169 @@
|
||||
#include "tarloader.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <kisstype/type.h>
|
||||
#include <eisgenerator/translators.h>
|
||||
#include <iostream>
|
||||
#include <eisgenerator/model.h>
|
||||
#include <eisgenerator/basicmath.h>
|
||||
|
||||
|
||||
/**
 * Opens the tar archive at @p path and indexes every regular file in it.
 *
 * Each regular entry is parsed as a spectra. Entries missing any label
 * named in @p selectLabels or @p extraInputs are discarded; for the rest
 * the (normalised) model string is mapped to a class number and the
 * entry's position and size are remembered for later loading.
 *
 * If the archive cannot be opened the dataset stays empty (size() == 0).
 * Scanning stops at the terminating null record or at the first read
 * error, so a damaged archive cannot stall the loop.
 */
TarDataset::TarDataset(const std::filesystem::path& path, int64_t inputSize, std::vector<std::string> selectLabels, std::vector<std::string> extraInputs):
inputSize(inputSize), selectLabels(selectLabels), extraInputs(extraInputs), path(path)
{
	int ret = mtar_open(&tar, path.c_str(), "r");
	if(ret)
	{
		std::cerr<<"Unable to open tar at "<<path<<'\n';
		// On failure the stream was either never opened or already closed by
		// mtar_open; clear the handle so it is not closed a second time.
		tar.stream = nullptr;
		return;
	}

	// Reports the first label of keys that the spectra lacks, if any.
	const auto lacksLabel = [](eis::Spectra& spectra, const std::vector<std::string>& keys)
	{
		for(const std::string& key : keys)
		{
			if(!spectra.hasLabel(key))
			{
				std::cout<<"Dsicarding as it is missing: "<<key<<'\n';
				return true;
			}
		}
		return false;
	};

	mtar_header_t header;
	while((ret = mtar_read_header(&tar, &header)) == MTAR_ESUCCESS)
	{
		if(header.type == MTAR_TREG)
		{
			const std::filesystem::path entryPath = header.name;
			const size_t pos = tar.pos;
			eis::Spectra spectra = loadSpectraAtCurrentPos(header.size);

			// Both lists are always checked so every missing group is reported.
			bool skip = lacksLabel(spectra, selectLabels);
			skip = lacksLabel(spectra, extraInputs) || skip;

			if(!skip)
			{
				eis::purgeEisParamBrackets(spectra.model);
				eis::Model::removeSeriesResitance(spectra.model);

				if(spectra.model.length() < 2 && spectra.model != "r" && spectra.model != "c" && spectra.model != "w" && spectra.model != "p" && spectra.model != "l")
					spectra.model = "Union";

				// The class number is the position of the model string in modelStrs.
				const auto search = std::find(modelStrs.begin(), modelStrs.end(), spectra.model);
				const size_t index = search - modelStrs.begin();
				if(search == modelStrs.end())
					modelStrs.push_back(spectra.model);

				files.push_back(File{entryPath, index, pos, header.size});
			}
		}

		ret = mtar_next(&tar);
		if(ret != MTAR_ESUCCESS)
			break;
	}

	// Anything but the terminating null record means the scan ended early.
	if(ret != MTAR_ENULLRECORD)
		std::cerr<<"Stopped reading "<<path<<" early: "<<mtar_strerror(ret)<<'\n';

	if(files.size() < 20)
		std::cout<<"found few valid files in "<<path<<'\n';
}
|
||||
|
||||
/**
 * Reads @p size bytes of entry data from the archive's current position
 * and parses them as a spectra.
 *
 * The buffer is owned by a std::vector, so it is released even if parsing
 * throws, and it is zero-filled, so a failed read in a build without
 * asserts parses an empty string instead of uninitialised memory.
 */
eis::Spectra TarDataset::loadSpectraAtCurrentPos(size_t size)
{
	// One extra zero byte keeps the contents usable as a C string.
	std::vector<char> filebuffer(size+1, '\0');

	int ret = mtar_read_data(&tar, filebuffer.data(), size);
	if(ret != 0)
	{
		std::cerr<<"Unable to read from tar archive\n";
		assert(ret == 0);
	}

	std::stringstream ss(filebuffer.data());
	return eis::Spectra::loadFromStream(ss);
}
|
||||
|
||||
TarDataset::TarDataset(const TarDataset& in)
|
||||
{
|
||||
operator=(in);
|
||||
}
|
||||
|
||||
TarDataset& TarDataset::operator=(const TarDataset& in)
|
||||
{
|
||||
files = in.files;
|
||||
inputSize = in.inputSize;
|
||||
modelStrs = in.modelStrs;
|
||||
selectLabels = in.selectLabels;
|
||||
extraInputs = in.extraInputs;
|
||||
path = in.path;
|
||||
int ret = mtar_open(&tar, path.c_str(), "r");
|
||||
if(ret != 0)
|
||||
{
|
||||
std::cerr<<"Unable to reopen tar file at "<<path<<'\n';
|
||||
assert(ret == 0);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
TarDataset::~TarDataset()
|
||||
{
|
||||
mtar_close(&tar);
|
||||
}
|
||||
|
||||
/**
 * Loads the spectra for the indexed file @p index from the archive.
 *
 * The data is rescaled to inputSize/2 points. When selectLabels or
 * extraInputs are set, the labels are replaced by exactly those entries;
 * extra inputs get the prefix "exip_".
 *
 * An index at or beyond size() is reported, asserts, and yields an empty
 * spectra in builds without asserts.
 */
eis::Spectra TarDataset::getImpl(size_t index)
{
	// Valid indices are 0 .. files.size()-1.
	if(index >= files.size())
	{
		std::cerr<<"index "<<index<<" out of range in "<<__func__<<'\n';
		assert(false);
		return {};
	}

	const File& entry = files[index];
	mtar_seek(&tar, entry.pos);
	eis::Spectra spectra = loadSpectraAtCurrentPos(entry.size);

	spectra.data = eis::rescale(spectra.data, inputSize/2);

	if(!selectLabels.empty() || !extraInputs.empty())
	{
		// Keep the full label set around while the selection is rebuilt.
		eis::Spectra copy = spectra;
		spectra.labelNames.clear();
		spectra.labels.clear();
		for(const std::string& key : selectLabels)
			spectra.addLabel(key, copy.getLabel(key));
		for(const std::string& key : extraInputs)
			spectra.addLabel("exip_" + key, copy.getLabel(key));
	}

	return spectra;
}
|
||||
|
||||
size_t TarDataset::classForIndex(size_t index)
|
||||
{
|
||||
return files[index].classNum;
|
||||
}
|
||||
|
||||
size_t TarDataset::size() const
|
||||
{
|
||||
return files.size();
|
||||
}
|
||||
|
||||
std::string TarDataset::modelStringForClass(size_t classNum)
|
||||
{
|
||||
if(classNum >= modelStrs.size())
|
||||
return "invalid";
|
||||
else
|
||||
return *std::next(modelStrs.begin(), classNum);
|
||||
}
|
47
src/tarloader.h
Normal file
47
src/tarloader.h
Normal file
@ -0,0 +1,47 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <filesystem>
|
||||
#include <kisstype/spectra.h>
|
||||
|
||||
#include "eisdataset.h"
|
||||
#include "microtar.h"
|
||||
|
||||
|
||||
class TarDataset : public EisDataset
|
||||
{
|
||||
private:
|
||||
|
||||
mtar_t tar;
|
||||
|
||||
struct File
|
||||
{
|
||||
std::filesystem::path path;
|
||||
size_t classNum;
|
||||
size_t pos;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
std::vector<TarDataset::File> files;
|
||||
size_t inputSize;
|
||||
std::vector<std::string> modelStrs;
|
||||
std::vector<std::string> selectLabels;
|
||||
std::vector<std::string> extraInputs;
|
||||
std::filesystem::path path;
|
||||
|
||||
virtual eis::Spectra getImpl(size_t index) override;
|
||||
eis::Spectra loadSpectraAtCurrentPos(size_t size);
|
||||
|
||||
public:
|
||||
explicit TarDataset(const std::filesystem::path& path, int64_t inputSize = 100, std::vector<std::string> selectLabels = {}, std::vector<std::string> extraInputs = {});
|
||||
TarDataset(const TarDataset& in);
|
||||
TarDataset& operator=(const TarDataset& in);
|
||||
~TarDataset();
|
||||
|
||||
virtual size_t size() const override;
|
||||
|
||||
virtual size_t classForIndex(size_t index) override;
|
||||
virtual std::string modelStringForClass(size_t classNum) override;
|
||||
};
|
Loading…
x
Reference in New Issue
Block a user