From e43a235c1251a53ad0d1f9b4df72b7c32ac5f4c2 Mon Sep 17 00:00:00 2001 From: uvos Date: Tue, 29 Oct 2024 15:28:58 +0100 Subject: [PATCH] add eistomatlabarray --- CMakeLists.txt | 7 +- src/eisdataset.cpp | 7 + src/eisdataset.h | 18 ++ src/eistomatlabarray.cpp | 77 ++++++++ src/microtar.c | 376 +++++++++++++++++++++++++++++++++++++++ src/microtar.h | 90 ++++++++++ src/tarloader.cpp | 169 ++++++++++++++++++ src/tarloader.h | 47 +++++ 8 files changed, 790 insertions(+), 1 deletion(-) create mode 100644 src/eisdataset.cpp create mode 100644 src/eisdataset.h create mode 100644 src/eistomatlabarray.cpp create mode 100644 src/microtar.c create mode 100644 src/microtar.h create mode 100644 src/tarloader.cpp create mode 100644 src/tarloader.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 5436cca..5ba557f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,10 +44,15 @@ set_property(TARGET rlxpassfail PROPERTY CXX_STANDARD 17) install(TARGETS rlxpassfail RUNTIME DESTINATION bin) add_executable(matlabarraytoeis ${COMMON_SRC_FILES} src/matlabarraytoeis.cpp) -target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES} -lrelaxisloader) +target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES}) set_property(TARGET matlabarraytoeis PROPERTY CXX_STANDARD 17) install(TARGETS matlabarraytoeis RUNTIME DESTINATION bin) +add_executable(eistomatlabarray ${COMMON_SRC_FILES} src/eistomatlabarray.cpp src/microtar.c src/tarloader.cpp src/eisdataset.cpp) +target_link_libraries(eistomatlabarray ${COMMON_LINK_LIBRARIES}) +set_property(TARGET eistomatlabarray PROPERTY CXX_STANDARD 17) +install(TARGETS eistomatlabarray RUNTIME DESTINATION bin) + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") diff --git a/src/eisdataset.cpp b/src/eisdataset.cpp new file mode 100644 index 0000000..829887e --- /dev/null +++ b/src/eisdataset.cpp @@ -0,0 +1,7 @@ +#include "eisdataset.h" + +eis::Spectra EisDataset::get(size_t index) +{ + eis::Spectra data = getImpl(index); + return data; +} diff --git a/src/eisdataset.h b/src/eisdataset.h new file mode 100644 index 0000000..65f37bc --- /dev/null +++ b/src/eisdataset.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include +#include + +class EisDataset +{ +private: + virtual eis::Spectra getImpl(size_t index) = 0; + +public: + eis::Spectra get(size_t index); + virtual size_t size() const = 0; + virtual size_t classForIndex(size_t index) = 0; + virtual std::string modelStringForClass(size_t classNum) {return std::string("Unkown");} + virtual std::string getDescription() {return "";}; + virtual ~EisDataset(){} +}; diff --git a/src/eistomatlabarray.cpp b/src/eistomatlabarray.cpp new file mode 100644 index 0000000..f028ad4 --- /dev/null +++ b/src/eistomatlabarray.cpp @@ -0,0 +1,77 @@ +#include +#include +#include +#include "tarloader.h" +#include "common.h" + +std::string dataToTsv(const std::vector& data) +{ + std::stringstream out; + out< labelFiles; + for(const std::string& name : spectra.labelNames) + { + labelFiles.push_back(std::fstream(outDir/(name+".txt"), std::ios_base::out)); + if(!labelFiles.back().is_open()) + { + std::cerr<<"Unable to open "< +#include +#include +#include + +#include "microtar.h" + +typedef struct { + char name[100]; + char mode[8]; + char owner[8]; + char group[8]; + char size[12]; + char mtime[12]; + char checksum[8]; + char type; + char linkname[100]; + char _padding[255]; +} mtar_raw_header_t; + + +static unsigned round_up(unsigned n, unsigned incr) { + return n + (incr - n % incr) % incr; +} + + +static unsigned checksum(const mtar_raw_header_t* rh) { + unsigned i; + unsigned char *p = (unsigned char*) rh; + unsigned res = 256; + for (i = 0; i < offsetof(mtar_raw_header_t, checksum); i++) { + res += p[i]; + } + for (i = offsetof(mtar_raw_header_t, type); i < sizeof(*rh); i++) { + res += p[i]; + } + return res; +} + + +static int tread(mtar_t *tar, void *data, size_t size) { + int err = tar->read(tar, data, size); + tar->pos += size; + return err; +} + + +static int twrite(mtar_t *tar, const void *data, size_t size) { + int err = tar->write(tar, data, size); + tar->pos += size; + return err; +} + + +static int write_null_bytes(mtar_t *tar, int n) { + int i, err; + char nul = '\0'; + for (i = 0; i < n; i++) { + err = twrite(tar, &nul, 1); + if (err) { + return err; + } + } + return MTAR_ESUCCESS; +} + + +static int raw_to_header(mtar_header_t *h, const mtar_raw_header_t *rh) { + unsigned chksum1, chksum2; + + /* If the checksum starts with a null byte we assume the record is NULL */ + if (*rh->checksum == '\0') { + return MTAR_ENULLRECORD; + } + + /* Build and compare checksum */ + chksum1 = checksum(rh); + sscanf(rh->checksum, "%o", &chksum2); + if (chksum1 != chksum2) { + return MTAR_EBADCHKSUM; + } + + /* Load raw header into header */ + sscanf(rh->mode, "%o", &h->mode); + sscanf(rh->owner, "%o", &h->owner); + sscanf(rh->size, "%o", &h->size); + sscanf(rh->mtime, "%o", &h->mtime); + h->type = rh->type; + strcpy(h->name, rh->name); + strcpy(h->linkname, rh->linkname); + + return MTAR_ESUCCESS; +} + + +static int header_to_raw(mtar_raw_header_t *rh, const mtar_header_t *h) { + unsigned chksum; + + /* Load header into raw header */ + memset(rh, 0, sizeof(*rh)); + sprintf(rh->mode, "%o", h->mode); + sprintf(rh->owner, "%o", h->owner); + sprintf(rh->size, "%o", h->size); + sprintf(rh->mtime, "%o", h->mtime); + rh->type = h->type ? h->type : MTAR_TREG; + strcpy(rh->name, h->name); + strcpy(rh->linkname, h->linkname); + + /* Calculate and write checksum */ + chksum = checksum(rh); + sprintf(rh->checksum, "%06o", chksum); + rh->checksum[7] = ' '; + + return MTAR_ESUCCESS; +} + + +const char* mtar_strerror(int err) { + switch (err) { + case MTAR_ESUCCESS : return "success"; + case MTAR_EFAILURE : return "failure"; + case MTAR_EOPENFAIL : return "could not open"; + case MTAR_EREADFAIL : return "could not read"; + case MTAR_EWRITEFAIL : return "could not write"; + case MTAR_ESEEKFAIL : return "could not seek"; + case MTAR_EBADCHKSUM : return "bad checksum"; + case MTAR_ENULLRECORD : return "null record"; + case MTAR_ENOTFOUND : return "file not found"; + } + return "unknown error"; +} + + +static int file_write(mtar_t *tar, const void *data, size_t size) { + size_t res = fwrite(data, 1, size, tar->stream); + return (res == size) ? MTAR_ESUCCESS : MTAR_EWRITEFAIL; +} + +static int file_read(mtar_t *tar, void *data, size_t size) { + size_t res = fread(data, 1, size, tar->stream); + return (res == size) ? MTAR_ESUCCESS : MTAR_EREADFAIL; +} + +static int file_seek(mtar_t *tar, long offset) { + int res = fseek(tar->stream, offset, SEEK_SET); + return (res == 0) ? MTAR_ESUCCESS : MTAR_ESEEKFAIL; +} + +static int file_close(mtar_t *tar) { + fclose(tar->stream); + return MTAR_ESUCCESS; +} + + +int mtar_open(mtar_t *tar, const char *filename, const char *mode) { + int err; + mtar_header_t h; + + /* Init tar struct and functions */ + memset(tar, 0, sizeof(*tar)); + tar->write = file_write; + tar->read = file_read; + tar->seek = file_seek; + tar->close = file_close; + + /* Assure mode is always binary */ + if ( strchr(mode, 'r') ) mode = "rb"; + if ( strchr(mode, 'w') ) mode = "wb"; + if ( strchr(mode, 'a') ) mode = "ab"; + /* Open file */ + tar->stream = fopen(filename, mode); + if (!tar->stream) { + return MTAR_EOPENFAIL; + } + /* Read first header to check it is valid if mode is `r` */ + if (*mode == 'r') { + err = mtar_read_header(tar, &h); + if (err != MTAR_ESUCCESS) { + mtar_close(tar); + return err; + } + } + + /* Return ok */ + return MTAR_ESUCCESS; +} + + +int mtar_close(mtar_t *tar) { + return tar->close(tar); +} + + +int mtar_seek(mtar_t *tar, long pos) { + int err = tar->seek(tar, pos); + tar->pos = pos; + return err; +} + + +int mtar_rewind(mtar_t *tar) { + tar->remaining_data = 0; + tar->last_header = 0; + return mtar_seek(tar, 0); +} + + +int mtar_next(mtar_t *tar) { + int err, n; + mtar_header_t h; + /* Load header */ + err = mtar_read_header(tar, &h); + if (err) { + return err; + } + /* Seek to next record */ + n = round_up(h.size, 512) + sizeof(mtar_raw_header_t); + return mtar_seek(tar, tar->pos + n); +} + + +int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h) { + int err; + mtar_header_t header; + /* Start at beginning */ + err = mtar_rewind(tar); + if (err) { + return err; + } + /* Iterate all files until we hit an error or find the file */ + while ( (err = mtar_read_header(tar, &header)) == MTAR_ESUCCESS ) { + if ( !strcmp(header.name, name) ) { + if (h) { + *h = header; + } + return MTAR_ESUCCESS; + } + mtar_next(tar); + } + /* Return error */ + if (err == MTAR_ENULLRECORD) { + err = MTAR_ENOTFOUND; + } + return err; +} + + +int mtar_read_header(mtar_t *tar, mtar_header_t *h) { + int err; + mtar_raw_header_t rh; + /* Save header position */ + tar->last_header = tar->pos; + /* Read raw header */ + err = tread(tar, &rh, sizeof(rh)); + if (err) { + return err; + } + /* Seek back to start of header */ + err = mtar_seek(tar, tar->last_header); + if (err) { + return err; + } + /* Load raw header into header struct and return */ + return raw_to_header(h, &rh); +} + + +int mtar_read_data(mtar_t *tar, void *ptr, size_t size) { + int err; + /* If we have no remaining data then this is the first read, we get the size, + * set the remaining data and seek to the beginning of the data */ + if (tar->remaining_data == 0) { + mtar_header_t h; + /* Read header */ + err = mtar_read_header(tar, &h); + if (err) { + return err; + } + /* Seek past header and init remaining data */ + err = mtar_seek(tar, tar->pos + sizeof(mtar_raw_header_t)); + if (err) { + return err; + } + tar->remaining_data = h.size; + } + /* Read data */ + err = tread(tar, ptr, size); + if (err) { + return err; + } + tar->remaining_data -= size; + /* If there is no remaining data we've finished reading and seek back to the + * header */ + if (tar->remaining_data == 0) { + return mtar_seek(tar, tar->last_header); + } + return MTAR_ESUCCESS; +} + + +int mtar_write_header(mtar_t *tar, const mtar_header_t *h) { + mtar_raw_header_t rh; + /* Build raw header and write */ + header_to_raw(&rh, h); + tar->remaining_data = h->size; + return twrite(tar, &rh, sizeof(rh)); +} + + +int mtar_write_file_header(mtar_t *tar, const char *name, size_t size) { + mtar_header_t h; + /* Build header */ + memset(&h, 0, sizeof(h)); + strcpy(h.name, name); + h.size = size; + h.type = MTAR_TREG; + h.mode = 0664; + /* Write header */ + return mtar_write_header(tar, &h); +} + + +int mtar_write_dir_header(mtar_t *tar, const char *name) { + mtar_header_t h; + /* Build header */ + memset(&h, 0, sizeof(h)); + strcpy(h.name, name); + h.type = MTAR_TDIR; + h.mode = 0775; + /* Write header */ + return mtar_write_header(tar, &h); +} + + +int mtar_write_data(mtar_t *tar, const void *data, size_t size) { + int err; + /* Write data */ + err = twrite(tar, data, size); + if (err) { + return err; + } + tar->remaining_data -= size; + /* Write padding if we've written all the data for this file */ + if (tar->remaining_data == 0) { + return write_null_bytes(tar, round_up(tar->pos, 512) - tar->pos); + } + return MTAR_ESUCCESS; +} + + +int mtar_finalize(mtar_t *tar) { + /* Write two NULL records */ + return write_null_bytes(tar, sizeof(mtar_raw_header_t) * 2); +} diff --git a/src/microtar.h b/src/microtar.h new file mode 100644 index 0000000..b23989a --- /dev/null +++ b/src/microtar.h @@ -0,0 +1,90 @@ +/** + * Copyright (c) 2017 rxi + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of the MIT license. See `microtar.c` for details. + */ + +#ifndef MICROTAR_H +#define MICROTAR_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include +#include + +#define MTAR_VERSION "0.1.0" + +enum { + MTAR_ESUCCESS = 0, + MTAR_EFAILURE = -1, + MTAR_EOPENFAIL = -2, + MTAR_EREADFAIL = -3, + MTAR_EWRITEFAIL = -4, + MTAR_ESEEKFAIL = -5, + MTAR_EBADCHKSUM = -6, + MTAR_ENULLRECORD = -7, + MTAR_ENOTFOUND = -8 +}; + +enum { + MTAR_TREG = '0', + MTAR_TLNK = '1', + MTAR_TSYM = '2', + MTAR_TCHR = '3', + MTAR_TBLK = '4', + MTAR_TDIR = '5', + MTAR_TFIFO = '6' +}; + +typedef struct { + unsigned mode; + unsigned owner; + unsigned size; + unsigned mtime; + unsigned type; + char name[100]; + char linkname[100]; +} mtar_header_t; + + +typedef struct mtar_t mtar_t; + +struct mtar_t { + int (*read)(mtar_t *tar, void *data, size_t size); + int (*write)(mtar_t *tar, const void *data, size_t size); + int (*seek)(mtar_t *tar, long pos); + int (*close)(mtar_t *tar); + FILE *stream; + size_t pos; + size_t remaining_data; + size_t last_header; +}; + + +const char* mtar_strerror(int err); + +int mtar_open(mtar_t *tar, const char *filename, const char *mode); +int mtar_close(mtar_t *tar); + +int mtar_seek(mtar_t *tar, long pos); +int mtar_rewind(mtar_t *tar); +int mtar_next(mtar_t *tar); +int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h); +int mtar_read_header(mtar_t *tar, mtar_header_t *h); +int mtar_read_data(mtar_t *tar, void *ptr, size_t size); + +int mtar_write_header(mtar_t *tar, const mtar_header_t *h); +int mtar_write_file_header(mtar_t *tar, const char *name, size_t size); +int mtar_write_dir_header(mtar_t *tar, const char *name); +int mtar_write_data(mtar_t *tar, const void *data, size_t size); +int mtar_finalize(mtar_t *tar); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/tarloader.cpp b/src/tarloader.cpp new file mode 100644 index 0000000..6fa45fc --- /dev/null +++ b/src/tarloader.cpp @@ -0,0 +1,169 @@ +#include "tarloader.h" + +#include +#include +#include +#include +#include +#include +#include + + +TarDataset::TarDataset(const std::filesystem::path& path, int64_t inputSize, std::vector selectLabels, std::vector extraInputs): +inputSize(inputSize), selectLabels(selectLabels), extraInputs(extraInputs), path(path) +{ + int ret = mtar_open(&tar, path.c_str(), "r"); + if(ret) + { + std::cerr<<"Unable to open tar at "<= modelStrs.size()) + return "invalid"; + else + return *std::next(modelStrs.begin(), classNum); +} diff --git a/src/tarloader.h b/src/tarloader.h new file mode 100644 index 0000000..9e17a8f --- /dev/null +++ b/src/tarloader.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "eisdataset.h" +#include "microtar.h" + + +class TarDataset : public EisDataset +{ +private: + + mtar_t tar; + + struct File + { + std::filesystem::path path; + size_t classNum; + size_t pos; + size_t size; + }; + + std::vector files; + size_t inputSize; + std::vector modelStrs; + std::vector selectLabels; + std::vector extraInputs; + std::filesystem::path path; + + virtual eis::Spectra getImpl(size_t index) override; + eis::Spectra loadSpectraAtCurrentPos(size_t size); + +public: + explicit TarDataset(const std::filesystem::path& path, int64_t inputSize = 100, std::vector selectLabels = {}, std::vector extraInputs = {}); + TarDataset(const TarDataset& in); + TarDataset& operator=(const TarDataset& in); + ~TarDataset(); + + virtual size_t size() const override; + + virtual size_t classForIndex(size_t index) override; + virtual std::string modelStringForClass(size_t classNum) override; +};