diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ba557f..5436cca 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,15 +44,10 @@ set_property(TARGET rlxpassfail PROPERTY CXX_STANDARD 17) install(TARGETS rlxpassfail RUNTIME DESTINATION bin) add_executable(matlabarraytoeis ${COMMON_SRC_FILES} src/matlabarraytoeis.cpp) -target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES}) +target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES} -lrelaxisloader) set_property(TARGET matlabarraytoeis PROPERTY CXX_STANDARD 17) install(TARGETS matlabarraytoeis RUNTIME DESTINATION bin) -add_executable(eistomatlabarray ${COMMON_SRC_FILES} src/eistomatlabarray.cpp src/microtar.c src/tarloader.cpp src/eisdataset.cpp) -target_link_libraries(eistomatlabarray ${COMMON_LINK_LIBRARIES}) -set_property(TARGET eistomatlabarray PROPERTY CXX_STANDARD 17) -install(TARGETS eistomatlabarray RUNTIME DESTINATION bin) - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") diff --git a/README.md b/README.md new file mode 100644 index 0000000..fda3df6 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# KISS EIS Dataformaters + +Dataformaters contains miscellaneous data formatting utilities that are used to convert the file types used by eisgenerator/eistype into foreign formats. + +## Requirements + +* You must be running Unix or a Unix-like OS such as Linux +* A C++17 compiler such as GCC +* cmake +* libeisgenerator +* libkisstype +* git + +## Build and install instructions + +``` +$ git clone REPO_URL dataformaters +$ cd dataformaters +$ mkdir build +$ cd build +$ cmake .. 
+$ make +$ make install +``` diff --git a/src/eisdataset.cpp b/src/eisdataset.cpp deleted file mode 100644 index 829887e..0000000 --- a/src/eisdataset.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "eisdataset.h" - -eis::Spectra EisDataset::get(size_t index) -{ - eis::Spectra data = getImpl(index); - return data; -} diff --git a/src/eisdataset.h b/src/eisdataset.h deleted file mode 100644 index 65f37bc..0000000 --- a/src/eisdataset.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once -#include -#include -#include - -class EisDataset -{ -private: - virtual eis::Spectra getImpl(size_t index) = 0; - -public: - eis::Spectra get(size_t index); - virtual size_t size() const = 0; - virtual size_t classForIndex(size_t index) = 0; - virtual std::string modelStringForClass(size_t classNum) {return std::string("Unkown");} - virtual std::string getDescription() {return "";}; - virtual ~EisDataset(){} -}; diff --git a/src/eistomatlabarray.cpp b/src/eistomatlabarray.cpp deleted file mode 100644 index f028ad4..0000000 --- a/src/eistomatlabarray.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include "tarloader.h" -#include "common.h" - -std::string dataToTsv(const std::vector& data) -{ - std::stringstream out; - out< labelFiles; - for(const std::string& name : spectra.labelNames) - { - labelFiles.push_back(std::fstream(outDir/(name+".txt"), std::ios_base::out)); - if(!labelFiles.back().is_open()) - { - std::cerr<<"Unable to open "< data = parseLine(eisLine); diff --git a/src/microtar.c b/src/microtar.c deleted file mode 100644 index 8904762..0000000 --- a/src/microtar.c +++ /dev/null @@ -1,376 +0,0 @@ -/* - * Copyright (c) 2017 rxi - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the 
Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include -#include -#include -#include - -#include "microtar.h" - -typedef struct { - char name[100]; - char mode[8]; - char owner[8]; - char group[8]; - char size[12]; - char mtime[12]; - char checksum[8]; - char type; - char linkname[100]; - char _padding[255]; -} mtar_raw_header_t; - - -static unsigned round_up(unsigned n, unsigned incr) { - return n + (incr - n % incr) % incr; -} - - -static unsigned checksum(const mtar_raw_header_t* rh) { - unsigned i; - unsigned char *p = (unsigned char*) rh; - unsigned res = 256; - for (i = 0; i < offsetof(mtar_raw_header_t, checksum); i++) { - res += p[i]; - } - for (i = offsetof(mtar_raw_header_t, type); i < sizeof(*rh); i++) { - res += p[i]; - } - return res; -} - - -static int tread(mtar_t *tar, void *data, size_t size) { - int err = tar->read(tar, data, size); - tar->pos += size; - return err; -} - - -static int twrite(mtar_t *tar, const void *data, size_t size) { - int err = tar->write(tar, data, size); - tar->pos += size; - return err; -} - - -static int write_null_bytes(mtar_t *tar, int n) { - int i, err; - char nul = '\0'; - for (i = 0; i < n; i++) { - err = twrite(tar, &nul, 1); - if (err) { - return err; - } - } - return MTAR_ESUCCESS; -} - - -static int 
raw_to_header(mtar_header_t *h, const mtar_raw_header_t *rh) { - unsigned chksum1, chksum2; - - /* If the checksum starts with a null byte we assume the record is NULL */ - if (*rh->checksum == '\0') { - return MTAR_ENULLRECORD; - } - - /* Build and compare checksum */ - chksum1 = checksum(rh); - sscanf(rh->checksum, "%o", &chksum2); - if (chksum1 != chksum2) { - return MTAR_EBADCHKSUM; - } - - /* Load raw header into header */ - sscanf(rh->mode, "%o", &h->mode); - sscanf(rh->owner, "%o", &h->owner); - sscanf(rh->size, "%o", &h->size); - sscanf(rh->mtime, "%o", &h->mtime); - h->type = rh->type; - strcpy(h->name, rh->name); - strcpy(h->linkname, rh->linkname); - - return MTAR_ESUCCESS; -} - - -static int header_to_raw(mtar_raw_header_t *rh, const mtar_header_t *h) { - unsigned chksum; - - /* Load header into raw header */ - memset(rh, 0, sizeof(*rh)); - sprintf(rh->mode, "%o", h->mode); - sprintf(rh->owner, "%o", h->owner); - sprintf(rh->size, "%o", h->size); - sprintf(rh->mtime, "%o", h->mtime); - rh->type = h->type ? h->type : MTAR_TREG; - strcpy(rh->name, h->name); - strcpy(rh->linkname, h->linkname); - - /* Calculate and write checksum */ - chksum = checksum(rh); - sprintf(rh->checksum, "%06o", chksum); - rh->checksum[7] = ' '; - - return MTAR_ESUCCESS; -} - - -const char* mtar_strerror(int err) { - switch (err) { - case MTAR_ESUCCESS : return "success"; - case MTAR_EFAILURE : return "failure"; - case MTAR_EOPENFAIL : return "could not open"; - case MTAR_EREADFAIL : return "could not read"; - case MTAR_EWRITEFAIL : return "could not write"; - case MTAR_ESEEKFAIL : return "could not seek"; - case MTAR_EBADCHKSUM : return "bad checksum"; - case MTAR_ENULLRECORD : return "null record"; - case MTAR_ENOTFOUND : return "file not found"; - } - return "unknown error"; -} - - -static int file_write(mtar_t *tar, const void *data, size_t size) { - size_t res = fwrite(data, 1, size, tar->stream); - return (res == size) ? 
MTAR_ESUCCESS : MTAR_EWRITEFAIL; -} - -static int file_read(mtar_t *tar, void *data, size_t size) { - size_t res = fread(data, 1, size, tar->stream); - return (res == size) ? MTAR_ESUCCESS : MTAR_EREADFAIL; -} - -static int file_seek(mtar_t *tar, long offset) { - int res = fseek(tar->stream, offset, SEEK_SET); - return (res == 0) ? MTAR_ESUCCESS : MTAR_ESEEKFAIL; -} - -static int file_close(mtar_t *tar) { - fclose(tar->stream); - return MTAR_ESUCCESS; -} - - -int mtar_open(mtar_t *tar, const char *filename, const char *mode) { - int err; - mtar_header_t h; - - /* Init tar struct and functions */ - memset(tar, 0, sizeof(*tar)); - tar->write = file_write; - tar->read = file_read; - tar->seek = file_seek; - tar->close = file_close; - - /* Assure mode is always binary */ - if ( strchr(mode, 'r') ) mode = "rb"; - if ( strchr(mode, 'w') ) mode = "wb"; - if ( strchr(mode, 'a') ) mode = "ab"; - /* Open file */ - tar->stream = fopen(filename, mode); - if (!tar->stream) { - return MTAR_EOPENFAIL; - } - /* Read first header to check it is valid if mode is `r` */ - if (*mode == 'r') { - err = mtar_read_header(tar, &h); - if (err != MTAR_ESUCCESS) { - mtar_close(tar); - return err; - } - } - - /* Return ok */ - return MTAR_ESUCCESS; -} - - -int mtar_close(mtar_t *tar) { - return tar->close(tar); -} - - -int mtar_seek(mtar_t *tar, long pos) { - int err = tar->seek(tar, pos); - tar->pos = pos; - return err; -} - - -int mtar_rewind(mtar_t *tar) { - tar->remaining_data = 0; - tar->last_header = 0; - return mtar_seek(tar, 0); -} - - -int mtar_next(mtar_t *tar) { - int err, n; - mtar_header_t h; - /* Load header */ - err = mtar_read_header(tar, &h); - if (err) { - return err; - } - /* Seek to next record */ - n = round_up(h.size, 512) + sizeof(mtar_raw_header_t); - return mtar_seek(tar, tar->pos + n); -} - - -int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h) { - int err; - mtar_header_t header; - /* Start at beginning */ - err = mtar_rewind(tar); - if (err) { - return 
err; - } - /* Iterate all files until we hit an error or find the file */ - while ( (err = mtar_read_header(tar, &header)) == MTAR_ESUCCESS ) { - if ( !strcmp(header.name, name) ) { - if (h) { - *h = header; - } - return MTAR_ESUCCESS; - } - mtar_next(tar); - } - /* Return error */ - if (err == MTAR_ENULLRECORD) { - err = MTAR_ENOTFOUND; - } - return err; -} - - -int mtar_read_header(mtar_t *tar, mtar_header_t *h) { - int err; - mtar_raw_header_t rh; - /* Save header position */ - tar->last_header = tar->pos; - /* Read raw header */ - err = tread(tar, &rh, sizeof(rh)); - if (err) { - return err; - } - /* Seek back to start of header */ - err = mtar_seek(tar, tar->last_header); - if (err) { - return err; - } - /* Load raw header into header struct and return */ - return raw_to_header(h, &rh); -} - - -int mtar_read_data(mtar_t *tar, void *ptr, size_t size) { - int err; - /* If we have no remaining data then this is the first read, we get the size, - * set the remaining data and seek to the beginning of the data */ - if (tar->remaining_data == 0) { - mtar_header_t h; - /* Read header */ - err = mtar_read_header(tar, &h); - if (err) { - return err; - } - /* Seek past header and init remaining data */ - err = mtar_seek(tar, tar->pos + sizeof(mtar_raw_header_t)); - if (err) { - return err; - } - tar->remaining_data = h.size; - } - /* Read data */ - err = tread(tar, ptr, size); - if (err) { - return err; - } - tar->remaining_data -= size; - /* If there is no remaining data we've finished reading and seek back to the - * header */ - if (tar->remaining_data == 0) { - return mtar_seek(tar, tar->last_header); - } - return MTAR_ESUCCESS; -} - - -int mtar_write_header(mtar_t *tar, const mtar_header_t *h) { - mtar_raw_header_t rh; - /* Build raw header and write */ - header_to_raw(&rh, h); - tar->remaining_data = h->size; - return twrite(tar, &rh, sizeof(rh)); -} - - -int mtar_write_file_header(mtar_t *tar, const char *name, size_t size) { - mtar_header_t h; - /* Build header */ 
- memset(&h, 0, sizeof(h)); - strcpy(h.name, name); - h.size = size; - h.type = MTAR_TREG; - h.mode = 0664; - /* Write header */ - return mtar_write_header(tar, &h); -} - - -int mtar_write_dir_header(mtar_t *tar, const char *name) { - mtar_header_t h; - /* Build header */ - memset(&h, 0, sizeof(h)); - strcpy(h.name, name); - h.type = MTAR_TDIR; - h.mode = 0775; - /* Write header */ - return mtar_write_header(tar, &h); -} - - -int mtar_write_data(mtar_t *tar, const void *data, size_t size) { - int err; - /* Write data */ - err = twrite(tar, data, size); - if (err) { - return err; - } - tar->remaining_data -= size; - /* Write padding if we've written all the data for this file */ - if (tar->remaining_data == 0) { - return write_null_bytes(tar, round_up(tar->pos, 512) - tar->pos); - } - return MTAR_ESUCCESS; -} - - -int mtar_finalize(mtar_t *tar) { - /* Write two NULL records */ - return write_null_bytes(tar, sizeof(mtar_raw_header_t) * 2); -} diff --git a/src/microtar.h b/src/microtar.h deleted file mode 100644 index b23989a..0000000 --- a/src/microtar.h +++ /dev/null @@ -1,90 +0,0 @@ -/** - * Copyright (c) 2017 rxi - * - * This library is free software; you can redistribute it and/or modify it - * under the terms of the MIT license. See `microtar.c` for details. 
- */ - -#ifndef MICROTAR_H -#define MICROTAR_H - -#ifdef __cplusplus -extern "C" -{ -#endif - -#include -#include - -#define MTAR_VERSION "0.1.0" - -enum { - MTAR_ESUCCESS = 0, - MTAR_EFAILURE = -1, - MTAR_EOPENFAIL = -2, - MTAR_EREADFAIL = -3, - MTAR_EWRITEFAIL = -4, - MTAR_ESEEKFAIL = -5, - MTAR_EBADCHKSUM = -6, - MTAR_ENULLRECORD = -7, - MTAR_ENOTFOUND = -8 -}; - -enum { - MTAR_TREG = '0', - MTAR_TLNK = '1', - MTAR_TSYM = '2', - MTAR_TCHR = '3', - MTAR_TBLK = '4', - MTAR_TDIR = '5', - MTAR_TFIFO = '6' -}; - -typedef struct { - unsigned mode; - unsigned owner; - unsigned size; - unsigned mtime; - unsigned type; - char name[100]; - char linkname[100]; -} mtar_header_t; - - -typedef struct mtar_t mtar_t; - -struct mtar_t { - int (*read)(mtar_t *tar, void *data, size_t size); - int (*write)(mtar_t *tar, const void *data, size_t size); - int (*seek)(mtar_t *tar, long pos); - int (*close)(mtar_t *tar); - FILE *stream; - size_t pos; - size_t remaining_data; - size_t last_header; -}; - - -const char* mtar_strerror(int err); - -int mtar_open(mtar_t *tar, const char *filename, const char *mode); -int mtar_close(mtar_t *tar); - -int mtar_seek(mtar_t *tar, long pos); -int mtar_rewind(mtar_t *tar); -int mtar_next(mtar_t *tar); -int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h); -int mtar_read_header(mtar_t *tar, mtar_header_t *h); -int mtar_read_data(mtar_t *tar, void *ptr, size_t size); - -int mtar_write_header(mtar_t *tar, const mtar_header_t *h); -int mtar_write_file_header(mtar_t *tar, const char *name, size_t size); -int mtar_write_dir_header(mtar_t *tar, const char *name); -int mtar_write_data(mtar_t *tar, const void *data, size_t size); -int mtar_finalize(mtar_t *tar); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/tarloader.cpp b/src/tarloader.cpp deleted file mode 100644 index 6fa45fc..0000000 --- a/src/tarloader.cpp +++ /dev/null @@ -1,169 +0,0 @@ -#include "tarloader.h" - -#include -#include -#include -#include -#include -#include 
-#include - - -TarDataset::TarDataset(const std::filesystem::path& path, int64_t inputSize, std::vector selectLabels, std::vector extraInputs): -inputSize(inputSize), selectLabels(selectLabels), extraInputs(extraInputs), path(path) -{ - int ret = mtar_open(&tar, path.c_str(), "r"); - if(ret) - { - std::cerr<<"Unable to open tar at "<= modelStrs.size()) - return "invalid"; - else - return *std::next(modelStrs.begin(), classNum); -} diff --git a/src/tarloader.h b/src/tarloader.h deleted file mode 100644 index 9e17a8f..0000000 --- a/src/tarloader.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include - -#include "eisdataset.h" -#include "microtar.h" - - -class TarDataset : public EisDataset -{ -private: - - mtar_t tar; - - struct File - { - std::filesystem::path path; - size_t classNum; - size_t pos; - size_t size; - }; - - std::vector files; - size_t inputSize; - std::vector modelStrs; - std::vector selectLabels; - std::vector extraInputs; - std::filesystem::path path; - - virtual eis::Spectra getImpl(size_t index) override; - eis::Spectra loadSpectraAtCurrentPos(size_t size); - -public: - explicit TarDataset(const std::filesystem::path& path, int64_t inputSize = 100, std::vector selectLabels = {}, std::vector extraInputs = {}); - TarDataset(const TarDataset& in); - TarDataset& operator=(const TarDataset& in); - ~TarDataset(); - - virtual size_t size() const override; - - virtual size_t classForIndex(size_t index) override; - virtual std::string modelStringForClass(size_t classNum) override; -};