Compare commits
	
		
			1 commit
		
	
	
		
			3db3e0a0de
			...
			e43a235c12
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | e43a235c12 | 
|  | @ -44,10 +44,15 @@ set_property(TARGET rlxpassfail PROPERTY CXX_STANDARD 17) | |||
| install(TARGETS rlxpassfail RUNTIME DESTINATION bin) | ||||
| 
 | ||||
| add_executable(matlabarraytoeis ${COMMON_SRC_FILES} src/matlabarraytoeis.cpp) | ||||
| target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES} -lrelaxisloader) | ||||
| target_link_libraries(matlabarraytoeis ${COMMON_LINK_LIBRARIES}) | ||||
| set_property(TARGET matlabarraytoeis PROPERTY CXX_STANDARD 17) | ||||
| install(TARGETS matlabarraytoeis RUNTIME DESTINATION bin) | ||||
| 
 | ||||
| add_executable(eistomatlabarray ${COMMON_SRC_FILES} src/eistomatlabarray.cpp src/microtar.c src/tarloader.cpp src/eisdataset.cpp) | ||||
| target_link_libraries(eistomatlabarray ${COMMON_LINK_LIBRARIES}) | ||||
| set_property(TARGET eistomatlabarray PROPERTY CXX_STANDARD 17) | ||||
| install(TARGETS eistomatlabarray RUNTIME DESTINATION bin) | ||||
| 
 | ||||
| set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s") | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										24
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								README.md
									
									
									
									
									
								
							|  | @ -1,24 +0,0 @@ | |||
| # KISS EIS Dataformaters | ||||
| 
 | ||||
| Dataformaters contains miscellaneous data formatting utilities that are used to convert the file types used by eisgenerator/eistype into foreign formats | ||||
| 
 | ||||
| ## Requirements | ||||
| 
 | ||||
| * You must be running unix or a unix-like os such as linux | ||||
| * A c++17 compiler like gcc | ||||
| * cmake | ||||
| * libeisgenerator | ||||
| * libkisstype | ||||
| * git | ||||
| 
 | ||||
| ## Build and install instructions | ||||
| 
 | ||||
| ``` | ||||
| $ git clone REPO_URL dataformaters | ||||
| $ cd dataformaters | ||||
| $ mkdir build | ||||
| $ cd build | ||||
| $ cmake .. | ||||
| $ make | ||||
| $ make install | ||||
| ``` | ||||
							
								
								
									
										7
									
								
								src/eisdataset.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								src/eisdataset.cpp
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,7 @@ | |||
| #include "eisdataset.h" | ||||
| 
 | ||||
| eis::Spectra EisDataset::get(size_t index) | ||||
| { | ||||
| 	eis::Spectra data = getImpl(index); | ||||
| 	return data; | ||||
| } | ||||
							
								
								
									
										18
									
								
								src/eisdataset.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								src/eisdataset.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,18 @@ | |||
| #pragma once | ||||
| #include <cstdint> | ||||
| #include <string> | ||||
| #include <kisstype/spectra.h> | ||||
| 
 | ||||
| class EisDataset | ||||
| { | ||||
| private: | ||||
| 	virtual eis::Spectra getImpl(size_t index) = 0; | ||||
| 
 | ||||
| public: | ||||
| 	eis::Spectra get(size_t index); | ||||
| 	virtual size_t size() const = 0; | ||||
| 	virtual size_t classForIndex(size_t index) = 0; | ||||
| 	virtual std::string modelStringForClass(size_t classNum) {return std::string("Unkown");} | ||||
| 	virtual std::string getDescription() {return "";}; | ||||
| 	virtual ~EisDataset(){} | ||||
| }; | ||||
							
								
								
									
										77
									
								
								src/eistomatlabarray.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								src/eistomatlabarray.cpp
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,77 @@ | |||
| #include <iostream> | ||||
| #include <fstream> | ||||
| #include <sstream> | ||||
| #include "tarloader.h" | ||||
| #include "common.h" | ||||
| 
 | ||||
| std::string dataToTsv(const std::vector<eis::DataPoint>& data) | ||||
| { | ||||
| 	std::stringstream out; | ||||
| 	out<<std::scientific; | ||||
| 	for(const eis::DataPoint& dp : data) | ||||
| 		out<<dp.im.real()<<'\t'; | ||||
| 	for(const eis::DataPoint& dp : data) | ||||
| 		out<<0-dp.im.imag()<<'\t'; | ||||
| 	return out.str(); | ||||
| } | ||||
| 
 | ||||
| int main(int argc, char** argv) | ||||
| { | ||||
| 	if(argc != 5) | ||||
| 	{ | ||||
| 		std::cout<<"Usage: "<<argv[0]<<" [TAR_DATASET] [OUT_DIR]\n"; | ||||
| 		return 1; | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 	TarDataset dataset(argv[1], 120); | ||||
| 	if(dataset.size() == 0) | ||||
| 	{ | ||||
| 		std::cout<<"could not open "<<argv[1]<<'\n'; | ||||
| 		return 2; | ||||
| 	} | ||||
| 
 | ||||
| 	std::filesystem::path outDir(argv[1]); | ||||
| 
 | ||||
| 	if(!checkDir(outDir)) | ||||
| 		return 2; | ||||
| 
 | ||||
| 	eis::Spectra spectra = dataset.get(0); | ||||
| 	std::vector<std::fstream> labelFiles; | ||||
| 	for(const std::string& name : spectra.labelNames) | ||||
| 	{ | ||||
| 		labelFiles.push_back(std::fstream(outDir/(name+".txt"), std::ios_base::out)); | ||||
| 		if(!labelFiles.back().is_open()) | ||||
| 		{ | ||||
| 			std::cerr<<"Unable to open "<<outDir/(name+".txt")<<" for writeing\n"; | ||||
| 			return 2; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	std::fstream spectraFile(outDir/"spectras.txt", std::ios_base::out); | ||||
| 	if(!spectraFile.is_open()) | ||||
| 	{ | ||||
| 		std::cerr<<"Unable to open "<<(outDir/"spectras.txt")<<" for writeing\n"; | ||||
| 		return 2; | ||||
| 	} | ||||
| 
 | ||||
| 	for(size_t i = 0; i < dataset.size(); ++i) | ||||
| 	{ | ||||
| 		eis::Spectra spectra = dataset.get(i); | ||||
| 		spectraFile<<dataToTsv(spectra.data)<<'\n'; | ||||
| 		if(spectra.labels.size() != labelFiles.size()) | ||||
| 		{ | ||||
| 			std::cerr<<"not all spectra have the same number of labels\n"; | ||||
| 			return 3; | ||||
| 		} | ||||
| 		for(size_t j = 0; j < spectra.labels.size(); ++i) | ||||
| 			labelFiles[j]<<spectra.labels[j]<<'\n'; | ||||
| 
 | ||||
| 	} | ||||
| 
 | ||||
| 	spectraFile.close(); | ||||
| 	for(std::fstream& file : labelFiles) | ||||
| 		file.close(); | ||||
| 
 | ||||
| 	return 0; | ||||
| } | ||||
|  | @ -76,8 +76,8 @@ int main(int argc, char** argv) | |||
| 			std::string rul; | ||||
| 			if(!std::getline(rulFile, rul).good()) | ||||
| 			{ | ||||
| 				std::cout<<argv[3]<<" dose not have the same number of lines as "<<argv[2]<<" this could be fine or mean that the supplied label file dosent corrispond to the spectra file\n"; | ||||
| 				return 0; | ||||
| 				std::cout<<argv[3]<<" dose not have the same number of lines as "<<argv[2]<<" aborting\n"; | ||||
| 				return 3; | ||||
| 			} | ||||
| 
 | ||||
| 			std::vector<eis::DataPoint> data = parseLine(eisLine); | ||||
|  |  | |||
							
								
								
									
										376
									
								
								src/microtar.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										376
									
								
								src/microtar.c
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,376 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2017 rxi | ||||
|  * | ||||
|  * Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
|  * of this software and associated documentation files (the "Software"), to | ||||
|  * deal in the Software without restriction, including without limitation the | ||||
|  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | ||||
|  * sell copies of the Software, and to permit persons to whom the Software is | ||||
|  * furnished to do so, subject to the following conditions: | ||||
|  * | ||||
|  * The above copyright notice and this permission notice shall be included in | ||||
|  * all copies or substantial portions of the Software. | ||||
|  * | ||||
|  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
|  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
|  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
|  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
|  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | ||||
|  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | ||||
|  * IN THE SOFTWARE. | ||||
|  */ | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| #include <stddef.h> | ||||
| #include <string.h> | ||||
| 
 | ||||
| #include "microtar.h" | ||||
| 
 | ||||
| typedef struct { /* Raw header record as read from / written to the archive; the field sizes add up to 512 bytes */ | ||||
|   char name[100]; /* entry name, copied to/from mtar_header_t.name */ | ||||
|   char mode[8]; /* octal text ("%o") */ | ||||
|   char owner[8]; /* octal text ("%o") */ | ||||
|   char group[8]; /* not read or written by the conversion helpers in this file */ | ||||
|   char size[12]; /* octal text ("%o") */ | ||||
|   char mtime[12]; /* octal text ("%o") */ | ||||
|   char checksum[8]; /* octal text; excluded from the sum computed by checksum() */ | ||||
|   char type; /* one of the MTAR_T* codes */ | ||||
|   char linkname[100]; /* copied to/from mtar_header_t.linkname */ | ||||
|   char _padding[255]; /* fills the record up to 512 bytes */ | ||||
| } mtar_raw_header_t; | ||||
| 
 | ||||
| 
 | ||||
/* Rounds n up to the next multiple of incr; values that are already a
 * multiple are returned unchanged. incr must not be zero. */
static unsigned round_up(unsigned n, unsigned incr) {
  unsigned rem = n % incr;
  if (rem == 0) {
    return n;
  }
  return n + (incr - rem);
}
| 
 | ||||
| 
 | ||||
| static unsigned checksum(const mtar_raw_header_t* rh) { | ||||
|   unsigned i; | ||||
|   unsigned char *p = (unsigned char*) rh; | ||||
|   unsigned res = 256; | ||||
|   for (i = 0; i < offsetof(mtar_raw_header_t, checksum); i++) { | ||||
|     res += p[i]; | ||||
|   } | ||||
|   for (i = offsetof(mtar_raw_header_t, type); i < sizeof(*rh); i++) { | ||||
|     res += p[i]; | ||||
|   } | ||||
|   return res; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int tread(mtar_t *tar, void *data, size_t size) { | ||||
|   int err = tar->read(tar, data, size); | ||||
|   tar->pos += size; | ||||
|   return err; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int twrite(mtar_t *tar, const void *data, size_t size) { | ||||
|   int err = tar->write(tar, data, size); | ||||
|   tar->pos += size; | ||||
|   return err; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int write_null_bytes(mtar_t *tar, int n) { | ||||
|   int i, err; | ||||
|   char nul = '\0'; | ||||
|   for (i = 0; i < n; i++) { | ||||
|     err = twrite(tar, &nul, 1); | ||||
|     if (err) { | ||||
|       return err; | ||||
|     } | ||||
|   } | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int raw_to_header(mtar_header_t *h, const mtar_raw_header_t *rh) { | ||||
|   unsigned chksum1, chksum2; | ||||
| 
 | ||||
|   /* If the checksum starts with a null byte we assume the record is NULL */ | ||||
|   if (*rh->checksum == '\0') { | ||||
|     return MTAR_ENULLRECORD; | ||||
|   } | ||||
| 
 | ||||
|   /* Build and compare checksum */ | ||||
|   chksum1 = checksum(rh); | ||||
|   sscanf(rh->checksum, "%o", &chksum2); | ||||
|   if (chksum1 != chksum2) { | ||||
|     return MTAR_EBADCHKSUM; | ||||
|   } | ||||
| 
 | ||||
|   /* Load raw header into header */ | ||||
|   sscanf(rh->mode, "%o", &h->mode); | ||||
|   sscanf(rh->owner, "%o", &h->owner); | ||||
|   sscanf(rh->size, "%o", &h->size); | ||||
|   sscanf(rh->mtime, "%o", &h->mtime); | ||||
|   h->type = rh->type; | ||||
|   strcpy(h->name, rh->name); | ||||
|   strcpy(h->linkname, rh->linkname); | ||||
| 
 | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static int header_to_raw(mtar_raw_header_t *rh, const mtar_header_t *h) { | ||||
|   unsigned chksum; | ||||
| 
 | ||||
|   /* Load header into raw header */ | ||||
|   memset(rh, 0, sizeof(*rh)); | ||||
|   sprintf(rh->mode, "%o", h->mode); | ||||
|   sprintf(rh->owner, "%o", h->owner); | ||||
|   sprintf(rh->size, "%o", h->size); | ||||
|   sprintf(rh->mtime, "%o", h->mtime); | ||||
|   rh->type = h->type ? h->type : MTAR_TREG; | ||||
|   strcpy(rh->name, h->name); | ||||
|   strcpy(rh->linkname, h->linkname); | ||||
| 
 | ||||
|   /* Calculate and write checksum */ | ||||
|   chksum = checksum(rh); | ||||
|   sprintf(rh->checksum, "%06o", chksum); | ||||
|   rh->checksum[7] = ' '; | ||||
| 
 | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
/* Returns a static, human-readable description of an MTAR_E* status code,
 * or "unknown error" for any other value. */
const char* mtar_strerror(int err) {
  static const struct {
    int code;
    const char *text;
  } messages[] = {
    { MTAR_ESUCCESS,    "success" },
    { MTAR_EFAILURE,    "failure" },
    { MTAR_EOPENFAIL,   "could not open" },
    { MTAR_EREADFAIL,   "could not read" },
    { MTAR_EWRITEFAIL,  "could not write" },
    { MTAR_ESEEKFAIL,   "could not seek" },
    { MTAR_EBADCHKSUM,  "bad checksum" },
    { MTAR_ENULLRECORD, "null record" },
    { MTAR_ENOTFOUND,   "file not found" }
  };
  size_t idx;
  for (idx = 0; idx < sizeof(messages) / sizeof(messages[0]); idx++) {
    if (messages[idx].code == err) {
      return messages[idx].text;
    }
  }
  return "unknown error";
}
| 
 | ||||
| 
 | ||||
| static int file_write(mtar_t *tar, const void *data, size_t size) { | ||||
|   size_t res = fwrite(data, 1, size, tar->stream); | ||||
|   return (res == size) ? MTAR_ESUCCESS : MTAR_EWRITEFAIL; | ||||
| } | ||||
| 
 | ||||
| static int file_read(mtar_t *tar, void *data, size_t size) { | ||||
|   size_t res = fread(data, 1, size, tar->stream); | ||||
|   return (res == size) ? MTAR_ESUCCESS : MTAR_EREADFAIL; | ||||
| } | ||||
| 
 | ||||
| static int file_seek(mtar_t *tar, long offset) { | ||||
|   int res = fseek(tar->stream, offset, SEEK_SET); | ||||
|   return (res == 0) ? MTAR_ESUCCESS : MTAR_ESEEKFAIL; | ||||
| } | ||||
| 
 | ||||
| static int file_close(mtar_t *tar) { | ||||
|   fclose(tar->stream); | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_open(mtar_t *tar, const char *filename, const char *mode) { | ||||
|   int err; | ||||
|   mtar_header_t h; | ||||
| 
 | ||||
|   /* Init tar struct and functions */ | ||||
|   memset(tar, 0, sizeof(*tar)); | ||||
|   tar->write = file_write; | ||||
|   tar->read = file_read; | ||||
|   tar->seek = file_seek; | ||||
|   tar->close = file_close; | ||||
| 
 | ||||
|   /* Assure mode is always binary */ | ||||
|   if ( strchr(mode, 'r') ) mode = "rb"; | ||||
|   if ( strchr(mode, 'w') ) mode = "wb"; | ||||
|   if ( strchr(mode, 'a') ) mode = "ab"; | ||||
|   /* Open file */ | ||||
|   tar->stream = fopen(filename, mode); | ||||
|   if (!tar->stream) { | ||||
|     return MTAR_EOPENFAIL; | ||||
|   } | ||||
|   /* Read first header to check it is valid if mode is `r` */ | ||||
|   if (*mode == 'r') { | ||||
|     err = mtar_read_header(tar, &h); | ||||
|     if (err != MTAR_ESUCCESS) { | ||||
|       mtar_close(tar); | ||||
|       return err; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   /* Return ok */ | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_close(mtar_t *tar) { | ||||
|   return tar->close(tar); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_seek(mtar_t *tar, long pos) { | ||||
|   int err = tar->seek(tar, pos); | ||||
|   tar->pos = pos; | ||||
|   return err; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_rewind(mtar_t *tar) { | ||||
|   tar->remaining_data = 0; | ||||
|   tar->last_header = 0; | ||||
|   return mtar_seek(tar, 0); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_next(mtar_t *tar) { | ||||
|   int err, n; | ||||
|   mtar_header_t h; | ||||
|   /* Load header */ | ||||
|   err = mtar_read_header(tar, &h); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   /* Seek to next record */ | ||||
|   n = round_up(h.size, 512) + sizeof(mtar_raw_header_t); | ||||
|   return mtar_seek(tar, tar->pos + n); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h) { | ||||
|   int err; | ||||
|   mtar_header_t header; | ||||
|   /* Start at beginning */ | ||||
|   err = mtar_rewind(tar); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   /* Iterate all files until we hit an error or find the file */ | ||||
|   while ( (err = mtar_read_header(tar, &header)) == MTAR_ESUCCESS ) { | ||||
|     if ( !strcmp(header.name, name) ) { | ||||
|       if (h) { | ||||
|         *h = header; | ||||
|       } | ||||
|       return MTAR_ESUCCESS; | ||||
|     } | ||||
|     mtar_next(tar); | ||||
|   } | ||||
|   /* Return error */ | ||||
|   if (err == MTAR_ENULLRECORD) { | ||||
|     err = MTAR_ENOTFOUND; | ||||
|   } | ||||
|   return err; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_read_header(mtar_t *tar, mtar_header_t *h) { | ||||
|   int err; | ||||
|   mtar_raw_header_t rh; | ||||
|   /* Save header position */ | ||||
|   tar->last_header = tar->pos; | ||||
|   /* Read raw header */ | ||||
|   err = tread(tar, &rh, sizeof(rh)); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   /* Seek back to start of header */ | ||||
|   err = mtar_seek(tar, tar->last_header); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   /* Load raw header into header struct and return */ | ||||
|   return raw_to_header(h, &rh); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_read_data(mtar_t *tar, void *ptr, size_t size) { | ||||
|   int err; | ||||
|   /* If we have no remaining data then this is the first read, we get the size,
 | ||||
|    * set the remaining data and seek to the beginning of the data */ | ||||
|   if (tar->remaining_data == 0) { | ||||
|     mtar_header_t h; | ||||
|     /* Read header */ | ||||
|     err = mtar_read_header(tar, &h); | ||||
|     if (err) { | ||||
|       return err; | ||||
|     } | ||||
|     /* Seek past header and init remaining data */ | ||||
|     err = mtar_seek(tar, tar->pos + sizeof(mtar_raw_header_t)); | ||||
|     if (err) { | ||||
|       return err; | ||||
|     } | ||||
|     tar->remaining_data = h.size; | ||||
|   } | ||||
|   /* Read data */ | ||||
|   err = tread(tar, ptr, size); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   tar->remaining_data -= size; | ||||
|   /* If there is no remaining data we've finished reading and seek back to the
 | ||||
|    * header */ | ||||
|   if (tar->remaining_data == 0) { | ||||
|     return mtar_seek(tar, tar->last_header); | ||||
|   } | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_write_header(mtar_t *tar, const mtar_header_t *h) { | ||||
|   mtar_raw_header_t rh; | ||||
|   /* Build raw header and write */ | ||||
|   header_to_raw(&rh, h); | ||||
|   tar->remaining_data = h->size; | ||||
|   return twrite(tar, &rh, sizeof(rh)); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_write_file_header(mtar_t *tar, const char *name, size_t size) { | ||||
|   mtar_header_t h; | ||||
|   /* Build header */ | ||||
|   memset(&h, 0, sizeof(h)); | ||||
|   strcpy(h.name, name); | ||||
|   h.size = size; | ||||
|   h.type = MTAR_TREG; | ||||
|   h.mode = 0664; | ||||
|   /* Write header */ | ||||
|   return mtar_write_header(tar, &h); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_write_dir_header(mtar_t *tar, const char *name) { | ||||
|   mtar_header_t h; | ||||
|   /* Build header */ | ||||
|   memset(&h, 0, sizeof(h)); | ||||
|   strcpy(h.name, name); | ||||
|   h.type = MTAR_TDIR; | ||||
|   h.mode = 0775; | ||||
|   /* Write header */ | ||||
|   return mtar_write_header(tar, &h); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_write_data(mtar_t *tar, const void *data, size_t size) { | ||||
|   int err; | ||||
|   /* Write data */ | ||||
|   err = twrite(tar, data, size); | ||||
|   if (err) { | ||||
|     return err; | ||||
|   } | ||||
|   tar->remaining_data -= size; | ||||
|   /* Write padding if we've written all the data for this file */ | ||||
|   if (tar->remaining_data == 0) { | ||||
|     return write_null_bytes(tar, round_up(tar->pos, 512) - tar->pos); | ||||
|   } | ||||
|   return MTAR_ESUCCESS; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| int mtar_finalize(mtar_t *tar) { | ||||
|   /* Write two NULL records */ | ||||
|   return write_null_bytes(tar, sizeof(mtar_raw_header_t) * 2); | ||||
| } | ||||
							
								
								
									
										90
									
								
								src/microtar.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								src/microtar.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,90 @@ | |||
| /**
 | ||||
|  * Copyright (c) 2017 rxi | ||||
|  * | ||||
|  * This library is free software; you can redistribute it and/or modify it | ||||
|  * under the terms of the MIT license. See `microtar.c` for details. | ||||
|  */ | ||||
| 
 | ||||
| #ifndef MICROTAR_H | ||||
| #define MICROTAR_H | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| extern "C" | ||||
| { | ||||
| #endif | ||||
| 
 | ||||
| #include <stdio.h> | ||||
| #include <stdlib.h> | ||||
| 
 | ||||
| #define MTAR_VERSION "0.1.0" | ||||
| 
 | ||||
| enum { /* Status codes returned by the mtar_* functions; mtar_strerror() maps them to text */ | ||||
|   MTAR_ESUCCESS     =  0, /* "success" */ | ||||
|   MTAR_EFAILURE     = -1, /* "failure" */ | ||||
|   MTAR_EOPENFAIL    = -2, /* "could not open" */ | ||||
|   MTAR_EREADFAIL    = -3, /* "could not read" */ | ||||
|   MTAR_EWRITEFAIL   = -4, /* "could not write" */ | ||||
|   MTAR_ESEEKFAIL    = -5, /* "could not seek" */ | ||||
|   MTAR_EBADCHKSUM   = -6, /* "bad checksum" */ | ||||
|   MTAR_ENULLRECORD  = -7, /* "null record" */ | ||||
|   MTAR_ENOTFOUND    = -8 /* "file not found" */ | ||||
| }; | ||||
| 
 | ||||
| enum { /* Entry type codes stored in the `type` field of a header */ | ||||
|   MTAR_TREG   = '0', /* used by mtar_write_file_header(); also written when a header's type is 0 */ | ||||
|   MTAR_TLNK   = '1', | ||||
|   MTAR_TSYM   = '2', | ||||
|   MTAR_TCHR   = '3', | ||||
|   MTAR_TBLK   = '4', | ||||
|   MTAR_TDIR   = '5', /* used by mtar_write_dir_header() */ | ||||
|   MTAR_TFIFO  = '6' | ||||
| }; | ||||
| 
 | ||||
| typedef struct { /* Decoded form of a header record */ | ||||
|   unsigned mode; /* parsed from / written as octal text */ | ||||
|   unsigned owner; /* parsed from / written as octal text */ | ||||
|   unsigned size; /* size of the entry's data in bytes */ | ||||
|   unsigned mtime; /* parsed from / written as octal text */ | ||||
|   unsigned type; /* one of the MTAR_T* codes */ | ||||
|   char name[100]; /* entry name */ | ||||
|   char linkname[100]; /* copied verbatim from / to the raw header */ | ||||
| } mtar_header_t; | ||||
| 
 | ||||
| 
 | ||||
| typedef struct mtar_t mtar_t; | ||||
| 
 | ||||
| struct mtar_t { /* Archive handle: I/O callbacks plus read/write bookkeeping */ | ||||
|   int (*read)(mtar_t *tar, void *data, size_t size); /* mtar_open() installs a stdio-based implementation */ | ||||
|   int (*write)(mtar_t *tar, const void *data, size_t size); /* mtar_open() installs a stdio-based implementation */ | ||||
|   int (*seek)(mtar_t *tar, long pos); /* absolute seek; mtar_open() installs a stdio-based implementation */ | ||||
|   int (*close)(mtar_t *tar); /* invoked by mtar_close() */ | ||||
|   FILE *stream; /* file opened by mtar_open() */ | ||||
|   size_t pos; /* position tracked across reads, writes and seeks */ | ||||
|   size_t remaining_data; /* bytes of the current entry not yet read or written */ | ||||
|   size_t last_header; /* position of the header most recently read */ | ||||
| }; | ||||
| 
 | ||||
| 
 | ||||
| const char* mtar_strerror(int err); | ||||
| 
 | ||||
| int mtar_open(mtar_t *tar, const char *filename, const char *mode); | ||||
| int mtar_close(mtar_t *tar); | ||||
| 
 | ||||
| int mtar_seek(mtar_t *tar, long pos); | ||||
| int mtar_rewind(mtar_t *tar); | ||||
| int mtar_next(mtar_t *tar); | ||||
| int mtar_find(mtar_t *tar, const char *name, mtar_header_t *h); | ||||
| int mtar_read_header(mtar_t *tar, mtar_header_t *h); | ||||
| int mtar_read_data(mtar_t *tar, void *ptr, size_t size); | ||||
| 
 | ||||
| int mtar_write_header(mtar_t *tar, const mtar_header_t *h); | ||||
| int mtar_write_file_header(mtar_t *tar, const char *name, size_t size); | ||||
| int mtar_write_dir_header(mtar_t *tar, const char *name); | ||||
| int mtar_write_data(mtar_t *tar, const void *data, size_t size); | ||||
| int mtar_finalize(mtar_t *tar); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
										169
									
								
								src/tarloader.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										169
									
								
								src/tarloader.cpp
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,169 @@ | |||
| #include "tarloader.h" | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <assert.h> | ||||
| #include <kisstype/type.h> | ||||
| #include <eisgenerator/translators.h> | ||||
| #include <iostream> | ||||
| #include <eisgenerator/model.h> | ||||
| #include <eisgenerator/basicmath.h> | ||||
| 
 | ||||
| 
 | ||||
| TarDataset::TarDataset(const std::filesystem::path& path, int64_t inputSize, std::vector<std::string> selectLabels, std::vector<std::string> extraInputs): | ||||
| inputSize(inputSize), selectLabels(selectLabels), extraInputs(extraInputs), path(path) | ||||
| { | ||||
| 	int ret = mtar_open(&tar, path.c_str(), "r"); | ||||
| 	if(ret) | ||||
| 	{ | ||||
| 		std::cerr<<"Unable to open tar at "<<path<<'\n'; | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	mtar_header_t header; | ||||
| 	while((mtar_read_header(&tar, &header)) != MTAR_ENULLRECORD) | ||||
| 	{ | ||||
| 		if(header.type == MTAR_TREG) | ||||
| 		{ | ||||
| 			std::filesystem::path path = header.name; | ||||
| 			size_t pos = tar.pos; | ||||
| 			eis::Spectra spectra = loadSpectraAtCurrentPos(header.size); | ||||
| 
 | ||||
| 			bool skip = false; | ||||
| 			for(const std::string& key : selectLabels) | ||||
| 			{ | ||||
| 				if(!spectra.hasLabel(key)) | ||||
| 				{ | ||||
| 					std::cout<<"Dsicarding as it is missing: "<<key<<'\n'; | ||||
| 					skip = true; | ||||
| 					break; | ||||
| 				} | ||||
| 			} | ||||
| 			for(const std::string& key : extraInputs) | ||||
| 			{ | ||||
| 				if(!spectra.hasLabel(key)) | ||||
| 				{ | ||||
| 					std::cout<<"Dsicarding as it is missing: "<<key<<'\n'; | ||||
| 					skip = true; | ||||
| 					break; | ||||
| 				} | ||||
| 				continue; | ||||
| 			} | ||||
| 
 | ||||
| 			if(!skip) | ||||
| 			{ | ||||
| 				eis::purgeEisParamBrackets(spectra.model); | ||||
| 				eis::Model::removeSeriesResitance(spectra.model); | ||||
| 
 | ||||
| 				if(spectra.model.length() < 2 && spectra.model != "r" && spectra.model != "c" && spectra.model != "w" && spectra.model != "p" && spectra.model != "l") | ||||
| 					spectra.model = "Union"; | ||||
| 
 | ||||
| 				auto search = std::find(modelStrs.begin(), modelStrs.end(), spectra.model); | ||||
| 				size_t index; | ||||
| 				if(search == modelStrs.end()) | ||||
| 				{ | ||||
| 					index = modelStrs.size(); | ||||
| 					modelStrs.push_back(spectra.model); | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
| 					index = search - modelStrs.begin(); | ||||
| 				} | ||||
| 				files.push_back({.path = path, .classNum = index, .pos = pos, .size = header.size}); | ||||
| 			} | ||||
| 		} | ||||
| 		mtar_next(&tar); | ||||
| 	} | ||||
| 	if(files.size() < 20) | ||||
| 		std::cout<<"found few valid files in "<<path<<'\n'; | ||||
| } | ||||
| 
 | ||||
| eis::Spectra TarDataset::loadSpectraAtCurrentPos(size_t size) | ||||
| { | ||||
| 	char* filebuffer = new char[size+1]; | ||||
| 	filebuffer[size] = '\0'; | ||||
| 	int ret = mtar_read_data(&tar, filebuffer, size); | ||||
| 	if(ret != 0) | ||||
| 	{ | ||||
| 		std::cerr<<"Unable to read from tar archive\n"; | ||||
| 		assert(ret == 0); | ||||
| 	} | ||||
| 	std::stringstream ss(filebuffer); | ||||
| 
 | ||||
| 	eis::Spectra spectra = eis::Spectra::loadFromStream(ss); | ||||
| 	delete[] filebuffer; | ||||
| 
 | ||||
| 	return spectra; | ||||
| } | ||||
| 
 | ||||
| TarDataset::TarDataset(const TarDataset& in) | ||||
| { | ||||
| 	operator=(in); | ||||
| } | ||||
| 
 | ||||
| TarDataset& TarDataset::operator=(const TarDataset& in) | ||||
| { | ||||
| 	files = in.files; | ||||
| 	inputSize = in.inputSize; | ||||
| 	modelStrs = in.modelStrs; | ||||
| 	selectLabels = in.selectLabels; | ||||
| 	extraInputs = in.extraInputs; | ||||
| 	path = in.path; | ||||
| 	int ret = mtar_open(&tar, path.c_str(), "r"); | ||||
| 	if(ret != 0) | ||||
| 	{ | ||||
| 		std::cerr<<"Unable to reopen tar file at "<<path<<'\n'; | ||||
| 		assert(ret == 0); | ||||
| 	} | ||||
| 	return *this; | ||||
| } | ||||
| 
 | ||||
| TarDataset::~TarDataset() | ||||
| { | ||||
| 	mtar_close(&tar); | ||||
| } | ||||
| 
 | ||||
| eis::Spectra TarDataset::getImpl(size_t index) | ||||
| { | ||||
| 	if(files.size() < index) | ||||
| 	{ | ||||
| 		std::cerr<<"index "<<index<<" out of range in "<<__func__<<'\n'; | ||||
| 		assert(false); | ||||
| 		return {}; | ||||
| 	} | ||||
| 
 | ||||
| 	mtar_seek(&tar, files[index].pos); | ||||
| 	eis::Spectra spectra = loadSpectraAtCurrentPos(files[index].size); | ||||
| 
 | ||||
| 	spectra.data = eis::rescale(spectra.data, inputSize/2); | ||||
| 
 | ||||
| 	if(!selectLabels.empty() || !extraInputs.empty()) | ||||
| 	{ | ||||
| 		eis::Spectra copy = spectra; | ||||
| 		spectra.labelNames.clear(); | ||||
| 		spectra.labels.clear(); | ||||
| 		for(const std::string& key : selectLabels) | ||||
| 			spectra.addLabel(key, copy.getLabel(key)); | ||||
| 		for(const std::string& key : extraInputs) | ||||
| 			spectra.addLabel("exip_" + key, copy.getLabel(key)); | ||||
| 	} | ||||
| 
 | ||||
| 	return spectra; | ||||
| } | ||||
| 
 | ||||
| size_t TarDataset::classForIndex(size_t index) | ||||
| { | ||||
| 	return files[index].classNum; | ||||
| } | ||||
| 
 | ||||
| size_t TarDataset::size() const | ||||
| { | ||||
| 	return files.size(); | ||||
| } | ||||
| 
 | ||||
| std::string TarDataset::modelStringForClass(size_t classNum) | ||||
| { | ||||
| 	if(classNum >= modelStrs.size()) | ||||
| 		return "invalid"; | ||||
| 	else | ||||
| 		return *std::next(modelStrs.begin(), classNum); | ||||
| } | ||||
							
								
								
									
										47
									
								
								src/tarloader.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								src/tarloader.h
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,47 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <filesystem> | ||||
| #include <kisstype/spectra.h> | ||||
| 
 | ||||
| #include "eisdataset.h" | ||||
| #include "microtar.h" | ||||
| 
 | ||||
| 
 | ||||
| class TarDataset : public EisDataset | ||||
| { | ||||
| private: | ||||
| 
 | ||||
| 	mtar_t tar; | ||||
| 
 | ||||
| 	struct File | ||||
| 	{ | ||||
| 		std::filesystem::path path; | ||||
| 		size_t classNum; | ||||
| 		size_t pos; | ||||
| 		size_t size; | ||||
| 	}; | ||||
| 
 | ||||
| 	std::vector<TarDataset::File> files; | ||||
| 	size_t inputSize; | ||||
| 	std::vector<std::string> modelStrs; | ||||
| 	std::vector<std::string> selectLabels; | ||||
| 	std::vector<std::string> extraInputs; | ||||
| 	std::filesystem::path path; | ||||
| 
 | ||||
| 	virtual eis::Spectra getImpl(size_t index) override; | ||||
| 	eis::Spectra loadSpectraAtCurrentPos(size_t size); | ||||
| 
 | ||||
| public: | ||||
| 	explicit TarDataset(const std::filesystem::path& path, int64_t inputSize = 100, std::vector<std::string> selectLabels = {}, std::vector<std::string> extraInputs = {}); | ||||
| 	TarDataset(const TarDataset& in); | ||||
| 	TarDataset& operator=(const TarDataset& in); | ||||
| 	~TarDataset(); | ||||
| 
 | ||||
| 	virtual size_t size() const override; | ||||
| 
 | ||||
| 	virtual size_t classForIndex(size_t index) override; | ||||
| 	virtual std::string modelStringForClass(size_t classNum) override; | ||||
| }; | ||||
		Loading…
	
		Reference in a new issue