From f04ede3a497a7a88dda91ab622ace5d91037705a Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 00:55:36 +0100 Subject: [PATCH 01/57] Add option to read csv data from a buffer, add some unit tests for the new feature --- include/ss/common.hpp | 2 +- include/ss/parser.hpp | 127 +++++++++++++++++++++++--- test/test_parser.cpp | 204 +++++++++++++++++++++++++++--------------- 3 files changed, 250 insertions(+), 83 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 7531e29..d16475f 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -26,7 +26,7 @@ inline void assert_throw_on_error_not_defined() { } #if __unix__ -inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { +inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index db80dd3..31907fe 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -48,6 +48,18 @@ public: } } + parser(const char* const csv_data_buffer, size_t csv_data_size, + const std::string& delim = ss::default_delimiter) + : file_name_{"buffer line"}, + reader_{csv_data_buffer, csv_data_size, delim} { + read_line(); + if constexpr (ignore_header) { + ignore_next(); + } else { + raw_header_ = reader_.get_buffer(); + } + } + parser(parser&& other) = default; parser& operator=(parser&& other) = default; @@ -641,18 +653,27 @@ private: : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { } + reader(const char* const buffer, size_t csv_data_size, + const std::string& delim) + : delim_{delim}, csv_data_buffer_{buffer}, + csv_data_size_{csv_data_size} { + } + reader(reader&& other) : buffer_{other.buffer_}, next_line_buffer_{other.next_line_buffer_}, - helper_buffer_{other.helper_buffer_}, converter_{std::move( - other.converter_)}, + helper_buffer_{other.helper_buffer_}, + converter_{std::move(other.converter_)}, 
next_line_converter_{std::move(other.next_line_converter_)}, buffer_size_{other.buffer_size_}, next_line_buffer_size_{other.next_line_buffer_size_}, - helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, - file_{other.file_}, crlf_{other.crlf_}, - line_number_{other.line_number_}, next_line_size_{ - other.next_line_size_} { + helper_buffer_size{other.helper_buffer_size}, + delim_{std::move(other.delim_)}, file_{other.file_}, + csv_data_buffer_{other.csv_data_buffer_}, + csv_data_size_{other.csv_data_size_}, + curr_char_{other.curr_char_}, crlf_{other.crlf_}, + line_number_{other.line_number_}, + next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; other.helper_buffer_ = nullptr; @@ -668,9 +689,12 @@ private: next_line_converter_ = std::move(other.next_line_converter_); buffer_size_ = other.buffer_size_; next_line_buffer_size_ = other.next_line_buffer_size_; - helper_size_ = other.helper_size_; + helper_buffer_size = other.helper_buffer_size; delim_ = std::move(other.delim_); file_ = other.file_; + csv_data_buffer_ = other.csv_data_buffer_; + csv_data_size_ = other.csv_data_size_; + curr_char_ = other.curr_char_; crlf_ = other.crlf_; line_number_ = other.line_number_; next_line_size_ = other.next_line_size_; @@ -698,6 +722,60 @@ private: reader(const reader& other) = delete; reader& operator=(const reader& other) = delete; + ssize_t get_line_buffer(char** lineptr, size_t* n, + const char* const buffer, size_t csv_data_size, + size_t& curr_char) { + size_t pos; + int c; + + // TODO remove check + if (lineptr == nullptr || buffer == nullptr || n == nullptr) { + return -1; + } + + c = buffer[curr_char++]; + if (curr_char >= csv_data_size) { + return -1; + } + + // TODO maybe remove this too + if (*lineptr == nullptr) { + *lineptr = static_cast(malloc(128)); + if (*lineptr == nullptr) { + return -1; + } + *n = 128; + } + + pos = 0; + while (curr_char <= csv_data_size) { + if (pos + 1 >= *n) { + size_t 
new_size = *n + (*n >> 2); + // TODO maybe remove this too + if (new_size < 128) { + new_size = 128; + } + char* new_ptr = static_cast( + realloc(static_cast(*lineptr), new_size)); + // TODO check for failed malloc in the callee + if (new_ptr == nullptr) { + return -1; + } + *n = new_size; + *lineptr = new_ptr; + } + + (*lineptr)[pos++] = c; + if (c == '\n') { + break; + } + c = buffer[curr_char++]; + } + + (*lineptr)[pos] = '\0'; + return pos; + } + // read next line each time in order to set eof_ bool read_next() { next_line_converter_.clear_error(); @@ -708,8 +786,16 @@ private: if (next_line_buffer_size_ > 0) { next_line_buffer_[0] = '\0'; } - ssize = get_line(&next_line_buffer_, &next_line_buffer_size_, - file_); + + if (file_) { + ssize = get_line_file(&next_line_buffer_, + &next_line_buffer_size_, file_); + } else { + ssize = get_line_buffer(&next_line_buffer_, + &next_line_buffer_size_, + csv_data_buffer_, csv_data_size_, + curr_char_); + } if (ssize == -1) { return false; @@ -821,6 +907,10 @@ private: } size_t remove_eol(char*& buffer, size_t ssize) { + if (buffer[ssize - 1] != '\n') { + return ssize; + } + size_t size = ssize - 1; if (ssize >= 2 && buffer[ssize - 2] == '\r') { crlf_ = true; @@ -851,8 +941,17 @@ private: bool append_next_line_to_buffer(char*& buffer, size_t& size) { undo_remove_eol(buffer, size); - ssize_t next_ssize = - get_line(&helper_buffer_, &helper_size_, file_); + ssize_t next_ssize; + if (file_) { + next_ssize = + get_line_file(&helper_buffer_, &helper_buffer_size, file_); + } else { + next_ssize = + get_line_buffer(&helper_buffer_, &helper_buffer_size, + csv_data_buffer_, csv_data_size_, + curr_char_); + } + if (next_ssize == -1) { return false; } @@ -879,11 +978,15 @@ private: size_t buffer_size_{0}; size_t next_line_buffer_size_{0}; - size_t helper_size_{0}; + size_t helper_buffer_size{0}; std::string delim_; FILE* file_{nullptr}; + const char* csv_data_buffer_{nullptr}; + size_t csv_data_size_{0}; + size_t curr_char_{0}; + bool 
crlf_{false}; size_t line_number_{0}; diff --git a/test/test_parser.cpp b/test/test_parser.cpp index e7089ff..865c3de 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -38,7 +38,7 @@ void expect_error_on_command(ss::parser& p, } } -void update_if_crlf(std::string& s) { +[[maybe_unused]] void update_if_crlf(std::string& s) { #ifdef _WIN32 replace_all(s, "\r\n", "\n"); #else @@ -102,6 +102,31 @@ static void make_and_write(const std::string& file_name, out << data[i].to_string() << new_lines[i % new_lines.size()]; } } + +std::string make_buffer(const std::string& file_name) { + std::ifstream in{file_name, std::ios::binary}; + std::string tmp; + std::string out; + out.reserve(sizeof(out) + 1); + while (in >> tmp) { + out += tmp; + out.append("\n"); + } + return out; +} + +template +std::tuple, std::string> make_parser( + const std::string& file_name, const std::string& delim) { + if (buffer_mode) { + auto buffer = make_buffer(file_name); + return {ss::parser{buffer.data(), buffer.size(), delim}, + std::move(buffer)}; + } else { + return {ss::parser{file_name, delim}, std::string{}}; + } +} + } /* namespace */ TEST_CASE("test file not found") { @@ -125,22 +150,23 @@ TEST_CASE("test file not found") { } } -template +template void test_various_cases() { unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; make_and_write(f.name, data); + auto csv_data_buffer = make_buffer(f.name); { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); ss::parser p0{std::move(p)}; p = std::move(p0); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; - auto move_rotate = [&] { + auto move_rotate = [&p = p, &p0 = p0] { auto p1 = std::move(p); p0 = std::move(p1); p = std::move(p0); @@ -152,7 +178,7 @@ void test_various_cases() { i.emplace_back(ss::to_object(a)); } - for (const auto& a : p2.iterate()) { + for (const auto& a : 
p2.template iterate()) { i2.emplace_back(ss::to_object(a)); } @@ -161,13 +187,13 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; - ss::parser p3{f.name, ","}; + auto [p3, ___] = make_parser(f.name, ","); std::vector i3; std::vector expected = {std::begin(data) + 1, std::end(data)}; @@ -175,18 +201,18 @@ void test_various_cases() { p.ignore_next(); while (!p.eof()) { - auto a = p.get_next(); + auto a = p.template get_next(); i.emplace_back(ss::to_object(a)); } p2.ignore_next(); - for (const auto& a : p2.iterate()) { + for (const auto& a : p2.template iterate()) { i2.emplace_back(ss::to_object(a)); } p3.ignore_next(); - for (auto it = p3.iterate().begin(); it != p3.iterate().end(); - ++it) { + for (auto it = p3.template iterate().begin(); + it != p3.template iterate().end(); ++it) { i3.emplace_back(ss::to_object(*it)); } @@ -196,16 +222,17 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - i.push_back(p.get_object()); + i.push_back(p.template get_object()); } - for (auto&& a : p2.iterate_object()) { + for (auto&& a : + p2.template iterate_object()) { i2.push_back(std::move(a)); } @@ -214,10 +241,11 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - for (auto&& a : p.iterate_object()) { + for (auto&& a : + p.template iterate_object()) { i.push_back(std::move(a)); } @@ -225,19 +253,19 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; using tup = std::tuple; while (!p.eof()) { - 
i.push_back(p.get_object()); + i.push_back(p.template get_object()); } - for (auto it = p2.iterate_object().begin(); - it != p2.iterate_object().end(); it++) { + for (auto it = p2.template iterate_object().begin(); + it != p2.template iterate_object().end(); it++) { i2.push_back({it->i, it->d, it->s}); } @@ -246,11 +274,11 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; using tup = std::tuple; - for (auto&& a : p.iterate_object()) { + for (auto&& a : p.template iterate_object()) { i.push_back(std::move(a)); } @@ -258,21 +286,21 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; while (!p.eof()) { - i.push_back(p.get_next()); + i.push_back(p.template get_next()); } CHECK_EQ(i, data); } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - for (auto&& a : p.iterate()) { + for (auto&& a : p.template iterate()) { i.push_back(std::move(a)); } @@ -281,24 +309,30 @@ void test_various_cases() { { constexpr int excluded = 3; - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - auto a = - p.get_object, double, std::string>(); - if (p.valid()) { - i.push_back(a); - } + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) 
{ + // ignore + }; } - for (auto&& a : p2.iterate_object, double, - std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } } } @@ -312,33 +346,45 @@ void test_various_cases() { std::copy_if(data.begin(), data.end(), expected.begin(), [&](const X& x) { return x.i != excluded; }); CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); + + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - auto a = p.get_object, double, std::string>(); - if (p.valid()) { - i.push_back(a); + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) { + // ignore } } - for (auto&& a : - p2.iterate_object, double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } } } std::vector expected = {{3, 4, "y"}}; CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } } { @@ -347,17 +393,17 @@ void test_various_cases() { make_and_write(empty_f.name, empty_data); - ss::parser p{empty_f.name, ","}; + auto [p, _] = make_parser(empty_f.name, ","); std::vector i; - ss::parser p2{empty_f.name, ","}; + auto [p2, __] = make_parser(empty_f.name, ","); std::vector i2; while (!p.eof()) { - i.push_back(p.get_next()); + i.push_back(p.template get_next()); } - for (auto&& a : p2.iterate()) { + for (auto&& a : p2.template iterate()) { i2.push_back(std::move(a)); } @@ -367,9 +413,12 @@ void test_various_cases() { } TEST_CASE("parser 
test various cases") { - test_various_cases(); - test_various_cases(); - test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); } using test_tuple = std::tuple; @@ -385,7 +434,7 @@ struct test_struct { static inline void expect_test_struct(const test_struct&) { } -template +template void test_composite_conversion() { unique_file_name f{"test_parser"}; { @@ -397,7 +446,7 @@ void test_composite_conversion() { } } - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); auto fail = [] { FAIL(""); }; auto expect_error = [](auto error) { CHECK(!error.empty()); }; auto ignore_error = [] {}; @@ -609,7 +658,8 @@ void test_composite_conversion() { // various scenarios TEST_CASE("parser test composite conversion") { - test_composite_conversion(); + test_composite_conversion(); + test_composite_conversion(); } struct my_string { @@ -653,7 +703,7 @@ struct xyz { } }; -template +template void test_moving_of_parsed_composite_values() { // to compile is enough return; @@ -669,8 +719,10 @@ void test_moving_of_parsed_composite_values() { } TEST_CASE("parser test the moving of parsed composite values") { - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); } TEST_CASE("parser test error mode") { @@ -681,12 +733,23 @@ TEST_CASE("parser test error mode") { out << "junk" << std::endl; } - ss::parser p(f.name, ","); + { + auto [p, _] = make_parser(f.name, ","); - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + 
p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } } TEST_CASE("parser throw on error mode") { @@ -1680,3 +1743,4 @@ TEST_CASE("parser test various cases with empty lines") { test_ignore_empty({}); } + From 7062888d7216e0fb46c10c25061aade335038404 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 01:07:29 +0100 Subject: [PATCH 02/57] Fix msvc build --- include/ss/common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index d16475f..430f8ee 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -31,7 +31,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { } #else using ssize_t = int64_t; -inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { +inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { size_t pos; int c; From 6c859959d6d0ee8cfd9d89e0ffbe3be286059da3 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 17:43:33 +0100 Subject: [PATCH 03/57] Add more unit tests for buffer mode --- test/test_parser.cpp | 57 +++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 16 deletions(-) diff --git a/test/test_parser.cpp b/test/test_parser.cpp index 865c3de..d1fc036 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -110,7 +110,12 @@ std::string make_buffer(const std::string& file_name) { out.reserve(sizeof(out) + 1); while (in >> tmp) { out += tmp; - out.append("\n"); + if (in.peek() == '\n') { + out += "\n"; + } + if (in.peek() == '\r') { + out += "\r\n"; + } } return out; } @@ -760,14 +765,28 @@ TEST_CASE("parser throw on error mode") { out << "junk" << std::endl; } - ss::parser p(f.name, ","); + { + auto [p, _] = make_parser(f.name, ","); - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + 
FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } } } @@ -778,7 +797,7 @@ static inline std::string no_quote(const std::string& s) { return s; } -template +template void test_quote_multiline() { unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "\"x\r\nx\nx\""}, @@ -799,7 +818,10 @@ void test_quote_multiline() { } } - ss::parser, Ts...> p{f.name, ","}; + auto [p, buff] = + make_parser, Ts...>(f.name, + ","); + std::vector i; while (!p.eof()) { @@ -812,9 +834,10 @@ void test_quote_multiline() { } CHECK_EQ(i, data); - ss::parser, Ts...> p_no_multiline{f.name, ","}; + auto [p_no_multiline, __] = + make_parser, Ts...>(f.name, ","); while (!p.eof()) { - auto command = [&] { + auto command = [&p_no_multiline = p_no_multiline] { p_no_multiline.template get_next(); }; expect_error_on_command(p_no_multiline, command); @@ -822,9 +845,12 @@ void test_quote_multiline() { } TEST_CASE("parser test csv on multiple lines with quotes") { - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); } static inline std::string no_escape(std::string& s) { @@ -1743,4 +1769,3 @@ TEST_CASE("parser test various cases with empty lines") { test_ignore_empty({}); } - From ce03c371ae7e6b15568cb978aa326351919f2d8f Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 21:05:30 +0100 Subject: [PATCH 04/57] Split parser tests into multiple files, add more tests for buffer mode --- test/meson.build | 5 +- test/test_parser.cpp | 1771 --------------------------------------- test/test_parser1.hpp | 151 ++++ 
test/test_parser1_1.cpp | 534 ++++++++++++ test/test_parser1_2.cpp | 309 +++++++ test/test_parser1_3.cpp | 515 ++++++++++++ test/test_parser1_4.cpp | 291 +++++++ 7 files changed, 1804 insertions(+), 1772 deletions(-) delete mode 100644 test/test_parser.cpp create mode 100644 test/test_parser1.hpp create mode 100644 test/test_parser1_1.cpp create mode 100644 test/test_parser1_2.cpp create mode 100644 test/test_parser1_3.cpp create mode 100644 test/test_parser1_4.cpp diff --git a/test/meson.build b/test/meson.build index 25bf963..454b16f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -2,7 +2,10 @@ doctest_dep = dependency('doctest') add_project_arguments('-DDOCTEST_CONFIG_IMPLEMENT_WITH_MAIN', language: 'cpp') tests = [ - 'parser', + 'parser1_1', + 'parser1_2', + 'parser1_3', + 'parser1_4', 'splitter', 'converter', 'extractions', diff --git a/test/test_parser.cpp b/test/test_parser.cpp deleted file mode 100644 index d1fc036..0000000 --- a/test/test_parser.cpp +++ /dev/null @@ -1,1771 +0,0 @@ -#include "test_helpers.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { -[[maybe_unused]] void replace_all(std::string& s, const std::string& from, - const std::string& to) { - if (from.empty()) return; - size_t start_pos = 0; - while ((start_pos = s.find(from, start_pos)) != std::string::npos) { - s.replace(start_pos, from.length(), to); - start_pos += to.length(); - } -} - -template -void expect_error_on_command(ss::parser& p, - const std::function command) { - if (ss::setup::throw_on_error) { - try { - command(); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } else { - command(); - CHECK(!p.valid()); - if constexpr (ss::setup::string_error) { - CHECK_FALSE(p.error_msg().empty()); - } - } -} - -[[maybe_unused]] void update_if_crlf(std::string& s) { -#ifdef _WIN32 - replace_all(s, "\r\n", "\n"); -#else - (void)(s); -#endif -} - -struct X { - constexpr static auto 
delim = ","; - constexpr static auto empty = "_EMPTY_"; - int i; - double d; - std::string s; - - std::string to_string() const { - if (s == empty) { - return ""; - } - - return std::to_string(i) - .append(delim) - .append(std::to_string(d)) - .append(delim) - .append(s); - } - auto tied() const { - return std::tie(i, d, s); - } -}; - -template -std::enable_if_t, bool> operator==(const T& lhs, - const T& rhs) { - return lhs.tied() == rhs.tied(); -} - -template -static void make_and_write(const std::string& file_name, - const std::vector& data, - const std::vector& header = {}) { - std::ofstream out{file_name}; - -#ifdef _WIN32 - std::vector new_lines = {"\n"}; -#else - std::vector new_lines = {"\n", "\r\n"}; -#endif - - for (const auto& i : header) { - if (&i != &header.front()) { - out << T::delim; - } - out << i; - } - - if (!header.empty()) { - out << new_lines.front(); - } - - for (size_t i = 0; i < data.size(); ++i) { - out << data[i].to_string() << new_lines[i % new_lines.size()]; - } -} - -std::string make_buffer(const std::string& file_name) { - std::ifstream in{file_name, std::ios::binary}; - std::string tmp; - std::string out; - out.reserve(sizeof(out) + 1); - while (in >> tmp) { - out += tmp; - if (in.peek() == '\n') { - out += "\n"; - } - if (in.peek() == '\r') { - out += "\r\n"; - } - } - return out; -} - -template -std::tuple, std::string> make_parser( - const std::string& file_name, const std::string& delim) { - if (buffer_mode) { - auto buffer = make_buffer(file_name); - return {ss::parser{buffer.data(), buffer.size(), delim}, - std::move(buffer)}; - } else { - return {ss::parser{file_name, delim}, std::string{}}; - } -} - -} /* namespace */ - -TEST_CASE("test file not found") { - unique_file_name f{"test_parser"}; - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.valid()); - } - - try { - ss::parser p{f.name, ","}; - FAIL("Expected exception..."); - } catch (const std::exception& e) 
{ - CHECK_FALSE(std::string{e.what()}.empty()); - } -} - -template -void test_various_cases() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, - {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; - make_and_write(f.name, data); - auto csv_data_buffer = make_buffer(f.name); - { - auto [p, _] = make_parser(f.name, ","); - ss::parser p0{std::move(p)}; - p = std::move(p0); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - auto move_rotate = [&p = p, &p0 = p0] { - auto p1 = std::move(p); - p0 = std::move(p1); - p = std::move(p0); - }; - - while (!p.eof()) { - move_rotate(); - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (const auto& a : p2.template iterate()) { - i2.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - auto [p3, ___] = make_parser(f.name, ","); - std::vector i3; - - std::vector expected = {std::begin(data) + 1, std::end(data)}; - using tup = std::tuple; - - p.ignore_next(); - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - p2.ignore_next(); - for (const auto& a : p2.template iterate()) { - i2.emplace_back(ss::to_object(a)); - } - - p3.ignore_next(); - for (auto it = p3.template iterate().begin(); - it != p3.template iterate().end(); ++it) { - i3.emplace_back(ss::to_object(*it)); - } - - CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); - CHECK_EQ(i3, expected); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - i.push_back(p.template get_object()); - } - - for (auto&& a : - p2.template iterate_object()) { - i2.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, 
","); - std::vector i; - - for (auto&& a : - p.template iterate_object()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - using tup = std::tuple; - while (!p.eof()) { - i.push_back(p.template get_object()); - } - - for (auto it = p2.template iterate_object().begin(); - it != p2.template iterate_object().end(); it++) { - i2.push_back({it->i, it->d, it->s}); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - using tup = std::tuple; - for (auto&& a : p.template iterate_object()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - while (!p.eof()) { - i.push_back(p.template get_next()); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - for (auto&& a : p.template iterate()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - constexpr int excluded = 3; - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - try { - auto a = p.template get_object, double, - std::string>(); - if (p.valid()) { - i.push_back(a); - } - } catch (...) 
{ - // ignore - }; - } - - if (!ss::setup::throw_on_error) { - for (auto&& a : p2.template iterate_object, - double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); - } - } - } - - std::vector expected; - for (auto& x : data) { - if (x.i != excluded) { - expected.push_back(x); - } - } - - std::copy_if(data.begin(), data.end(), expected.begin(), - [&](const X& x) { return x.i != excluded; }); - CHECK_EQ(i, expected); - - if (!ss::setup::throw_on_error) { - CHECK_EQ(i2, expected); - } - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - try { - auto a = p.template get_object, double, - std::string>(); - if (p.valid()) { - i.push_back(a); - } - } catch (...) { - // ignore - } - } - - if (!ss::setup::throw_on_error) { - for (auto&& a : p2.template iterate_object, - double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); - } - } - } - - std::vector expected = {{3, 4, "y"}}; - CHECK_EQ(i, expected); - if (!ss::setup::throw_on_error) { - CHECK_EQ(i2, expected); - } - } - - { - unique_file_name empty_f{"test_parser"}; - std::vector empty_data = {}; - - make_and_write(empty_f.name, empty_data); - - auto [p, _] = make_parser(empty_f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(empty_f.name, ","); - std::vector i2; - - while (!p.eof()) { - i.push_back(p.template get_next()); - } - - for (auto&& a : p2.template iterate()) { - i2.push_back(std::move(a)); - } - - CHECK(i.empty()); - CHECK(i2.empty()); - } -} - -TEST_CASE("parser test various cases") { - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); -} - -using test_tuple = std::tuple; -struct test_struct { - int i; - double d; - char c; - auto tied() { - return std::tie(i, d, c); - } -}; - -static inline void expect_test_struct(const test_struct&) { -} - -template -void 
test_composite_conversion() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - for (auto& i : - {"10,a,11.1", "10,20,11.1", "junk", "10,11.1", "1,11.1,a", "junk", - "10,junk", "11,junk", "10,11.1,c", "10,20", "10,22.2,f"}) { - out << i << std::endl; - } - } - - auto [p, _] = make_parser(f.name, ","); - auto fail = [] { FAIL(""); }; - auto expect_error = [](auto error) { CHECK(!error.empty()); }; - auto ignore_error = [] {}; - - REQUIRE(p.valid()); - REQUIRE_FALSE(p.eof()); - - { - constexpr static auto expectedData = std::tuple{10, 'a', 11.1}; - - auto [d1, d2, d3, d4] = - p.template try_next(fail) - .template or_else(fail) - .template or_else( - [&](auto&& data) { CHECK_EQ(data, expectedData); }) - .on_error(fail) - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(*d3, expectedData); - } - - { - REQUIRE(!p.eof()); - constexpr static auto expectedData = std::tuple{10, 20, 11.1}; - - auto [d1, d2, d3, d4] = - p.template try_next( - [&](auto& i1, auto i2, double d) { - CHECK_EQ(std::tie(i1, i2, d), expectedData); - }) - .on_error(fail) - .template or_object(fail) - .on_error(fail) - .template or_else(fail) - .on_error(fail) - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(*d1, expectedData); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2, d3, d4, d5] = - p.template try_object(fail) - .on_error(expect_error) - .template or_else(fail) - .template or_else(fail) - .template or_else(fail) - .template or_else(fail) - .values(); - - REQUIRE_FALSE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - REQUIRE_FALSE(d5); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next([](auto& i, auto& d) { - REQUIRE_EQ(std::tie(i, d), std::tuple{10, 11.1}); - }) - .template or_else([](auto&, auto&) { 
FAIL(""); }) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next([](auto&, auto&) { FAIL(""); }) - .template or_else(expect_test_struct) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE(d2); - CHECK_EQ(d2->tied(), std::tuple{1, 11.1, 'a'}); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2, d3, d4, d5] = - p.template try_next(fail) - .template or_object() - .template or_else(expect_test_struct) - .template or_else(fail) - .template or_else>(fail) - .on_error(ignore_error) - .on_error(expect_error) - .values(); - - REQUIRE_FALSE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - REQUIRE_FALSE(d5); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next>() - .on_error(ignore_error) - .on_error(fail) - .template or_else>(fail) - .on_error(ignore_error) - .on_error(fail) - .on_error(ignore_error) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(*d1, std::tuple{10, std::nullopt}); - } - - { - REQUIRE_FALSE(p.eof()); - - auto [d1, d2] = - p.template try_next>() - .on_error(fail) - .template or_else>(fail) - .on_error(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(*d1, std::tuple{11, std::variant{"junk"}}); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = p.template try_object() - .template or_else(fail) - .values(); - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(d1->tied(), std::tuple{10, 11.1, 'c'}); - } - - { - REQUIRE_FALSE(p.eof()); - - auto [d1, d2, d3, d4] = - p.template try_next([] { return false; }) - .template or_else([](auto&) { return false; }) - .template or_else() - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(d3.value(), std::tuple{10, 20}); - } - - { - REQUIRE(!p.eof()); - - auto 
[d1, d2, d3, d4] = - p.template try_object( - [] { return false; }) - .template or_else([](auto&) { return false; }) - .template or_object() - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(d3->tied(), std::tuple{10, 22.2, 'f'}); - } - - CHECK(p.eof()); -} - -// various scenarios -TEST_CASE("parser test composite conversion") { - test_composite_conversion(); - test_composite_conversion(); -} - -struct my_string { - char* data{nullptr}; - - my_string() = default; - - ~my_string() { - delete[] data; - } - - // make sure no object is copied - my_string(const my_string&) = delete; - my_string& operator=(const my_string&) = delete; - - my_string(my_string&& other) : data{other.data} { - other.data = nullptr; - } - - my_string& operator=(my_string&& other) { - data = other.data; - return *this; - } -}; - -template <> -inline bool ss::extract(const char* begin, const char* end, my_string& s) { - size_t size = end - begin; - s.data = new char[size + 1]; - strncpy(s.data, begin, size); - s.data[size] = '\0'; - return true; -} - -struct xyz { - my_string x; - my_string y; - my_string z; - auto tied() { - return std::tie(x, y, z); - } -}; - -template -void test_moving_of_parsed_composite_values() { - // to compile is enough - return; - ss::parser p{"", ""}; - p.template try_next() - .template or_else( - [](auto&&) {}) - .template or_else([](auto&) {}) - .template or_else([](auto&&) {}) - .template or_object([](auto&&) {}) - .template or_else>( - [](auto&, auto&, auto&) {}); -} - -TEST_CASE("parser test the moving of parsed composite values") { - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); -} - -TEST_CASE("parser test error mode") { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "junk" << std::endl; - out << "junk" 
<< std::endl; - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } -} - -TEST_CASE("parser throw on error mode") { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "junk" << std::endl; - out << "junk" << std::endl; - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } -} - -static inline std::string no_quote(const std::string& s) { - if (!s.empty() && s[0] == '"') { - return {std::next(begin(s)), std::prev(end(s))}; - } - return s; -} - -template -void test_quote_multiline() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "\"x\r\nx\nx\""}, - {3, 4, "\"y\ny\r\ny\""}, - {5, 6, "\"z\nz\""}, - {7, 8, "\"u\"\"\""}, - {9, 10, "v"}, - {11, 12, "\"w\n\""}}; - for (auto& [_, __, s] : data) { - update_if_crlf(s); - } - - make_and_write(f.name, data); - for (auto& [_, __, s] : data) { - s = no_quote(s); - if (s[0] == 'u') { - s = "u\""; - } - } - - auto [p, buff] = - make_parser, Ts...>(f.name, - ","); - - std::vector i; - - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); - - auto [p_no_multiline, __] = - make_parser, Ts...>(f.name, ","); - while (!p.eof()) { - auto command = [&p_no_multiline = p_no_multiline] { - p_no_multiline.template 
get_next(); - }; - expect_error_on_command(p_no_multiline, command); - } -} - -TEST_CASE("parser test csv on multiple lines with quotes") { - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); -} - -static inline std::string no_escape(std::string& s) { - s.erase(std::remove(begin(s), end(s), '\\'), end(s)); - return s; -} - -template -void test_escape_multiline() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, - {5, 6, "z\\\nz\\\nz"}, - {7, 8, "u"}, - {3, 4, "y\\\ny\\\ny"}, - {9, 10, "v\\\\"}, - {11, 12, "w\\\n"}}; - for (auto& [_, __, s] : data) { - update_if_crlf(s); - } - - make_and_write(f.name, data); - for (auto& [_, __, s] : data) { - s = no_escape(s); - if (s == "v") { - s = "v\\"; - } - } - - ss::parser, Ts...> p{f.name, ","}; - std::vector i; - - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); - - ss::parser, Ts...> p_no_multiline{f.name, ","}; - while (!p.eof()) { - auto command = [&] { - auto a = - p_no_multiline.template get_next(); - }; - expect_error_on_command(p_no_multiline, command); - } -} - -TEST_CASE("parser test csv on multiple lines with escapes") { - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); -} - -template -void test_quote_escape_multiline() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "1,2,\"just\\\n\nstrings\"" << std::endl; -#ifndef _WIN32 - out << "3,4,\"just\r\nsome\\\r\n\n\\\nstrings\"" << std::endl; - out << "5,6,\"just\\\n\\\r\n\r\n\nstrings" << std::endl; -#else - out << "3,4,\"just\nsome\\\n\n\\\nstrings\"" << std::endl; - out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl; -#endif - out << "7,8,\"just strings\"" << std::endl; - out << "9,10,just strings" << std::endl; - } - size_t bad_lines = 1; - 
auto num_errors = 0; - - ss::parser, ss::quote<'"'>, Ts...> p{ - f.name}; - std::vector i; - - while (!p.eof()) { - try { - auto a = p.template get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); - } else { - ++num_errors; - } - } catch (const std::exception& e) { - ++num_errors; - } - } - - CHECK(bad_lines == num_errors); - - std::vector data = {{1, 2, "just\n\nstrings"}, -#ifndef _WIN32 - {3, 4, "just\r\nsome\r\n\n\nstrings"}, -#else - {3, 4, "just\nsome\n\n\nstrings"}, -#endif - {9, 10, "just strings"}}; - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); -} - -TEST_CASE("parser test csv on multiple lines with quotes and escapes") { - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); -} - -template -void test_multiline_restricted() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "1,2,\"just\n\nstrings\"" << std::endl; -#ifndef _WIN32 - out << "3,4,\"ju\n\r\n\nnk\"" << std::endl; - out << "5,6,just\\\n\\\r\nstrings" << std::endl; -#else - out << "3,4,\"ju\n\n\nnk\"" << std::endl; - out << "5,6,just\\\n\\\nstrings" << std::endl; -#endif - out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; - out << "9,10,\"just\\\n\nstrings\"" << std::endl; - out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; - out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl; - out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; - out << "19,20,just strings" << std::endl; - } - auto bad_lines = 15; - auto num_errors = 0; - - ss::parser, ss::quote<'"'>, ss::escape<'\\'>, - Ts...> - p{f.name, ","}; - std::vector i; - - while (!p.eof()) { - try { - auto a = p.template get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); - } else { - ++num_errors; - } - } catch (const std::exception& e) { - ++num_errors; - } - } - - CHECK(bad_lines == num_errors); - - std::vector data = {{1, 2, "just\n\nstrings"}, -#ifndef _WIN32 - {5, 6, "just\n\r\nstrings"}, -#else - 
{5, 6, "just\n\nstrings"}, -#endif - {9, 10, "just\n\nstrings"}, - {19, 20, "just strings"}}; - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - - if (i.size() != data.size()) { - CHECK_EQ(i.size(), data.size()); - } - - CHECK_EQ(i, data); -} - -TEST_CASE("parser test multiline restricted") { - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); -} - -template -void test_unterminated_line_impl(const std::vector& lines, - size_t bad_line) { - unique_file_name f{"test_parser"}; - std::ofstream out{f.name}; - for (const auto& line : lines) { - out << line << std::endl; - } - out.close(); - - ss::parser p{f.name}; - size_t line = 0; - while (!p.eof()) { - auto command = [&] { p.template get_next(); }; - - if (line == bad_line) { - expect_error_on_command(p, command); - break; - } else { - command(); - CHECK(p.valid()); - ++line; - } - } -} - -template -void test_unterminated_line(const std::vector& lines, - size_t bad_line) { - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); -} - -TEST_CASE("parser test csv on multiline with errors") { - using multiline = ss::multiline_restricted<3>; - using escape = ss::escape<'\\'>; - using quote = ss::quote<'"'>; - - // unterminated escape - { - const std::vector lines{"1,2,just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,just\\", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,just\\"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,just\\", - "3,4,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", - "1,2,just\\\nstrings\\", - "3,4,th\\\nird"}; - 
test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "3,4,second", - "1,2,just\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - { - const std::vector lines{"9,8,\\first", "3,4,second", - "1,2,jus\\t\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - // unterminated quote - { - const std::vector lines{"1,2,\"just"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just", - "3,4,th\\,ird"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "3,4,second", - "1,2,\"just"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - { - const std::vector lines{"9,8,\"first\"", - "\"3\",4,\"sec,ond\"", - "1,2,\"ju\"\"st"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - // unterminated quote and escape - { - const std::vector lines{"1,2,\"just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just\\\n\\"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just\n\\"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just\n\\"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just\n\\", - "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,f\\\nirst", "1,2,\"just\n\\", - "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector 
lines{"9,8,\"f\ni\nrst\"", - "1,2,\"just\n\\", "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - // multiline limmit reached escape - { - const std::vector lines{"1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,fi\\\nrs\\\nt", - "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", - "1,2,\\\n\\\n\\\n\\\njust", - "4,3,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - // multiline limmit reached quote - { - const std::vector lines{"1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fir\nst\"", - "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - // multiline limmit reached quote and escape - { - const std::vector lines{"1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,fi\\\nrst", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fi\nrst\"", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fi\nr\\\nst\"", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } -} - -template -struct has_type; - -template -struct has_type> - : std::disjunction...> {}; - -static inline void 
check_size(size_t size1, size_t size2) { - CHECK_EQ(size1, size2); -} - -template -static void test_fields_impl(const std::string file_name, - const std::vector& data, - const std::vector& fields) { - using CaseType = std::tuple; - - ss::parser p{file_name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - p.use_fields(fields); - std::vector i; - - for (const auto& a : p.template iterate()) { - i.push_back(a); - } - - check_size(i.size(), data.size()); - for (size_t j = 0; j < i.size(); ++j) { - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].i); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].d); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].s); - } - } -} - -template -static void test_fields(const std::string file_name, const std::vector& data, - const std::vector& fields) { - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, data, - fields); -} - -TEST_CASE("parser test various cases with header") { - unique_file_name f{"test_parser"}; - constexpr static auto Int = "Int"; - constexpr static auto Dbl = "Double"; - constexpr static auto Str = "String"; - using str = std::string; - - std::vector header{Int, Dbl, Str}; - - std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, - {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; - - make_and_write(f.name, data, header); - const auto& o = f.name; - const auto& d = data; - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_NE(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - p.ignore_next(); - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } 
- - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl, Str); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - - p.use_fields(Int, "Unknown"); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Int); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl); - - { - auto [int_, double_] = p.get_next(); - CHECK_EQ(int_, data[0].i); - CHECK_EQ(double_, data[0].d); - } - - p.use_fields(Dbl, Int); - - { - auto [double_, int_] = p.get_next(); - CHECK_EQ(int_, data[1].i); - CHECK_EQ(double_, data[1].d); - } - - p.use_fields(Str); - - { - auto string_ = p.get_next(); - CHECK_EQ(string_, data[2].s); - } - - p.use_fields(Str, Int, Dbl); - - { - auto [string_, int_, double_] = - p.get_next(); - CHECK_EQ(double_, data[3].d); - CHECK_EQ(int_, data[3].i); - CHECK_EQ(string_, data[3].s); - } - } - - /* python used to generate permutations - import itertools - - header = {'str': 'Str', - 'double': 'Dbl', - 'int': 'Int'} - - keys = ['str', 'int', 'double'] - - for r in range (1, 3): - combinations = list(itertools.permutations(keys, r = r)) - - for combination in combinations: - template_params = [] - arg_params = [] - for type in combination: - template_params.append(type) - arg_params.append(header[type]) - call = 'testFields<' + ', '.join(template_params) + \ - '>(o, d, {' + ', '.join(arg_params) + '});' - print(call) - */ - - test_fields(o, d, {Str}); - test_fields(o, d, {Int}); - test_fields(o, d, {Dbl}); - test_fields(o, d, {Str, Int}); - test_fields(o, d, {Str, Dbl}); - test_fields(o, d, {Int, Str}); - test_fields(o, d, {Int, Dbl}); - test_fields(o, d, {Dbl, Str}); - test_fields(o, d, {Dbl, Int}); - test_fields(o, d, {Str, Int, Dbl}); - test_fields(o, d, {Str, Dbl, Int}); - test_fields(o, d, {Int, Str, Dbl}); - test_fields(o, d, {Int, Dbl, Str}); - test_fields(o, d, {Dbl, Str, Int}); - test_fields(o, d, {Dbl, Int, Str}); -} - -template -void 
test_invalid_fields_impl(const std::vector& lines, - const std::vector& fields) { - unique_file_name f{"test_parser"}; - std::ofstream out{f.name}; - for (const auto& line : lines) { - out << line << std::endl; - } - out.close(); - - { - // No fields specified - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(); }; - expect_error_on_command(p, command); - } - - { - // Unknown field - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields("Unknown"); }; - expect_error_on_command(p, command); - } - - { - // Field used multiple times - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields.at(0), fields.at(0)); }; - if (!fields.empty()) { - expect_error_on_command(p, command); - } - } - - { - // Mapping out of range - ss::parser p{f.name, ","}; - auto command = [&] { - p.use_fields(fields.at(0)); - p.template get_next(); - }; - if (!fields.empty()) { - expect_error_on_command(p, command); - } - } - - { - // Invalid header - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields); }; - - if (!fields.empty()) { - // Pass if there are no duplicates, fail otherwise - if (std::unordered_set{fields.begin(), fields.end()} - .size() != fields.size()) { - expect_error_on_command(p, command); - } else { - command(); - CHECK(p.valid()); - if (!p.valid()) { - if constexpr (ss::setup::string_error) { - std::cout << p.error_msg() << std::endl; - } - } - } - } - } -} - -template -void test_invalid_fields(const std::vector& lines, - const std::vector& fields) { - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); -} - -TEST_CASE("parser test invalid header fields usage") { - test_invalid_fields({}, {}); - - test_invalid_fields({"Int"}, {"Int"}); - test_invalid_fields({"Int", "1"}, {"Int"}); - test_invalid_fields({"Int", "1", "2"}, {"Int"}); - - test_invalid_fields({"Int,String"}, {"Int", "String"}); - test_invalid_fields({"Int,String", "1,hi"}, {"Int", 
"String"}); - test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); - - test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, - {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, - {"Int", "String", "Double"}); - - test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); - test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); - - test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); - test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); -} - -template -void test_invalid_rows_with_header() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "Int,String,Double" << std::endl; - out << "1,line1,2.34" << std::endl; - out << "2,line2" << std::endl; - out << "3,line3,67.8" << std::endl; - out << "4,line4,67.8,9" << std::endl; - out << "5,line5,9.10" << std::endl; - out << "six,line6,10.11" << std::endl; - } - - { - ss::parser p{f.name}; - - p.use_fields("Int", "String", "Double"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{1, "line1", 2.34}, - {3, "line3", 67.8}, - {5, "line5", 9.10}}; - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name}; - - p.use_fields("Double", "Int"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name}; - - p.use_fields("String", "Double"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - 
while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{"line1", 2.34}, - {"line3", 67.8}, - {"line5", 9.10}, - {"line6", 10.11}}; - CHECK_EQ(i, expected); - } -} - -TEST_CASE("parser test invalid rows with header") { - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); -} - -template -void test_ignore_empty_impl(const std::vector& data) { - unique_file_name f{"test_parser"}; - make_and_write(f.name, data); - - std::vector expected; - for (const auto& d : data) { - if (d.s != X::empty) { - expected.push_back(d); - } - } - - { - ss::parser p{f.name, ","}; - - std::vector i; - for (const auto& a : p.template iterate()) { - i.push_back(a); - } - - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - size_t n = 0; - while (!p.eof()) { - try { - ++n; - const auto& a = p.template get_next(); - if (data.at(n - 1).s == X::empty) { - CHECK_FALSE(p.valid()); - continue; - } - i.push_back(a); - } catch (...) 
{ - CHECK_EQ(data.at(n - 1).s, X::empty); - } - } - - CHECK_EQ(i, expected); - } -} - -template -void test_ignore_empty(const std::vector& data) { - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); -} - -TEST_CASE("parser test various cases with empty lines") { - test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty({{1, 2, X::empty}, - {3, 4, X::empty}, - {9, 10, X::empty}, - {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, "w"}}); - - test_ignore_empty({{11, 12, X::empty}}); - - test_ignore_empty({}); -} diff --git a/test/test_parser1.hpp b/test/test_parser1.hpp new file mode 100644 index 0000000..a68a939 --- /dev/null +++ b/test/test_parser1.hpp @@ -0,0 +1,151 @@ +#pragma once + +#include "test_helpers.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { +[[maybe_unused]] void replace_all(std::string& s, const std::string& from, + const std::string& to) { + if (from.empty()) return; + size_t start_pos = 0; + while ((start_pos = s.find(from, start_pos)) != std::string::npos) { + s.replace(start_pos, from.length(), to); + start_pos += 
to.length();
+    }
+}
+
+// Runs `command` and verifies that it drives parser `p` into an error state.
+// In throw_on_error mode the command has to throw an exception with a
+// non-empty message; otherwise the parser has to become invalid and, in
+// string_error mode, expose a non-empty error message.
+template
+void expect_error_on_command(ss::parser& p,
+                             const std::function command) {
+    if (ss::setup::throw_on_error) {
+        try {
+            command();
+            // Returning normally means the expected error never surfaced.
+            FAIL("Expected exception...");
+        } catch (const std::exception& e) {
+            CHECK_FALSE(std::string{e.what()}.empty());
+        }
+    } else {
+        command();
+        CHECK(!p.valid());
+        if constexpr (ss::setup::string_error) {
+            CHECK_FALSE(p.error_msg().empty());
+        }
+    }
+}
+
+// On Windows builds rewrites every "\r\n" in `s` to "\n"; elsewhere `s` is
+// left untouched.
+[[maybe_unused]] void update_if_crlf(std::string& s) {
+#ifdef _WIN32
+    replace_all(s, "\r\n", "\n");
+#else
+    (void)(s);
+#endif
+}
+
+// Row type used by the parser tests: an int, a double and a string column.
+struct X {
+    constexpr static auto delim = ",";
+    // Sentinel value of `s` that makes to_string() produce an empty line.
+    constexpr static auto empty = "_EMPTY_";
+    int i;
+    double d;
+    std::string s;
+
+    // Serializes the row as "i<delim>d<delim>s", or "" for the sentinel row.
+    std::string to_string() const {
+        if (s == empty) {
+            return "";
+        }
+
+        return std::to_string(i)
+            .append(delim)
+            .append(std::to_string(d))
+            .append(delim)
+            .append(s);
+    }
+    auto tied() const {
+        return std::tie(i, d, s);
+    }
+};
+
+// Equality through the tied() member tuples of both operands.
+template
+std::enable_if_t, bool> operator==(const T& lhs,
+                                                       const T& rhs) {
+    return lhs.tied() == rhs.tied();
+}
+
+// Writes `header` (fields joined by T::delim) followed by one line per
+// element of `data` into `file_name`. Line endings cycle through `new_lines`,
+// so on non-Windows builds the rows alternate between "\n" and "\r\n".
+template
+static void make_and_write(const std::string& file_name,
+                           const std::vector& data,
+                           const std::vector& header = {}) {
+    std::ofstream out{file_name};
+
+#ifdef _WIN32
+    std::vector new_lines = {"\n"};
+#else
+    std::vector new_lines = {"\n", "\r\n"};
+#endif
+
+    for (const auto& i : header) {
+        if (&i != &header.front()) {
+            out << T::delim;
+        }
+        out << i;
+    }
+
+    if (!header.empty()) {
+        out << new_lines.front();
+    }
+
+    for (size_t i = 0; i < data.size(); ++i) {
+        out << data[i].to_string() << new_lines[i % new_lines.size()];
+    }
+}
+
+// Loads `file_name` into a string. Tokens are read with operator>> (which
+// skips the whitespace in front of each token) and the whitespace that
+// follows a token is copied through by hand; a '\r' is always emitted as
+// "\r\n" and consumes two input characters.
+std::string make_buffer(const std::string& file_name) {
+    std::ifstream in{file_name, std::ios::binary};
+    std::string tmp;
+    std::string out;
+    while (in >> tmp) {
+        out += tmp;
+        std::string matches = "\n\r\t ";
+        while (std::any_of(matches.begin(), matches.end(),
+                           [&](auto c) { return in.peek() == c; })) {
+            if (in.peek() == '\r') {
+                out += "\r\n";
+                in.ignore(2);
+            } else {
+                out
+= std::string{static_cast(in.peek())};
+                in.ignore(1);
+            }
+        }
+    }
+    return out;
+}
+
+// Creates the parser under test together with the storage it may read from.
+// In buffer mode the file is loaded with make_buffer() and the parser is
+// constructed over that string, which is returned as the second tuple
+// element; in file mode the parser opens `file_name` itself and the returned
+// string is empty. An empty `delim` selects the parser's default delimiter.
+// NOTE(review): the parser is built from buffer.data() and the string is then
+// moved into the tuple. If the string is short enough to live in the
+// small-string buffer, the move relocates the bytes and the parser may be
+// left with a dangling pointer -- confirm for tiny inputs.
+template
+std::tuple, std::string> make_parser(
+    const std::string& file_name, const std::string& delim = "") {
+    if (buffer_mode) {
+        auto buffer = make_buffer(file_name);
+        if (delim.empty()) {
+            return {ss::parser{buffer.data(), buffer.size()},
+                    std::move(buffer)};
+        } else {
+            return {ss::parser{buffer.data(), buffer.size(), delim},
+                    std::move(buffer)};
+        }
+    } else {
+        if (delim.empty()) {
+            return {ss::parser{file_name}, std::string{}};
+        } else {
+            return {ss::parser{file_name, delim}, std::string{}};
+        }
+    }
+}
+
+} /* namespace */
diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp
new file mode 100644
index 0000000..60787f2
--- /dev/null
+++ b/test/test_parser1_1.cpp
@@ -0,0 +1,534 @@
+#include "test_parser1.hpp"
+
+// Nothing is written to f.name in this test: the first two parsers must
+// report the failure through valid(), the third one by throwing.
+TEST_CASE("test file not found") {
+    unique_file_name f{"test_parser"};
+
+    {
+        ss::parser p{f.name, ","};
+        CHECK_FALSE(p.valid());
+    }
+
+    {
+        ss::parser p{f.name, ","};
+        CHECK_FALSE(p.valid());
+    }
+
+    try {
+        ss::parser p{f.name, ","};
+        FAIL("Expected exception...");
+    } catch (const std::exception& e) {
+        CHECK_FALSE(std::string{e.what()}.empty());
+    }
+}
+
+// Writes six rows to a temporary file and reads them back through the
+// different parser entry points (get_next, iterate, get_object,
+// iterate_object), comparing every result with the written data.
+template
+void test_various_cases() {
+    unique_file_name f{"test_parser"};
+    std::vector data = {{1, 2, "x"}, {3, 4, "y"},  {5, 6, "z"},
+                           {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
+    make_and_write(f.name, data);
+    {
+        // The parser is move-constructed and move-assigned before and
+        // between reads; the rows it yields must be unaffected.
+        auto [p, _] = make_parser(f.name, ",");
+        ss::parser p0{std::move(p)};
+        p = std::move(p0);
+        std::vector i;
+
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        auto move_rotate = [&p = p, &p0 = p0] {
+            auto p1 = std::move(p);
+            p0 = std::move(p1);
+            p = std::move(p0);
+        };
+
+        while (!p.eof()) {
+            move_rotate();
+            auto a = p.template get_next();
+            i.emplace_back(ss::to_object(a));
+        }
+
+        for (const auto& a : p2.template iterate()) {
+            i2.emplace_back(ss::to_object(a));
+        }
+
+        CHECK_EQ(i, data);
+        CHECK_EQ(i2, data);
+    }
+
+    {
+        // ignore_next() drops the first row; the remaining rows are read
+        // with get_next, a range-for over iterate and explicit iterators.
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        auto [p3, ___] = make_parser(f.name, ",");
+        std::vector i3;
+
+        std::vector expected = {std::begin(data) + 1, std::end(data)};
+        using tup = std::tuple;
+
+        p.ignore_next();
+        while (!p.eof()) {
+            auto a = p.template get_next();
+            i.emplace_back(ss::to_object(a));
+        }
+
+        p2.ignore_next();
+        for (const auto& a : p2.template iterate()) {
+            i2.emplace_back(ss::to_object(a));
+        }
+
+        p3.ignore_next();
+        for (auto it = p3.template iterate().begin();
+             it != p3.template iterate().end(); ++it) {
+            i3.emplace_back(ss::to_object(*it));
+        }
+
+        CHECK_EQ(i, expected);
+        CHECK_EQ(i2, expected);
+        CHECK_EQ(i3, expected);
+    }
+
+    {
+        // Objects are produced directly by get_object / iterate_object.
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        while (!p.eof()) {
+            i.push_back(p.template get_object());
+        }
+
+        for (auto&& a :
+             p2.template iterate_object()) {
+            i2.push_back(std::move(a));
+        }
+
+        CHECK_EQ(i, data);
+        CHECK_EQ(i2, data);
+    }
+
+    {
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        for (auto&& a :
+             p.template iterate_object()) {
+            i.push_back(std::move(a));
+        }
+
+        CHECK_EQ(i, data);
+    }
+
+    {
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        using tup = std::tuple;
+        while (!p.eof()) {
+            i.push_back(p.template get_object());
+        }
+
+        for (auto it = p2.template iterate_object().begin();
+             it != p2.template iterate_object().end(); it++) {
+            i2.push_back({it->i, it->d, it->s});
+        }
+
+        CHECK_EQ(i, data);
+        CHECK_EQ(i2, data);
+    }
+
+    {
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        using tup = std::tuple;
+        for (auto&& a : p.template iterate_object()) {
+            i.push_back(std::move(a));
+        }
+
+        CHECK_EQ(i, data);
+    }
+
+    {
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        while (!p.eof()) {
+            i.push_back(p.template get_next());
+        }
+
+        CHECK_EQ(i, data);
+    }
+
+    {
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        for (auto&& a : p.template iterate()) {
+            i.push_back(std::move(a));
+        }
+
+        CHECK_EQ(i, data);
+    }
+
+    {
+        // Rows whose int column equals `excluded` are expected to be
+        // rejected; rejected rows (invalid parser or exception) are skipped.
+        constexpr int excluded = 3;
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        while (!p.eof()) {
+            try {
+                auto a = p.template get_object, double,
+                                               std::string>();
+                if (p.valid()) {
+                    i.push_back(a);
+                }
+            } catch (...) {
+                // ignore
+            }
+        }
+
+        if (!ss::setup::throw_on_error) {
+            for (auto&& a : p2.template iterate_object,
+                                                      double, std::string>()) {
+                if (p2.valid()) {
+                    i2.push_back(std::move(a));
+                }
+            }
+        }
+
+        std::vector expected;
+        for (auto& x : data) {
+            if (x.i != excluded) {
+                expected.push_back(x);
+            }
+        }
+
+        CHECK_EQ(i, expected);
+
+        if (!ss::setup::throw_on_error) {
+            CHECK_EQ(i2, expected);
+        }
+    }
+
+    {
+        // Only the row {3, 4, "y"} is expected to be accepted here.
+        auto [p, _] = make_parser(f.name, ",");
+        std::vector i;
+
+        auto [p2, __] = make_parser(f.name, ",");
+        std::vector i2;
+
+        while (!p.eof()) {
+            try {
+                auto a = p.template get_object, double,
+                                               std::string>();
+                if (p.valid()) {
+                    i.push_back(a);
+                }
+            } catch (...) {
+                // ignore
+            }
+        }
+
+        if (!ss::setup::throw_on_error) {
+            for (auto&& a : p2.template iterate_object,
+                                                      double, std::string>()) {
+                if (p2.valid()) {
+                    i2.push_back(std::move(a));
+                }
+            }
+        }
+
+        std::vector expected = {{3, 4, "y"}};
+        CHECK_EQ(i, expected);
+        if (!ss::setup::throw_on_error) {
+            CHECK_EQ(i2, expected);
+        }
+    }
+
+    {
+        // A file written from an empty data set must yield no rows.
+        unique_file_name empty_f{"test_parser"};
+        std::vector empty_data = {};
+
+        make_and_write(empty_f.name, empty_data);
+
+        auto [p, _] = make_parser(empty_f.name, ",");
+        std::vector i;
+
+        auto [p2, __] = make_parser(empty_f.name, ",");
+        std::vector i2;
+
+        while (!p.eof()) {
+            i.push_back(p.template get_next());
+        }
+
+        for (auto&& a : p2.template iterate()) {
+            i2.push_back(std::move(a));
+        }
+
+        CHECK(i.empty());
+        CHECK(i2.empty());
+    }
+}
+
+TEST_CASE("parser test various cases") {
+    test_various_cases();
+    test_various_cases();
+    test_various_cases();
+    test_various_cases();
+    test_various_cases();
+    test_various_cases();
+}
+
+using test_tuple = std::tuple;
+// Plain aggregate used as a conversion target in the composite tests.
+struct test_struct {
+    int i;
+    double d;
+    char c;
+    auto tied() {
+        return std::tie(i, d, c);
+    }
+};
+
+// Callback that accepts a test_struct and does nothing with it.
+static inline void expect_test_struct(const test_struct&) {
+}
+
+template
+void test_composite_conversion() {
+    unique_file_name f{"test_parser"};
+    {
+        std::ofstream out{f.name};
+        for (auto& i :
+             {"10,a,11.1", "10,20,11.1", "junk", "10,11.1", "1,11.1,a", "junk",
+              "10,junk", "11,junk", "10,11.1,c", "10,20", "10,22.2,f"}) {
+            out << i << std::endl;
+        }
+    }
+
+    auto [p, _] = make_parser(f.name, ",");
+    auto fail = [] { FAIL(""); };
+    auto expect_error = [](auto error) { CHECK(!error.empty()); };
+    auto ignore_error = [] {};
+
+    REQUIRE(p.valid());
+    REQUIRE_FALSE(p.eof());
+
+    {
+        constexpr static auto expectedData = std::tuple{10, 'a', 11.1};
+
+        auto [d1, d2, d3, d4] =
+            p.template try_next(fail)
+                .template or_else(fail)
+                .template or_else(
+                    [&](auto&& data) { CHECK_EQ(data, expectedData); })
+                .on_error(fail)
+                .template or_else(fail)
+                .values();
+
+
REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(*d3, expectedData); + } + + { + REQUIRE(!p.eof()); + constexpr static auto expectedData = std::tuple{10, 20, 11.1}; + + auto [d1, d2, d3, d4] = + p.template try_next( + [&](auto& i1, auto i2, double d) { + CHECK_EQ(std::tie(i1, i2, d), expectedData); + }) + .on_error(fail) + .template or_object(fail) + .on_error(fail) + .template or_else(fail) + .on_error(fail) + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(*d1, expectedData); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4, d5] = + p.template try_object(fail) + .on_error(expect_error) + .template or_else(fail) + .template or_else(fail) + .template or_else(fail) + .template or_else(fail) + .values(); + + REQUIRE_FALSE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + REQUIRE_FALSE(d5); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next([](auto& i, auto& d) { + REQUIRE_EQ(std::tie(i, d), std::tuple{10, 11.1}); + }) + .template or_else([](auto&, auto&) { FAIL(""); }) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next([](auto&, auto&) { FAIL(""); }) + .template or_else(expect_test_struct) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE(d2); + CHECK_EQ(d2->tied(), std::tuple{1, 11.1, 'a'}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4, d5] = + p.template try_next(fail) + .template or_object() + .template or_else(expect_test_struct) + .template or_else(fail) + .template or_else>(fail) + .on_error(ignore_error) + .on_error(expect_error) + .values(); + + REQUIRE_FALSE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + REQUIRE_FALSE(d5); + } + + { + 
REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next>() + .on_error(ignore_error) + .on_error(fail) + .template or_else>(fail) + .on_error(ignore_error) + .on_error(fail) + .on_error(ignore_error) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(*d1, std::tuple{10, std::nullopt}); + } + + { + REQUIRE_FALSE(p.eof()); + + auto [d1, d2] = + p.template try_next>() + .on_error(fail) + .template or_else>(fail) + .on_error(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(*d1, std::tuple{11, std::variant{"junk"}}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = p.template try_object() + .template or_else(fail) + .values(); + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(d1->tied(), std::tuple{10, 11.1, 'c'}); + } + + { + REQUIRE_FALSE(p.eof()); + + auto [d1, d2, d3, d4] = + p.template try_next([] { return false; }) + .template or_else([](auto&) { return false; }) + .template or_else() + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(d3.value(), std::tuple{10, 20}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4] = + p.template try_object( + [] { return false; }) + .template or_else([](auto&) { return false; }) + .template or_object() + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(d3->tied(), std::tuple{10, 22.2, 'f'}); + } + + CHECK(p.eof()); +} + +// various scenarios +TEST_CASE("parser test composite conversion") { + test_composite_conversion(); + test_composite_conversion(); +} diff --git a/test/test_parser1_2.cpp b/test/test_parser1_2.cpp new file mode 100644 index 0000000..6ca7a0b --- /dev/null +++ b/test/test_parser1_2.cpp @@ -0,0 +1,309 @@ +#include "test_parser1.hpp" + +struct my_string { + char* data{nullptr}; + + my_string() = default; + + 
~my_string() { + delete[] data; + } + + // make sure no object is copied + my_string(const my_string&) = delete; + my_string& operator=(const my_string&) = delete; + + my_string(my_string&& other) : data{other.data} { + other.data = nullptr; + } + + my_string& operator=(my_string&& other) { + data = other.data; + return *this; + } +}; + +template <> +inline bool ss::extract(const char* begin, const char* end, my_string& s) { + size_t size = end - begin; + s.data = new char[size + 1]; + strncpy(s.data, begin, size); + s.data[size] = '\0'; + return true; +} + +struct xyz { + my_string x; + my_string y; + my_string z; + auto tied() { + return std::tie(x, y, z); + } +}; + +template +void test_moving_of_parsed_composite_values() { + // to compile is enough + return; + auto [p, _] = make_parser("", ""); + p.template try_next() + .template or_else( + [](auto&&) {}) + .template or_else([](auto&) {}) + .template or_else([](auto&&) {}) + .template or_object([](auto&&) {}) + .template or_else>( + [](auto&, auto&, auto&) {}); +} + +TEST_CASE("parser test the moving of parsed composite values") { + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); +} + +TEST_CASE("parser test error mode") { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "junk" << std::endl; + out << "junk" << std::endl; + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } +} + +TEST_CASE("parser throw on error mode") { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "junk" << std::endl; + out << "junk" << std::endl; + } + + { + auto [p, _] = make_parser(f.name, 
","); + + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } +} + +static inline std::string no_quote(const std::string& s) { + if (!s.empty() && s[0] == '"') { + return {std::next(begin(s)), std::prev(end(s))}; + } + return s; +} + +template +void test_quote_multiline() { + unique_file_name f{"test_parser"}; + std::vector data = {{1, 2, "\"x\r\nx\nx\""}, + {3, 4, "\"y\ny\r\ny\""}, + {5, 6, "\"z\nz\""}, + {7, 8, "\"u\"\"\""}, + {9, 10, "v"}, + {11, 12, "\"w\n\""}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } + + make_and_write(f.name, data); + for (auto& [_, __, s] : data) { + s = no_quote(s); + if (s[0] == 'u') { + s = "u\""; + } + } + + auto [p, _] = + make_parser, Ts...>(f.name, + ","); + + std::vector i; + + while (!p.eof()) { + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); + + auto [p_no_multiline, __] = + make_parser, Ts...>(f.name, ","); + while (!p.eof()) { + auto command = [&p_no_multiline = p_no_multiline] { + p_no_multiline.template get_next(); + }; + expect_error_on_command(p_no_multiline, command); + } +} + +TEST_CASE("parser test csv on multiple lines with quotes") { + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); +} + +static inline std::string no_escape(std::string& s) { + s.erase(std::remove(begin(s), end(s), '\\'), end(s)); + return s; +} + +template +void test_escape_multiline() { + unique_file_name f{"test_parser"}; + std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, + {5, 6, 
"z\\\nz\\\nz"}, + {7, 8, "u"}, + {3, 4, "y\\\ny\\\ny"}, + {9, 10, "v\\\\"}, + {11, 12, "w\\\n"}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } + + make_and_write(f.name, data); + for (auto& [_, __, s] : data) { + s = no_escape(s); + if (s == "v") { + s = "v\\"; + } + } + + auto [p, _] = + make_parser, Ts...>(f.name, + ","); + std::vector i; + + while (!p.eof()) { + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); + + auto [p_no_multiline, __] = + make_parser, Ts...>(f.name, ","); + while (!p.eof()) { + auto command = [&p_no_multiline = p_no_multiline] { + auto a = + p_no_multiline.template get_next(); + }; + expect_error_on_command(p_no_multiline, command); + } +} + +TEST_CASE("parser test csv on multiple lines with escapes") { + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); +} + +template +void test_quote_escape_multiline() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "1,2,\"just\\\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"just\r\nsome\\\r\n\n\\\nstrings\"" << std::endl; + out << "5,6,\"just\\\n\\\r\n\r\n\nstrings" << std::endl; +#else + out << "3,4,\"just\nsome\\\n\n\\\nstrings\"" << std::endl; + out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl; +#endif + out << "7,8,\"just strings\"" << std::endl; + out << "9,10,just strings" << std::endl; + } + size_t bad_lines = 1; + auto num_errors = 0; + + auto [p, _] = make_parser, + ss::quote<'"'>, Ts...>(f.name); + std::vector i; + + while (!p.eof()) { + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; + } + } + + CHECK(bad_lines == num_errors); + + std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 
+ {3, 4, "just\r\nsome\r\n\n\nstrings"}, +#else + {3, 4, "just\nsome\n\n\nstrings"}, +#endif + {9, 10, "just strings"}}; + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); +} + +TEST_CASE("parser test csv on multiple lines with quotes and escapes") { + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); +} diff --git a/test/test_parser1_3.cpp b/test/test_parser1_3.cpp new file mode 100644 index 0000000..6885eb1 --- /dev/null +++ b/test/test_parser1_3.cpp @@ -0,0 +1,515 @@ +#include "test_parser1.hpp" + +template +void test_multiline_restricted() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "1,2,\"just\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"ju\n\r\n\nnk\"" << std::endl; + out << "5,6,just\\\n\\\r\nstrings" << std::endl; +#else + out << "3,4,\"ju\n\n\nnk\"" << std::endl; + out << "5,6,just\\\n\\\nstrings" << std::endl; +#endif + out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; + out << "9,10,\"just\\\n\nstrings\"" << std::endl; + out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; + out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl; + out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; + out << "19,20,just strings" << std::endl; + } + auto bad_lines = 15; + auto num_errors = 0; + + ss::parser, ss::quote<'"'>, ss::escape<'\\'>, + Ts...> + p{f.name, ","}; + std::vector i; + + while (!p.eof()) { + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; + } + } + + CHECK(bad_lines == num_errors); + + std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 + {5, 6, "just\n\r\nstrings"}, +#else + {5, 6, "just\n\nstrings"}, +#endif + {9, 10, "just\n\nstrings"}, + {19, 20, "just strings"}}; + + for (auto& 
[_, __, s] : i) { + update_if_crlf(s); + } + + if (i.size() != data.size()) { + CHECK_EQ(i.size(), data.size()); + } + + CHECK_EQ(i, data); +} + +TEST_CASE("parser test multiline restricted") { + test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); +} + +template +void test_unterminated_line_impl(const std::vector& lines, + size_t bad_line) { + unique_file_name f{"test_parser"}; + std::ofstream out{f.name}; + for (const auto& line : lines) { + out << line << std::endl; + } + out.close(); + + ss::parser p{f.name}; + size_t line = 0; + while (!p.eof()) { + auto command = [&] { p.template get_next(); }; + + if (line == bad_line) { + expect_error_on_command(p, command); + break; + } else { + command(); + CHECK(p.valid()); + ++line; + } + } +} + +template +void test_unterminated_line(const std::vector& lines, + size_t bad_line) { + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); +} + +TEST_CASE("parser test csv on multiline with errors") { + using multiline = ss::multiline_restricted<3>; + using escape = ss::escape<'\\'>; + using quote = ss::quote<'"'>; + + // unterminated escape + { + const std::vector lines{"1,2,just\\"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,just\\", "9,8,second"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,just\\"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,just\\", + "3,4,third"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", + "1,2,just\\\nstrings\\", + "3,4,th\\\nird"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "3,4,second", + 
"1,2,just\\"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + { + const std::vector lines{"9,8,\\first", "3,4,second", + "1,2,jus\\t\\"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + // unterminated quote + { + const std::vector lines{"1,2,\"just"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just", "9,8,second"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just", + "3,4,th\\,ird"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "3,4,second", + "1,2,\"just"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + { + const std::vector lines{"9,8,\"first\"", + "\"3\",4,\"sec,ond\"", + "1,2,\"ju\"\"st"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + // unterminated quote and escape + { + const std::vector lines{"1,2,\"just\\"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just\\\n\\"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just\n\\"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just\n\\"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just\n\\", + "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,f\\\nirst", "1,2,\"just\n\\", + "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"f\ni\nrst\"", + "1,2,\"just\n\\", "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + // multiline limmit reached escape 
+ { + const std::vector lines{"1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,fi\\\nrs\\\nt", + "1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", + "1,2,\\\n\\\n\\\n\\\njust", + "4,3,third"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + // multiline limmit reached quote + { + const std::vector lines{"1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fir\nst\"", + "1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + // multiline limmit reached quote and escape + { + const std::vector lines{"1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,fi\\\nrst", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fi\nrst\"", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fi\nr\\\nst\"", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } +} + +template +struct has_type; + +template +struct has_type> + : std::disjunction...> {}; + +static inline void check_size(size_t size1, size_t size2) { + CHECK_EQ(size1, size2); +} + +template +static void test_fields_impl(const std::string file_name, + 
const std::vector& data, + const std::vector& fields) { + using CaseType = std::tuple; + + ss::parser p{file_name, ","}; + CHECK_FALSE(p.field_exists("Unknown")); + p.use_fields(fields); + std::vector i; + + for (const auto& a : p.template iterate()) { + i.push_back(a); + } + + check_size(i.size(), data.size()); + for (size_t j = 0; j < i.size(); ++j) { + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].i); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].d); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].s); + } + } +} + +template +static void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { + test_fields_impl, Ts...>(file_name, data, fields); + test_fields_impl, Ts...>(file_name, data, + fields); + test_fields_impl, Ts...>(file_name, data, + fields); +} + +TEST_CASE("parser test various cases with header") { + unique_file_name f{"test_parser"}; + constexpr static auto Int = "Int"; + constexpr static auto Dbl = "Double"; + constexpr static auto Str = "String"; + using str = std::string; + + std::vector header{Int, Dbl, Str}; + + std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, + {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; + + make_and_write(f.name, data, header); + const auto& o = f.name; + const auto& d = data; + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_NE(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + p.ignore_next(); + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl, Str); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + 
CHECK_FALSE(p.field_exists("Unknown")); + + p.use_fields(Int, "Unknown"); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Int); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl); + + { + auto [int_, double_] = p.get_next(); + CHECK_EQ(int_, data[0].i); + CHECK_EQ(double_, data[0].d); + } + + p.use_fields(Dbl, Int); + + { + auto [double_, int_] = p.get_next(); + CHECK_EQ(int_, data[1].i); + CHECK_EQ(double_, data[1].d); + } + + p.use_fields(Str); + + { + auto string_ = p.get_next(); + CHECK_EQ(string_, data[2].s); + } + + p.use_fields(Str, Int, Dbl); + + { + auto [string_, int_, double_] = + p.get_next(); + CHECK_EQ(double_, data[3].d); + CHECK_EQ(int_, data[3].i); + CHECK_EQ(string_, data[3].s); + } + } + + /* python used to generate permutations + import itertools + + header = {'str': 'Str', + 'double': 'Dbl', + 'int': 'Int'} + + keys = ['str', 'int', 'double'] + + for r in range (1, 3): + combinations = list(itertools.permutations(keys, r = r)) + + for combination in combinations: + template_params = [] + arg_params = [] + for type in combination: + template_params.append(type) + arg_params.append(header[type]) + call = 'testFields<' + ', '.join(template_params) + \ + '>(o, d, {' + ', '.join(arg_params) + '});' + print(call) + */ + + test_fields(o, d, {Str}); + test_fields(o, d, {Int}); + test_fields(o, d, {Dbl}); + test_fields(o, d, {Str, Int}); + test_fields(o, d, {Str, Dbl}); + test_fields(o, d, {Int, Str}); + test_fields(o, d, {Int, Dbl}); + test_fields(o, d, {Dbl, Str}); + test_fields(o, d, {Dbl, Int}); + test_fields(o, d, {Str, Int, Dbl}); + test_fields(o, d, {Str, Dbl, Int}); + test_fields(o, d, {Int, Str, Dbl}); + test_fields(o, d, {Int, Dbl, Str}); + test_fields(o, d, {Dbl, Str, Int}); + test_fields(o, d, {Dbl, Int, Str}); +} diff --git a/test/test_parser1_4.cpp b/test/test_parser1_4.cpp new file mode 100644 index 0000000..42a385b --- /dev/null +++ b/test/test_parser1_4.cpp @@ 
-0,0 +1,291 @@ +#include "test_parser1.hpp" + +template +void test_invalid_fields_impl(const std::vector& lines, + const std::vector& fields) { + unique_file_name f{"test_parser"}; + std::ofstream out{f.name}; + for (const auto& line : lines) { + out << line << std::endl; + } + out.close(); + + { + // No fields specified + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(); }; + expect_error_on_command(p, command); + } + + { + // Unknown field + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields("Unknown"); }; + expect_error_on_command(p, command); + } + + { + // Field used multiple times + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(fields.at(0), fields.at(0)); }; + if (!fields.empty()) { + expect_error_on_command(p, command); + } + } + + { + // Mapping out of range + ss::parser p{f.name, ","}; + auto command = [&] { + p.use_fields(fields.at(0)); + p.template get_next(); + }; + if (!fields.empty()) { + expect_error_on_command(p, command); + } + } + + { + // Invalid header + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(fields); }; + + if (!fields.empty()) { + // Pass if there are no duplicates, fail otherwise + if (std::unordered_set{fields.begin(), fields.end()} + .size() != fields.size()) { + expect_error_on_command(p, command); + } else { + command(); + CHECK(p.valid()); + if (!p.valid()) { + if constexpr (ss::setup::string_error) { + std::cout << p.error_msg() << std::endl; + } + } + } + } + } +} + +template +void test_invalid_fields(const std::vector& lines, + const std::vector& fields) { + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); +} + +TEST_CASE("parser test invalid header fields usage") { + test_invalid_fields({}, {}); + + test_invalid_fields({"Int"}, {"Int"}); + test_invalid_fields({"Int", "1"}, {"Int"}); + test_invalid_fields({"Int", "1", "2"}, {"Int"}); + + test_invalid_fields({"Int,String"}, {"Int", 
"String"}); + test_invalid_fields({"Int,String", "1,hi"}, {"Int", "String"}); + test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); + + test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, + {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, + {"Int", "String", "Double"}); + + test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); + test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); + + test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); + test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); +} + +template +void test_invalid_rows_with_header() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "Int,String,Double" << std::endl; + out << "1,line1,2.34" << std::endl; + out << "2,line2" << std::endl; + out << "3,line3,67.8" << std::endl; + out << "4,line4,67.8,9" << std::endl; + out << "5,line5,9.10" << std::endl; + out << "six,line6,10.11" << std::endl; + } + + { + ss::parser p{f.name}; + + p.use_fields("Int", "String", "Double"); + using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{1, "line1", 2.34}, + {3, "line3", 67.8}, + {5, "line5", 9.10}}; + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name}; + + p.use_fields("Double", "Int"); + using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name}; + + p.use_fields("String", "Double"); + 
using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{"line1", 2.34}, + {"line3", 67.8}, + {"line5", 9.10}, + {"line6", 10.11}}; + CHECK_EQ(i, expected); + } +} + +TEST_CASE("parser test invalid rows with header") { + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); +} + +template +void test_ignore_empty_impl(const std::vector& data) { + unique_file_name f{"test_parser"}; + make_and_write(f.name, data); + + std::vector expected; + for (const auto& d : data) { + if (d.s != X::empty) { + expected.push_back(d); + } + } + + { + ss::parser p{f.name, ","}; + + std::vector i; + for (const auto& a : p.template iterate()) { + i.push_back(a); + } + + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + size_t n = 0; + while (!p.eof()) { + try { + ++n; + const auto& a = p.template get_next(); + if (data.at(n - 1).s == X::empty) { + CHECK_FALSE(p.valid()); + continue; + } + i.push_back(a); + } catch (...) 
{ + CHECK_EQ(data.at(n - 1).s, X::empty); + } + } + + CHECK_EQ(i, expected); + } +} + +template +void test_ignore_empty(const std::vector& data) { + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); +} + +TEST_CASE("parser test various cases with empty lines") { + test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty({{1, 2, X::empty}, + {3, 4, X::empty}, + {9, 10, X::empty}, + {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, "w"}}); + + test_ignore_empty({{11, 12, X::empty}}); + + test_ignore_empty({}); +} From f8c5757d99edcdbe179896083c71a598c5a978c7 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 21:08:24 +0100 Subject: [PATCH 05/57] Fix unit test CMakeLists --- test/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index de6b612..5e35bee 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -32,9 +32,11 @@ set(DOCTEST "${FETCHCONTENT_BASE_DIR}/doctest-src") enable_testing() -foreach(name IN ITEMS test_splitter test_parser test_converter test_extractions - test_parser2_1 test_parser2_2 
test_parser2_3 - test_parser2_4 test_extractions_without_fast_float) +foreach(name IN ITEMS test_splitter test_parser1_1 test_parser1_2 + test_parser1_3 test_parser_1_4 test_converter + test_extractions test_parser2_1 test_parser2_2 + test_parser2_3 test_parser2_4 + test_extractions_without_fast_float) add_executable("${name}" "${name}.cpp") target_link_libraries("${name}" PRIVATE ssp::ssp fast_float doctest::doctest) From aacf690640210549922975532274db9f55054fce Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 21:10:25 +0100 Subject: [PATCH 06/57] Fix unit test CMakeLists typo --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5e35bee..8439639 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -33,7 +33,7 @@ set(DOCTEST "${FETCHCONTENT_BASE_DIR}/doctest-src") enable_testing() foreach(name IN ITEMS test_splitter test_parser1_1 test_parser1_2 - test_parser1_3 test_parser_1_4 test_converter + test_parser1_3 test_parser1_4 test_converter test_extractions test_parser2_1 test_parser2_2 test_parser2_3 test_parser2_4 test_extractions_without_fast_float) From e89e268280b760e0c38c43487bd8df7cab5989ce Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 22:21:37 +0100 Subject: [PATCH 07/57] Update test helpers random file name generator --- test/test_helpers.hpp | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 15d20f6..98ee090 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -34,23 +34,13 @@ struct buffer { [[maybe_unused]] inline buffer buff; -std::string time_now_rand() { - std::stringstream ss; - auto t = std::time(nullptr); - auto tm = *std::localtime(&t); - ss << std::put_time(&tm, "%d%m%Y%H%M%S"); - srand(time(nullptr)); - return ss.str() + std::to_string(rand()); -} - struct unique_file_name { - static inline int i = 0; + std::string name; - const 
std::string name; - - unique_file_name(const std::string& test) - : name{"random_" + test + "_" + std::to_string(i++) + "_" + - time_now_rand() + "_file.csv"} { + unique_file_name(const std::string& test) { + do { + name = std::tmpnam(nullptr) + std::string{"_random_test_"} + test; + } while (std::filesystem::exists(name)); } ~unique_file_name() { From 82f8ed12b4883b413ff21a0c71ab6721f828c57e Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 11:20:36 +0100 Subject: [PATCH 08/57] Fix out of bounds reading for get_line_buffer, write more buffer mode unit tests --- include/ss/parser.hpp | 2 +- test/test_parser1_3.cpp | 233 +++++----------------------------------- test/test_parser1_4.cpp | 196 +++++++++++++++++++++++++++++++++ 3 files changed, 221 insertions(+), 210 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 3a4244b..cd5e7b1 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -733,10 +733,10 @@ private: return -1; } - c = buffer[curr_char++]; if (curr_char >= csv_data_size) { return -1; } + c = buffer[curr_char++]; // TODO maybe remove this too if (*lineptr == nullptr) { diff --git a/test/test_parser1_3.cpp b/test/test_parser1_3.cpp index 6885eb1..612d17d 100644 --- a/test/test_parser1_3.cpp +++ b/test/test_parser1_3.cpp @@ -1,6 +1,6 @@ #include "test_parser1.hpp" -template +template void test_multiline_restricted() { unique_file_name f{"test_parser"}; { @@ -23,9 +23,9 @@ void test_multiline_restricted() { auto bad_lines = 15; auto num_errors = 0; - ss::parser, ss::quote<'"'>, ss::escape<'\\'>, - Ts...> - p{f.name, ","}; + auto [p, _] = + make_parser, ss::quote<'"'>, + ss::escape<'\\'>, Ts...>(f.name, ","); std::vector i; while (!p.eof()) { @@ -64,12 +64,15 @@ void test_multiline_restricted() { } TEST_CASE("parser test multiline restricted") { - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); + 
test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); } -template +template void test_unterminated_line_impl(const std::vector& lines, size_t bad_line) { unique_file_name f{"test_parser"}; @@ -79,10 +82,12 @@ void test_unterminated_line_impl(const std::vector& lines, } out.close(); - ss::parser p{f.name}; + auto [p, _] = make_parser(f.name); size_t line = 0; while (!p.eof()) { - auto command = [&] { p.template get_next(); }; + auto command = [&p = p] { + p.template get_next(); + }; if (line == bad_line) { expect_error_on_command(p, command); @@ -98,9 +103,15 @@ void test_unterminated_line_impl(const std::vector& lines, template void test_unterminated_line(const std::vector& lines, size_t bad_line) { - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, + bad_line); + test_unterminated_line_impl(lines, + bad_line); + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, + bad_line); } TEST_CASE("parser test csv on multiline with errors") { @@ -317,199 +328,3 @@ TEST_CASE("parser test csv on multiline with errors") { test_unterminated_line(lines, 1); } } - -template -struct has_type; - -template -struct has_type> - : std::disjunction...> {}; - -static inline void check_size(size_t size1, size_t size2) { - CHECK_EQ(size1, size2); -} - -template -static void test_fields_impl(const std::string file_name, - const std::vector& data, - const std::vector& fields) { - using CaseType = std::tuple; - - ss::parser p{file_name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - p.use_fields(fields); - std::vector i; - - for (const auto& a : p.template iterate()) { - i.push_back(a); - } - - check_size(i.size(), data.size()); - for (size_t j = 0; j < i.size(); ++j) { 
- if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].i); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].d); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].s); - } - } -} - -template -static void test_fields(const std::string file_name, const std::vector& data, - const std::vector& fields) { - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, data, - fields); -} - -TEST_CASE("parser test various cases with header") { - unique_file_name f{"test_parser"}; - constexpr static auto Int = "Int"; - constexpr static auto Dbl = "Double"; - constexpr static auto Str = "String"; - using str = std::string; - - std::vector header{Int, Dbl, Str}; - - std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, - {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; - - make_and_write(f.name, data, header); - const auto& o = f.name; - const auto& d = data; - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_NE(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - p.ignore_next(); - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl, Str); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - - p.use_fields(Int, "Unknown"); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Int); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl); - - { - auto [int_, double_] = p.get_next(); - CHECK_EQ(int_, data[0].i); - CHECK_EQ(double_, data[0].d); - } 
- - p.use_fields(Dbl, Int); - - { - auto [double_, int_] = p.get_next(); - CHECK_EQ(int_, data[1].i); - CHECK_EQ(double_, data[1].d); - } - - p.use_fields(Str); - - { - auto string_ = p.get_next(); - CHECK_EQ(string_, data[2].s); - } - - p.use_fields(Str, Int, Dbl); - - { - auto [string_, int_, double_] = - p.get_next(); - CHECK_EQ(double_, data[3].d); - CHECK_EQ(int_, data[3].i); - CHECK_EQ(string_, data[3].s); - } - } - - /* python used to generate permutations - import itertools - - header = {'str': 'Str', - 'double': 'Dbl', - 'int': 'Int'} - - keys = ['str', 'int', 'double'] - - for r in range (1, 3): - combinations = list(itertools.permutations(keys, r = r)) - - for combination in combinations: - template_params = [] - arg_params = [] - for type in combination: - template_params.append(type) - arg_params.append(header[type]) - call = 'testFields<' + ', '.join(template_params) + \ - '>(o, d, {' + ', '.join(arg_params) + '});' - print(call) - */ - - test_fields(o, d, {Str}); - test_fields(o, d, {Int}); - test_fields(o, d, {Dbl}); - test_fields(o, d, {Str, Int}); - test_fields(o, d, {Str, Dbl}); - test_fields(o, d, {Int, Str}); - test_fields(o, d, {Int, Dbl}); - test_fields(o, d, {Dbl, Str}); - test_fields(o, d, {Dbl, Int}); - test_fields(o, d, {Str, Int, Dbl}); - test_fields(o, d, {Str, Dbl, Int}); - test_fields(o, d, {Int, Str, Dbl}); - test_fields(o, d, {Int, Dbl, Str}); - test_fields(o, d, {Dbl, Str, Int}); - test_fields(o, d, {Dbl, Int, Str}); -} diff --git a/test/test_parser1_4.cpp b/test/test_parser1_4.cpp index 42a385b..1045850 100644 --- a/test/test_parser1_4.cpp +++ b/test/test_parser1_4.cpp @@ -1,5 +1,201 @@ #include "test_parser1.hpp" +template +struct has_type; + +template +struct has_type> + : std::disjunction...> {}; + +static inline void check_size(size_t size1, size_t size2) { + CHECK_EQ(size1, size2); +} + +template +static void test_fields_impl(const std::string file_name, + const std::vector& data, + const std::vector& fields) { + using 
CaseType = std::tuple; + + ss::parser p{file_name, ","}; + CHECK_FALSE(p.field_exists("Unknown")); + p.use_fields(fields); + std::vector i; + + for (const auto& a : p.template iterate()) { + i.push_back(a); + } + + check_size(i.size(), data.size()); + for (size_t j = 0; j < i.size(); ++j) { + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].i); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].d); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].s); + } + } +} + +template +static void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { + test_fields_impl, Ts...>(file_name, data, fields); + test_fields_impl, Ts...>(file_name, data, + fields); + test_fields_impl, Ts...>(file_name, data, + fields); +} + +TEST_CASE("parser test various cases with header") { + unique_file_name f{"test_parser"}; + constexpr static auto Int = "Int"; + constexpr static auto Dbl = "Double"; + constexpr static auto Str = "String"; + using str = std::string; + + std::vector header{Int, Dbl, Str}; + + std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, + {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; + + make_and_write(f.name, data, header); + const auto& o = f.name; + const auto& d = data; + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_NE(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + p.ignore_next(); + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl, Str); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + CHECK_FALSE(p.field_exists("Unknown")); + + p.use_fields(Int, "Unknown"); 
+ CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Int); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl); + + { + auto [int_, double_] = p.get_next(); + CHECK_EQ(int_, data[0].i); + CHECK_EQ(double_, data[0].d); + } + + p.use_fields(Dbl, Int); + + { + auto [double_, int_] = p.get_next(); + CHECK_EQ(int_, data[1].i); + CHECK_EQ(double_, data[1].d); + } + + p.use_fields(Str); + + { + auto string_ = p.get_next(); + CHECK_EQ(string_, data[2].s); + } + + p.use_fields(Str, Int, Dbl); + + { + auto [string_, int_, double_] = + p.get_next(); + CHECK_EQ(double_, data[3].d); + CHECK_EQ(int_, data[3].i); + CHECK_EQ(string_, data[3].s); + } + } + + /* python used to generate permutations + import itertools + + header = {'str': 'Str', + 'double': 'Dbl', + 'int': 'Int'} + + keys = ['str', 'int', 'double'] + + for r in range (1, 3): + combinations = list(itertools.permutations(keys, r = r)) + + for combination in combinations: + template_params = [] + arg_params = [] + for type in combination: + template_params.append(type) + arg_params.append(header[type]) + call = 'testFields<' + ', '.join(template_params) + \ + '>(o, d, {' + ', '.join(arg_params) + '});' + print(call) + */ + + test_fields(o, d, {Str}); + test_fields(o, d, {Int}); + test_fields(o, d, {Dbl}); + test_fields(o, d, {Str, Int}); + test_fields(o, d, {Str, Dbl}); + test_fields(o, d, {Int, Str}); + test_fields(o, d, {Int, Dbl}); + test_fields(o, d, {Dbl, Str}); + test_fields(o, d, {Dbl, Int}); + test_fields(o, d, {Str, Int, Dbl}); + test_fields(o, d, {Str, Dbl, Int}); + test_fields(o, d, {Int, Str, Dbl}); + test_fields(o, d, {Int, Dbl, Str}); + test_fields(o, d, {Dbl, Str, Int}); + test_fields(o, d, {Dbl, Int, Str}); +} + template void test_invalid_fields_impl(const std::vector& lines, const std::vector& fields) { From 8b07f7d6cb27802e92057c8be7a4b60e9723aa96 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 13:55:14 +0100 Subject: 
[PATCH 09/57] Remove tmpnam usage for random file generation --- test/test_helpers.hpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 98ee090..2509edf 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -34,12 +34,25 @@ struct buffer { [[maybe_unused]] inline buffer buff; +std::string time_now_rand() { + srand(time(nullptr)); + std::stringstream ss; + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + ss << std::put_time(&tm, "%d%m%Y%H%M%S"); + srand(time(nullptr)); + return ss.str() + std::to_string(rand()); +} + struct unique_file_name { + static inline int i = 0; + std::string name; unique_file_name(const std::string& test) { do { - name = std::tmpnam(nullptr) + std::string{"_random_test_"} + test; + name = "random_" + test + "_" + std::to_string(i++) + "_" + + time_now_rand() + "_file.csv"; } while (std::filesystem::exists(name)); } From dbaa8131e71313ceb3703fb5eec8852de382edd9 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 15:29:03 +0100 Subject: [PATCH 10/57] Update ssp.hpp --- ssp.hpp | 131 ++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 14 deletions(-) diff --git a/ssp.hpp b/ssp.hpp index d66e595..d0d21e4 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -639,12 +639,12 @@ inline void assert_throw_on_error_not_defined() { } #if __unix__ -inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { +inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else using ssize_t = int64_t; -inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { +inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { size_t pos; int c; @@ -2174,6 +2174,18 @@ public: } } + parser(const char* const csv_data_buffer, size_t csv_data_size, + const std::string& delim = ss::default_delimiter) + : file_name_{"buffer line"}, + 
reader_{csv_data_buffer, csv_data_size, delim} { + read_line(); + if constexpr (ignore_header) { + ignore_next(); + } else { + raw_header_ = reader_.get_buffer(); + } + } + parser(parser&& other) = default; parser& operator=(parser&& other) = default; @@ -2767,18 +2779,27 @@ private: : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { } + reader(const char* const buffer, size_t csv_data_size, + const std::string& delim) + : delim_{delim}, csv_data_buffer_{buffer}, + csv_data_size_{csv_data_size} { + } + reader(reader&& other) : buffer_{other.buffer_}, next_line_buffer_{other.next_line_buffer_}, - helper_buffer_{other.helper_buffer_}, converter_{std::move( - other.converter_)}, + helper_buffer_{other.helper_buffer_}, + converter_{std::move(other.converter_)}, next_line_converter_{std::move(other.next_line_converter_)}, buffer_size_{other.buffer_size_}, next_line_buffer_size_{other.next_line_buffer_size_}, - helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, - file_{other.file_}, crlf_{other.crlf_}, - line_number_{other.line_number_}, next_line_size_{ - other.next_line_size_} { + helper_buffer_size{other.helper_buffer_size}, + delim_{std::move(other.delim_)}, file_{other.file_}, + csv_data_buffer_{other.csv_data_buffer_}, + csv_data_size_{other.csv_data_size_}, + curr_char_{other.curr_char_}, crlf_{other.crlf_}, + line_number_{other.line_number_}, + next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; other.helper_buffer_ = nullptr; @@ -2794,9 +2815,12 @@ private: next_line_converter_ = std::move(other.next_line_converter_); buffer_size_ = other.buffer_size_; next_line_buffer_size_ = other.next_line_buffer_size_; - helper_size_ = other.helper_size_; + helper_buffer_size = other.helper_buffer_size; delim_ = std::move(other.delim_); file_ = other.file_; + csv_data_buffer_ = other.csv_data_buffer_; + csv_data_size_ = other.csv_data_size_; + curr_char_ = other.curr_char_; crlf_ = other.crlf_; 
line_number_ = other.line_number_; next_line_size_ = other.next_line_size_; @@ -2824,6 +2848,60 @@ private: reader(const reader& other) = delete; reader& operator=(const reader& other) = delete; + ssize_t get_line_buffer(char** lineptr, size_t* n, + const char* const buffer, size_t csv_data_size, + size_t& curr_char) { + size_t pos; + int c; + + // TODO remove check + if (lineptr == nullptr || buffer == nullptr || n == nullptr) { + return -1; + } + + if (curr_char >= csv_data_size) { + return -1; + } + c = buffer[curr_char++]; + + // TODO maybe remove this too + if (*lineptr == nullptr) { + *lineptr = static_cast(malloc(128)); + if (*lineptr == nullptr) { + return -1; + } + *n = 128; + } + + pos = 0; + while (curr_char <= csv_data_size) { + if (pos + 1 >= *n) { + size_t new_size = *n + (*n >> 2); + // TODO maybe remove this too + if (new_size < 128) { + new_size = 128; + } + char* new_ptr = static_cast( + realloc(static_cast(*lineptr), new_size)); + // TODO check for failed malloc in the callee + if (new_ptr == nullptr) { + return -1; + } + *n = new_size; + *lineptr = new_ptr; + } + + (*lineptr)[pos++] = c; + if (c == '\n') { + break; + } + c = buffer[curr_char++]; + } + + (*lineptr)[pos] = '\0'; + return pos; + } + // read next line each time in order to set eof_ bool read_next() { next_line_converter_.clear_error(); @@ -2834,8 +2912,16 @@ private: if (next_line_buffer_size_ > 0) { next_line_buffer_[0] = '\0'; } - ssize = get_line(&next_line_buffer_, &next_line_buffer_size_, - file_); + + if (file_) { + ssize = get_line_file(&next_line_buffer_, + &next_line_buffer_size_, file_); + } else { + ssize = get_line_buffer(&next_line_buffer_, + &next_line_buffer_size_, + csv_data_buffer_, csv_data_size_, + curr_char_); + } if (ssize == -1) { return false; @@ -2947,6 +3033,10 @@ private: } size_t remove_eol(char*& buffer, size_t ssize) { + if (buffer[ssize - 1] != '\n') { + return ssize; + } + size_t size = ssize - 1; if (ssize >= 2 && buffer[ssize - 2] == '\r') { crlf_ = 
true; @@ -2977,8 +3067,17 @@ private: bool append_next_line_to_buffer(char*& buffer, size_t& size) { undo_remove_eol(buffer, size); - ssize_t next_ssize = - get_line(&helper_buffer_, &helper_size_, file_); + ssize_t next_ssize; + if (file_) { + next_ssize = + get_line_file(&helper_buffer_, &helper_buffer_size, file_); + } else { + next_ssize = + get_line_buffer(&helper_buffer_, &helper_buffer_size, + csv_data_buffer_, csv_data_size_, + curr_char_); + } + if (next_ssize == -1) { return false; } @@ -3005,11 +3104,15 @@ private: size_t buffer_size_{0}; size_t next_line_buffer_size_{0}; - size_t helper_size_{0}; + size_t helper_buffer_size{0}; std::string delim_; FILE* file_{nullptr}; + const char* csv_data_buffer_{nullptr}; + size_t csv_data_size_{0}; + size_t curr_char_{0}; + bool crlf_{false}; size_t line_number_{0}; From 63a618957bedf7248d629ea7ee2d236669292f46 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 18:46:26 +0100 Subject: [PATCH 11/57] Add more unit tests for buffer mode --- test/test_parser1.hpp | 13 ++++-- test/test_parser1_4.cpp | 100 +++++++++++++++++++++++----------------- 2 files changed, 67 insertions(+), 46 deletions(-) diff --git a/test/test_parser1.hpp b/test/test_parser1.hpp index a68a939..32365c0 100644 --- a/test/test_parser1.hpp +++ b/test/test_parser1.hpp @@ -109,9 +109,8 @@ std::string make_buffer(const std::string& file_name) { std::ifstream in{file_name, std::ios::binary}; std::string tmp; std::string out; - out.reserve(sizeof(out) + 1); - while (in >> tmp) { - out += tmp; + + auto copy_if_whitespaces = [&] { std::string matches = "\n\r\t "; while (std::any_of(matches.begin(), matches.end(), [&](auto c) { return in.peek() == c; })) { @@ -123,6 +122,14 @@ std::string make_buffer(const std::string& file_name) { in.ignore(1); } } + }; + + out.reserve(sizeof(out) + 1); + + copy_if_whitespaces(); + while (in >> tmp) { + out += tmp; + copy_if_whitespaces(); } return out; } diff --git a/test/test_parser1_4.cpp 
b/test/test_parser1_4.cpp index 1045850..82233c6 100644 --- a/test/test_parser1_4.cpp +++ b/test/test_parser1_4.cpp @@ -7,17 +7,13 @@ template struct has_type> : std::disjunction...> {}; -static inline void check_size(size_t size1, size_t size2) { - CHECK_EQ(size1, size2); -} - -template +template static void test_fields_impl(const std::string file_name, const std::vector& data, const std::vector& fields) { using CaseType = std::tuple; - ss::parser p{file_name, ","}; + auto [p, _] = make_parser(file_name, ","); CHECK_FALSE(p.field_exists("Unknown")); p.use_fields(fields); std::vector i; @@ -26,7 +22,7 @@ static void test_fields_impl(const std::string file_name, i.push_back(a); } - check_size(i.size(), data.size()); + CHECK_EQ(i.size(), data.size()); for (size_t j = 0; j < i.size(); ++j) { if constexpr (has_type::value) { CHECK_EQ(std::get(i[j]), data[j].i); @@ -43,11 +39,16 @@ static void test_fields_impl(const std::string file_name, template static void test_fields(const std::string file_name, const std::vector& data, const std::vector& fields) { - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, data, - fields); + test_fields_impl, Ts...>(file_name, data, fields); + test_fields_impl, Ts...>(file_name, data, + fields); + test_fields_impl, Ts...>(file_name, + data, fields); + test_fields_impl, Ts...>(file_name, data, fields); + test_fields_impl, Ts...>(file_name, data, + fields); + test_fields_impl, Ts...>(file_name, + data, fields); } TEST_CASE("parser test various cases with header") { @@ -196,34 +197,37 @@ TEST_CASE("parser test various cases with header") { test_fields(o, d, {Dbl, Int, Str}); } -template +template void test_invalid_fields_impl(const std::vector& lines, const std::vector& fields) { unique_file_name f{"test_parser"}; - std::ofstream out{f.name}; - for (const auto& line : lines) { - out << line << std::endl; + { + std::ofstream out{f.name}; + for (const 
auto& line : lines) { + out << line << std::endl; + } } - out.close(); { // No fields specified - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(); }; + auto [p, _] = make_parser(f.name, ","); + auto command = [&p = p] { p.use_fields(); }; expect_error_on_command(p, command); } { // Unknown field - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields("Unknown"); }; + auto [p, _] = make_parser(f.name, ","); + auto command = [&p = p] { p.use_fields("Unknown"); }; expect_error_on_command(p, command); } { // Field used multiple times - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields.at(0), fields.at(0)); }; + auto [p, _] = make_parser(f.name, ","); + auto command = [&p = p, &fields = fields] { + p.use_fields(fields.at(0), fields.at(0)); + }; if (!fields.empty()) { expect_error_on_command(p, command); } @@ -231,8 +235,8 @@ void test_invalid_fields_impl(const std::vector& lines, { // Mapping out of range - ss::parser p{f.name, ","}; - auto command = [&] { + auto [p, _] = make_parser(f.name, ","); + auto command = [&p = p, &fields = fields] { p.use_fields(fields.at(0)); p.template get_next(); }; @@ -243,8 +247,8 @@ void test_invalid_fields_impl(const std::vector& lines, { // Invalid header - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields); }; + auto [p, _] = make_parser(f.name, ","); + auto command = [&p = p, &fields = fields] { p.use_fields(fields); }; if (!fields.empty()) { // Pass if there are no duplicates, fail otherwise @@ -267,9 +271,12 @@ void test_invalid_fields_impl(const std::vector& lines, template void test_invalid_fields(const std::vector& lines, const std::vector& fields) { - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + 
test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); } TEST_CASE("parser test invalid header fields usage") { @@ -296,7 +303,7 @@ TEST_CASE("parser test invalid header fields usage") { test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); } -template +template void test_invalid_rows_with_header() { unique_file_name f{"test_parser"}; { @@ -311,7 +318,7 @@ void test_invalid_rows_with_header() { } { - ss::parser p{f.name}; + auto [p, _] = make_parser(f.name); p.use_fields("Int", "String", "Double"); using data = std::tuple; @@ -337,7 +344,7 @@ void test_invalid_rows_with_header() { } { - ss::parser p{f.name}; + auto [p, _] = make_parser(f.name); p.use_fields("Double", "Int"); using data = std::tuple; @@ -361,7 +368,7 @@ void test_invalid_rows_with_header() { } { - ss::parser p{f.name}; + auto [p, _] = make_parser(f.name); p.use_fields("String", "Double"); using data = std::tuple; @@ -389,12 +396,15 @@ void test_invalid_rows_with_header() { } TEST_CASE("parser test invalid rows with header") { - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); } -template +template void test_ignore_empty_impl(const std::vector& data) { unique_file_name f{"test_parser"}; make_and_write(f.name, data); @@ -407,7 +417,8 @@ void test_ignore_empty_impl(const std::vector& data) { } { - ss::parser p{f.name, ","}; + auto [p, _] = + make_parser(f.name, ","); std::vector i; for (const auto& a : p.template iterate()) { @@ -418,7 +429,7 @@ void test_ignore_empty_impl(const std::vector& data) { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; size_t n = 0; while (!p.eof()) { @@ -441,9 +452,12 @@ void test_ignore_empty_impl(const std::vector& data) 
{ template void test_ignore_empty(const std::vector& data) { - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); } TEST_CASE("parser test various cases with empty lines") { From baf4317ffaef044bcbff0b4f6000aece3c4aca95 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 18:58:51 +0100 Subject: [PATCH 12/57] Add more unit tests for buffer mode --- test/test_helpers.hpp | 52 +++++++++++++++++++++++++++++++++++++++++++ test/test_parser1.hpp | 50 ----------------------------------------- test/test_parser2.hpp | 22 ++++++++++++------ 3 files changed, 67 insertions(+), 57 deletions(-) diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 2509edf..78d9ab9 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -5,6 +5,7 @@ #include #include #include +#include #ifdef CMAKE_GITHUB_CI #include @@ -134,4 +135,55 @@ std::vector> vector_combinations(const std::vector& v, } return ret; } + +std::string make_buffer(const std::string& file_name) { + std::ifstream in{file_name, std::ios::binary}; + std::string tmp; + std::string out; + + auto copy_if_whitespaces = [&] { + std::string matches = "\n\r\t "; + while (std::any_of(matches.begin(), matches.end(), + [&](auto c) { return in.peek() == c; })) { + if (in.peek() == '\r') { + out += "\r\n"; + in.ignore(2); + } else { + out += std::string{static_cast(in.peek())}; + in.ignore(1); + } + } + }; + + out.reserve(sizeof(out) + 1); + + copy_if_whitespaces(); + while (in >> tmp) { + out += tmp; + copy_if_whitespaces(); + } + return out; +} + +template +std::tuple, std::string> make_parser( + const std::string& file_name, const std::string& delim = "") { + if (buffer_mode) { + auto buffer = make_buffer(file_name); + if (delim.empty()) { + return {ss::parser{buffer.data(), 
buffer.size()}, + std::move(buffer)}; + } else { + return {ss::parser{buffer.data(), buffer.size(), delim}, + std::move(buffer)}; + } + } else { + if (delim.empty()) { + return {ss::parser{file_name}, std::string{}}; + } else { + return {ss::parser{file_name, delim}, std::string{}}; + } + } +} + } /* namespace */ diff --git a/test/test_parser1.hpp b/test/test_parser1.hpp index 32365c0..a0fd9ce 100644 --- a/test/test_parser1.hpp +++ b/test/test_parser1.hpp @@ -105,54 +105,4 @@ static void make_and_write(const std::string& file_name, } } -std::string make_buffer(const std::string& file_name) { - std::ifstream in{file_name, std::ios::binary}; - std::string tmp; - std::string out; - - auto copy_if_whitespaces = [&] { - std::string matches = "\n\r\t "; - while (std::any_of(matches.begin(), matches.end(), - [&](auto c) { return in.peek() == c; })) { - if (in.peek() == '\r') { - out += "\r\n"; - in.ignore(2); - } else { - out += std::string{static_cast(in.peek())}; - in.ignore(1); - } - } - }; - - out.reserve(sizeof(out) + 1); - - copy_if_whitespaces(); - while (in >> tmp) { - out += tmp; - copy_if_whitespaces(); - } - return out; -} - -template -std::tuple, std::string> make_parser( - const std::string& file_name, const std::string& delim = "") { - if (buffer_mode) { - auto buffer = make_buffer(file_name); - if (delim.empty()) { - return {ss::parser{buffer.data(), buffer.size()}, - std::move(buffer)}; - } else { - return {ss::parser{buffer.data(), buffer.size(), delim}, - std::move(buffer)}; - } - } else { - if (delim.empty()) { - return {ss::parser{file_name}, std::string{}}; - } else { - return {ss::parser{file_name, delim}, std::string{}}; - } - } -} - } /* namespace */ diff --git a/test/test_parser2.hpp b/test/test_parser2.hpp index 262cb92..ba6fd1a 100644 --- a/test/test_parser2.hpp +++ b/test/test_parser2.hpp @@ -121,7 +121,7 @@ column make_column(const std::string& input_header, } [[maybe_unused]] void replace_all2(std::string& s, const std::string& old_value, - 
const std::string& new_value) { + const std::string& new_value) { for (size_t i = 0; i < 999; ++i) { size_t pos = s.find(old_value); if (pos == std::string::npos) { @@ -257,7 +257,8 @@ std::vector generate_csv_data(const std::vector& data, } [[maybe_unused]] void write_to_file(const std::vector& data, - const std::string& delim, const std::string& file_name) { + const std::string& delim, + const std::string& file_name) { std::ofstream out{file_name, std::ios_base::app}; std::string line; for (size_t i = 0; i < data.size(); ++i) { @@ -299,7 +300,7 @@ std::vector generate_csv_data(const std::vector& data, CHECK(V1 == V2); \ } -template +template void test_data_combinations(const std::vector& input_data, const std::string& delim, bool include_header) { using setup = ss::setup; @@ -388,7 +389,7 @@ void test_data_combinations(const std::vector& input_data, } for (const auto& layout : unique_layout_combinations) { - ss::parser p{f.name, delim}; + auto [p, _] = make_parser(f.name, delim); if (include_header && !setup::ignore_header) { std::vector fields; @@ -409,7 +410,7 @@ void test_data_combinations(const std::vector& input_data, REQUIRE(p.valid()); } - auto check_error = [&p] { + auto check_error = [&p = p] { CHECK(p.valid()); if (!p.valid()) { if constexpr (setup::string_error) { @@ -570,8 +571,14 @@ void test_option_combinations() { {columns0, columns1, columns2, columns3, columns4, columns5, columns6, columns7}) { try { - test_data_combinations(columns, delimiter, false); - test_data_combinations(columns, delimiter, true); + test_data_combinations(columns, delimiter, + false); + test_data_combinations(columns, delimiter, + true); + test_data_combinations(columns, delimiter, + false); + test_data_combinations(columns, delimiter, + true); } catch (std::exception& e) { std::cout << typeid(ss::parser).name() << std::endl; FAIL_CHECK(std::string{e.what()}); @@ -616,6 +623,7 @@ void test_option_combinations3() { } /* namespace */ +// Tests split into multiple compilation 
units #if 0 TEST_CASE("parser test various cases version 2 segment 1") { From 417a03a8a4847351f8545436dc0b2af2514fcc64 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 19:36:41 +0100 Subject: [PATCH 13/57] Fix unit tests build --- test/test_helpers.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 78d9ab9..a74d044 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -5,7 +5,6 @@ #include #include #include -#include #ifdef CMAKE_GITHUB_CI #include @@ -13,6 +12,11 @@ #include #endif +namespace ss { +template +class parser; +} /* ss */ + namespace { struct buffer { std::string data_; @@ -185,5 +189,4 @@ std::tuple, std::string> make_parser( } } } - } /* namespace */ From a2666816de2403cdadbf649d96949f02c4a9566e Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 19:58:25 +0100 Subject: [PATCH 14/57] Add maybe_unused attribute to test helper functions --- test/test_helpers.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index a74d044..d5afeae 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -39,7 +39,7 @@ struct buffer { [[maybe_unused]] inline buffer buff; -std::string time_now_rand() { +[[maybe_unused]] std::string time_now_rand() { srand(time(nullptr)); std::stringstream ss; auto t = std::time(nullptr); @@ -120,8 +120,8 @@ struct unique_file_name { } template -std::vector> vector_combinations(const std::vector& v, - size_t n) { +[[maybe_unused]] std::vector> vector_combinations( + const std::vector& v, size_t n) { std::vector> ret; if (n <= 1) { for (const auto& i : v) { @@ -140,7 +140,7 @@ std::vector> vector_combinations(const std::vector& v, return ret; } -std::string make_buffer(const std::string& file_name) { +[[maybe_unused]] std::string make_buffer(const std::string& file_name) { std::ifstream in{file_name, std::ios::binary}; std::string tmp; std::string out; 
@@ -170,7 +170,7 @@ std::string make_buffer(const std::string& file_name) { } template -std::tuple, std::string> make_parser( +[[maybe_unused]] std::tuple, std::string> make_parser( const std::string& file_name, const std::string& delim = "") { if (buffer_mode) { auto buffer = make_buffer(file_name); From d019edb2bfea5dcff2f4dca376995b226a033898 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 21:58:06 +0100 Subject: [PATCH 15/57] [skip ci] Update unit test CMakeList for MINGW --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8439639..e6bbc61 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa,-mbig-obj) + add_compile_options(-Wa,-mbig-obj,-mcmodel=large) endif () include(FetchContent) From d21c387a331962aa4e6e86fd192e2afec3e99f20 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 23:07:26 +0100 Subject: [PATCH 16/57] [skip ci] Try fix MINGW ci --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e6bbc61..83e469c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa,-mbig-obj,-mcmodel=large) + add_compile_options(-Wa,-mbig-obj,-Wl,--default-image-base-low) endif () include(FetchContent) From 0466c7234cbc66011464f4d5be37f11c1b5410ce Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 23:13:16 +0100 Subject: [PATCH 17/57] [skip ci] Try fix MINGW ci --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 83e469c..59fa1b2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - 
add_compile_options(-Wa,-mbig-obj,-Wl,--default-image-base-low) + add_compile_options(-Wa -mbig-obj -Wl --default-image-base-low) endif () include(FetchContent) From 8bb773625b9adacca00028e524ab28d1217e57a1 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 23:16:10 +0100 Subject: [PATCH 18/57] [skip ci] Try fix MINGW ci --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 59fa1b2..c897501 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa -mbig-obj -Wl --default-image-base-low) + add_compile_options(-Wa,-mbig-obj -Wl,--default-image-base-low) endif () include(FetchContent) From 45b840a30a05907695b9826a15df4e0fc6dacc6a Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 23:22:37 +0100 Subject: [PATCH 19/57] [skip ci] Try fix MINGW ci --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c897501..7207030 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa,-mbig-obj -Wl,--default-image-base-low) + add_compile_options(-Wa,-mbig-obj -mcmodel=large) endif () include(FetchContent) From 11d57bd07376e75717b9458c2d965425f3c7718e Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 18 Feb 2024 23:40:53 +0100 Subject: [PATCH 20/57] [skip ci] Try fix MINGW ci --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7207030..cb0ca65 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa,-mbig-obj -mcmodel=large) + add_compile_options(-Wa,-mbig-obj -mcmodel=medium) endif () include(FetchContent) 
From e4fba8a918584bb2dedfa594d54b3e865a52eb23 Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 00:04:09 +0100 Subject: [PATCH 21/57] Split parser tests part 2 into additional segments --- test/CMakeLists.txt | 6 +++--- test/meson.build | 2 ++ test/test_parser2.hpp | 14 +++++++++++--- test/test_parser2_1.cpp | 2 -- test/test_parser2_2.cpp | 4 +--- test/test_parser2_3.cpp | 5 ++--- test/test_parser2_4.cpp | 14 +++++--------- test/test_parser2_5.cpp | 16 ++++++++++++++++ test/test_parser2_6.cpp | 18 ++++++++++++++++++ 9 files changed, 58 insertions(+), 23 deletions(-) create mode 100644 test/test_parser2_5.cpp create mode 100644 test/test_parser2_6.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cb0ca65..be399a1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -15,7 +15,7 @@ endif() if (MSVC) add_compile_options(/bigobj) elseif (MINGW) - add_compile_options(-Wa,-mbig-obj -mcmodel=medium) + add_compile_options(-Wa,-mbig-obj) endif () include(FetchContent) @@ -35,8 +35,8 @@ enable_testing() foreach(name IN ITEMS test_splitter test_parser1_1 test_parser1_2 test_parser1_3 test_parser1_4 test_converter test_extractions test_parser2_1 test_parser2_2 - test_parser2_3 test_parser2_4 - test_extractions_without_fast_float) + test_parser2_3 test_parser2_4 test_parser2_5 + test_parser2_6 test_extractions_without_fast_float) add_executable("${name}" "${name}.cpp") target_link_libraries("${name}" PRIVATE ssp::ssp fast_float doctest::doctest) diff --git a/test/meson.build b/test/meson.build index 454b16f..f4e6639 100644 --- a/test/meson.build +++ b/test/meson.build @@ -13,6 +13,8 @@ tests = [ 'parser2_2', 'parser2_3', 'parser2_4', + 'parser2_5', + 'parser2_6', 'extractions_without_fast_float', ] diff --git a/test/test_parser2.hpp b/test/test_parser2.hpp index ba6fd1a..e3e17db 100644 --- a/test/test_parser2.hpp +++ b/test/test_parser2.hpp @@ -635,25 +635,33 @@ TEST_CASE("parser test various cases version 2 segment 1") { using multiline_r = 
ss::multiline_restricted<10>; using trimr = ss::trim_right<' '>; using triml = ss::trim_left<' '>; + using trim = ss::trim<' '>; // segment 1 test_option_combinations3<>(); test_option_combinations3(); - test_option_combinations3(); // segment 2 + test_option_combinations3(); test_option_combinations3(); + + // segment 3 test_option_combinations3(); test_option_combinations3(); - // segment 3 + // segment 4 test_option_combinations3(); test_option_combinations3(); - // segment 4 + // segment 5 test_option_combinations(); test_option_combinations(); + + // segment 6 + test_option_combinations3(); + test_option_combinations3(); #else + test_option_combinations3(); #endif } diff --git a/test/test_parser2_1.cpp b/test/test_parser2_1.cpp index 6f1ac7a..55ec45a 100644 --- a/test/test_parser2_1.cpp +++ b/test/test_parser2_1.cpp @@ -3,12 +3,10 @@ TEST_CASE("parser test various cases version 2 segment 1") { #ifdef CMAKE_GITHUB_CI - using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; test_option_combinations3<>(); test_option_combinations3(); - test_option_combinations3(); #endif } diff --git a/test/test_parser2_2.cpp b/test/test_parser2_2.cpp index 5b5cc2c..ec76aa9 100644 --- a/test/test_parser2_2.cpp +++ b/test/test_parser2_2.cpp @@ -5,11 +5,9 @@ TEST_CASE("parser test various cases version 2 segment 2") { #ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; - using multiline = ss::multiline; + test_option_combinations3(); test_option_combinations3(); - test_option_combinations3(); - test_option_combinations3(); #endif } diff --git a/test/test_parser2_3.cpp b/test/test_parser2_3.cpp index 776f49a..aab905c 100644 --- a/test/test_parser2_3.cpp +++ b/test/test_parser2_3.cpp @@ -6,10 +6,9 @@ TEST_CASE("parser test various cases version 2 segment 3") { using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; - using multiline_r = ss::multiline_restricted<10>; - test_option_combinations3(); - 
test_option_combinations3(); + test_option_combinations3(); + test_option_combinations3(); #endif } diff --git a/test/test_parser2_4.cpp b/test/test_parser2_4.cpp index 4658337..776f49a 100644 --- a/test/test_parser2_4.cpp +++ b/test/test_parser2_4.cpp @@ -1,19 +1,15 @@ -#define SEGMENT_NAME "segment4" +#define SEGMENT_NAME "segment3" #include "test_parser2.hpp" -TEST_CASE("parser test various cases version 2 segment 4") { +TEST_CASE("parser test various cases version 2 segment 3") { +#ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; + using multiline_r = ss::multiline_restricted<10>; -#ifdef CMAKE_GITHUB_CI - using trimr = ss::trim_right<' '>; - using triml = ss::trim_left<' '>; - - test_option_combinations(); - test_option_combinations(); -#else test_option_combinations3(); + test_option_combinations3(); #endif } diff --git a/test/test_parser2_5.cpp b/test/test_parser2_5.cpp new file mode 100644 index 0000000..71ae35f --- /dev/null +++ b/test/test_parser2_5.cpp @@ -0,0 +1,16 @@ +#define SEGMENT_NAME "segment3" +#include "test_parser2.hpp" + +TEST_CASE("parser test various cases version 2 segment 3") { +#ifdef CMAKE_GITHUB_CI + using quote = ss::quote<'"'>; + using escape = ss::escape<'\\'>; + using multiline = ss::multiline; + using trimr = ss::trim_right<' '>; + using triml = ss::trim_left<' '>; + + test_option_combinations(); + test_option_combinations(); +#endif +} + diff --git a/test/test_parser2_6.cpp b/test/test_parser2_6.cpp new file mode 100644 index 0000000..49c429b --- /dev/null +++ b/test/test_parser2_6.cpp @@ -0,0 +1,18 @@ +#define SEGMENT_NAME "segment4" +#include "test_parser2.hpp" + +TEST_CASE("parser test various cases version 2 segment 4") { + using quote = ss::quote<'"'>; + using escape = ss::escape<'\\'>; + using multiline = ss::multiline; + +#ifdef CMAKE_GITHUB_CI + using trim = ss::trim<' '>; + + test_option_combinations3(); + test_option_combinations3(); +#else + 
test_option_combinations3(); +#endif +} + From 775b8c93e28eb2026ba166857bd31a7ca4c4364d Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 00:16:22 +0100 Subject: [PATCH 22/57] Fix parser tests part 2 segment issues --- include/ss/setup.hpp | 2 +- test/test_parser2_4.cpp | 4 ++-- test/test_parser2_5.cpp | 4 ++-- test/test_parser2_6.cpp | 11 ++--------- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/include/ss/setup.hpp b/include/ss/setup.hpp index fd41da5..80cf4d5 100644 --- a/include/ss/setup.hpp +++ b/include/ss/setup.hpp @@ -109,7 +109,7 @@ struct get_matcher { struct is_matcher : is_instance_of_matcher {}; static_assert(count_v <= 1, - "the same matcher cannot" + "the same matcher cannot " "be defined multiple times"); using type = std::conditional_t::value, T, typename get_matcher::type>; diff --git a/test/test_parser2_4.cpp b/test/test_parser2_4.cpp index 776f49a..ea78aaf 100644 --- a/test/test_parser2_4.cpp +++ b/test/test_parser2_4.cpp @@ -1,7 +1,7 @@ -#define SEGMENT_NAME "segment3" +#define SEGMENT_NAME "segment4" #include "test_parser2.hpp" -TEST_CASE("parser test various cases version 2 segment 3") { +TEST_CASE("parser test various cases version 2 segment 4") { #ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; diff --git a/test/test_parser2_5.cpp b/test/test_parser2_5.cpp index 71ae35f..3f345b0 100644 --- a/test/test_parser2_5.cpp +++ b/test/test_parser2_5.cpp @@ -1,7 +1,7 @@ -#define SEGMENT_NAME "segment3" +#define SEGMENT_NAME "segment5" #include "test_parser2.hpp" -TEST_CASE("parser test various cases version 2 segment 3") { +TEST_CASE("parser test various cases version 2 segment 5") { #ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; diff --git a/test/test_parser2_6.cpp b/test/test_parser2_6.cpp index 49c429b..d6ec30f 100644 --- a/test/test_parser2_6.cpp +++ b/test/test_parser2_6.cpp @@ -1,18 +1,11 @@ -#define SEGMENT_NAME "segment4" +#define SEGMENT_NAME 
"segment6" #include "test_parser2.hpp" -TEST_CASE("parser test various cases version 2 segment 4") { +TEST_CASE("parser test various cases version 2 segment 6") { using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; -#ifdef CMAKE_GITHUB_CI - using trim = ss::trim<' '>; - test_option_combinations3(); - test_option_combinations3(); -#else - test_option_combinations3(); -#endif } From aaa22046a52176c6e2615a4b5891ff1d7024786f Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 01:00:42 +0100 Subject: [PATCH 23/57] Add null data buffer error handler and unit test, resolve TODOs --- include/ss/parser.hpp | 45 ++++++++++++++++++++++++--------------- ssp.hpp | 47 +++++++++++++++++++++++++---------------- test/test_parser1_1.cpp | 19 +++++++++++++++++ 3 files changed, 76 insertions(+), 35 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index cd5e7b1..51031b5 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -52,11 +52,16 @@ public: const std::string& delim = ss::default_delimiter) : file_name_{"buffer line"}, reader_{csv_data_buffer, csv_data_size, delim} { - read_line(); - if constexpr (ignore_header) { - ignore_next(); + if (csv_data_buffer) { + read_line(); + if constexpr (ignore_header) { + ignore_next(); + } else { + raw_header_ = reader_.get_buffer(); + } } else { - raw_header_ = reader_.get_buffer(); + handle_error_null_buffer(); + eof_ = true; } } @@ -524,6 +529,19 @@ private: } } + void handle_error_null_buffer() { + constexpr static auto error_msg = " received null data buffer"; + + if constexpr (string_error) { + error_.clear(); + error_.append(file_name_).append(error_msg); + } else if constexpr (throw_on_error) { + throw ss::exception{file_name_ + error_msg}; + } else { + error_ = true; + } + } + void handle_error_file_not_open() { constexpr static auto error_msg = " could not be opened"; @@ -728,17 +746,11 @@ private: size_t pos; int c; - // TODO remove check - if (lineptr 
== nullptr || buffer == nullptr || n == nullptr) { - return -1; - } - if (curr_char >= csv_data_size) { return -1; } c = buffer[curr_char++]; - // TODO maybe remove this too if (*lineptr == nullptr) { *lineptr = static_cast(malloc(128)); if (*lineptr == nullptr) { @@ -751,13 +763,11 @@ private: while (curr_char <= csv_data_size) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - // TODO maybe remove this too if (new_size < 128) { new_size = 128; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); - // TODO check for failed malloc in the callee if (new_ptr == nullptr) { return -1; } @@ -924,11 +934,11 @@ private: } void realloc_concat(char*& first, size_t& first_size, - const char* const second, size_t second_size) { - // TODO make buffer_size an argument - next_line_buffer_size_ = first_size + second_size + 3; + size_t& buffer_size, const char* const second, + size_t second_size) { + buffer_size = first_size + second_size + 3; auto new_first = static_cast( - realloc(static_cast(first), next_line_buffer_size_)); + realloc(static_cast(first), buffer_size)); if (!first) { throw std::bad_alloc{}; } @@ -958,7 +968,8 @@ private: ++line_number_; size_t next_size = remove_eol(helper_buffer_, next_ssize); - realloc_concat(buffer, size, helper_buffer_, next_size); + realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_, + next_size); return true; } diff --git a/ssp.hpp b/ssp.hpp index d0d21e4..807e55e 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -803,7 +803,7 @@ struct get_matcher { struct is_matcher : is_instance_of_matcher {}; static_assert(count_v <= 1, - "the same matcher cannot" + "the same matcher cannot " "be defined multiple times"); using type = std::conditional_t::value, T, typename get_matcher::type>; @@ -2178,11 +2178,16 @@ public: const std::string& delim = ss::default_delimiter) : file_name_{"buffer line"}, reader_{csv_data_buffer, csv_data_size, delim} { - read_line(); - if constexpr (ignore_header) { - ignore_next(); + if 
(csv_data_buffer) { + read_line(); + if constexpr (ignore_header) { + ignore_next(); + } else { + raw_header_ = reader_.get_buffer(); + } } else { - raw_header_ = reader_.get_buffer(); + handle_error_null_buffer(); + eof_ = true; } } @@ -2650,6 +2655,19 @@ private: } } + void handle_error_null_buffer() { + constexpr static auto error_msg = " received null data buffer"; + + if constexpr (string_error) { + error_.clear(); + error_.append(file_name_).append(error_msg); + } else if constexpr (throw_on_error) { + throw ss::exception{file_name_ + error_msg}; + } else { + error_ = true; + } + } + void handle_error_file_not_open() { constexpr static auto error_msg = " could not be opened"; @@ -2854,17 +2872,11 @@ private: size_t pos; int c; - // TODO remove check - if (lineptr == nullptr || buffer == nullptr || n == nullptr) { - return -1; - } - if (curr_char >= csv_data_size) { return -1; } c = buffer[curr_char++]; - // TODO maybe remove this too if (*lineptr == nullptr) { *lineptr = static_cast(malloc(128)); if (*lineptr == nullptr) { @@ -2877,13 +2889,11 @@ private: while (curr_char <= csv_data_size) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - // TODO maybe remove this too if (new_size < 128) { new_size = 128; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); - // TODO check for failed malloc in the callee if (new_ptr == nullptr) { return -1; } @@ -3050,11 +3060,11 @@ private: } void realloc_concat(char*& first, size_t& first_size, - const char* const second, size_t second_size) { - // TODO make buffer_size an argument - next_line_buffer_size_ = first_size + second_size + 3; + size_t& buffer_size, const char* const second, + size_t second_size) { + buffer_size = first_size + second_size + 3; auto new_first = static_cast( - realloc(static_cast(first), next_line_buffer_size_)); + realloc(static_cast(first), buffer_size)); if (!first) { throw std::bad_alloc{}; } @@ -3084,7 +3094,8 @@ private: ++line_number_; size_t next_size = 
remove_eol(helper_buffer_, next_ssize); - realloc_concat(buffer, size, helper_buffer_, next_size); + realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_, + next_size); return true; } diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 60787f2..26e463e 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -21,6 +21,25 @@ TEST_CASE("test file not found") { } } +TEST_CASE("test null buffer") { + { + ss::parser p{nullptr, 10, ","}; + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{nullptr, 10, ","}; + CHECK_FALSE(p.valid()); + } + + try { + ss::parser p{nullptr, 10, ","}; + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } +} + template void test_various_cases() { unique_file_name f{"test_parser"}; From d4226674775427b5c6c42d097386a5f3f0b44a78 Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 01:16:19 +0100 Subject: [PATCH 24/57] [skip ci] Remove some magic numbers --- include/ss/common.hpp | 7 ++++--- include/ss/parser.hpp | 7 ++++--- ssp.hpp | 14 ++++++++------ 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 430f8ee..70c8241 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -12,6 +12,7 @@ using string_range = std::pair; using split_data = std::vector; constexpr inline auto default_delimiter = ","; +constexpr static auto get_line_initial_buffer_size = 128; template inline void assert_string_error_defined() { @@ -46,7 +47,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { } if (*lineptr == nullptr) { - *lineptr = static_cast(malloc(128)); + *lineptr = static_cast(malloc(get_line_initial_buffer_size)); if (*lineptr == nullptr) { return -1; } @@ -57,8 +58,8 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { while (c != EOF) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - if (new_size < 128) { - new_size 
= 128; + if (new_size < get_line_initial_buffer_size) { + new_size = get_line_initial_buffer_size; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 51031b5..a7319ee 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -752,7 +752,8 @@ private: c = buffer[curr_char++]; if (*lineptr == nullptr) { - *lineptr = static_cast(malloc(128)); + *lineptr = + static_cast(malloc(get_line_initial_buffer_size)); if (*lineptr == nullptr) { return -1; } @@ -763,8 +764,8 @@ private: while (curr_char <= csv_data_size) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - if (new_size < 128) { - new_size = 128; + if (new_size < get_line_initial_buffer_size) { + new_size = get_line_initial_buffer_size; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); diff --git a/ssp.hpp b/ssp.hpp index 807e55e..9b555ec 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -625,6 +625,7 @@ using string_range = std::pair; using split_data = std::vector; constexpr inline auto default_delimiter = ","; +constexpr static auto get_line_initial_buffer_size = 128; template inline void assert_string_error_defined() { @@ -659,7 +660,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { } if (*lineptr == nullptr) { - *lineptr = static_cast(malloc(128)); + *lineptr = static_cast(malloc(get_line_initial_buffer_size)); if (*lineptr == nullptr) { return -1; } @@ -670,8 +671,8 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { while (c != EOF) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - if (new_size < 128) { - new_size = 128; + if (new_size < get_line_initial_buffer_size) { + new_size = get_line_initial_buffer_size; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); @@ -2878,7 +2879,8 @@ private: c = buffer[curr_char++]; if (*lineptr == nullptr) { - *lineptr = static_cast(malloc(128)); + *lineptr = + 
static_cast(malloc(get_line_initial_buffer_size)); if (*lineptr == nullptr) { return -1; } @@ -2889,8 +2891,8 @@ private: while (curr_char <= csv_data_size) { if (pos + 1 >= *n) { size_t new_size = *n + (*n >> 2); - if (new_size < 128) { - new_size = 128; + if (new_size < get_line_initial_buffer_size) { + new_size = get_line_initial_buffer_size; } char* new_ptr = static_cast( realloc(static_cast(*lineptr), new_size)); From d4fc2ee5611850a2e2781e90990883daf93977fc Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 01:23:09 +0100 Subject: [PATCH 25/57] [skip ci] Update .gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 38a1e99..eb880af 100644 --- a/.gitignore +++ b/.gitignore @@ -6,5 +6,3 @@ build/ hbuild/ subprojects/* !subprojects/*.wrap -ssp.cpp -ssp.bin From 9d96a7d47f3843113d084d84b1cb6fd180633845 Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 19 Feb 2024 01:59:10 +0100 Subject: [PATCH 26/57] [skip ci] Update coverage.yml --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 4c655c2..3c284da 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -49,7 +49,7 @@ jobs: apt install -y gcovr lcov - name: Configure - run: cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-Wall -fprofile-arcs -ftest-coverage --coverage" + run: cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-Wall -fprofile-arcs -ftest-coverage --coverage -fno-elide-constructors -fno-default-inline" - name: Build run: cmake --build build -j ${{steps.cores.outputs.count}} From 59f6591da3a9bdfcb8844ccc37afe46fef4c7e17 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 01:25:53 +0100 Subject: [PATCH 27/57] [skip ci] Update coverage.yml --- .github/workflows/coverage.yml | 2 +- include/ss/parser.hpp | 32 ++++++++++++++++++-------------- 
test/test_helpers.hpp | 4 +++- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 3c284da..6df81f4 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -60,7 +60,7 @@ jobs: - name: Generate coverage report run: | - lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external + lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external --filter branch --filter line lcov -e out.info '*include/ss*hpp' -o filtered.info - name: Invoke coveralls diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index a7319ee..ac6f204 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -368,20 +368,22 @@ public: template void try_convert_and_invoke(std::optional& value, Fun&& fun) { - if (!parser_.valid()) { - auto tuple_output = try_same(); - if (!parser_.valid()) { - return; - } - - if constexpr (!std::is_same_v) { - value = to_object(std::move(tuple_output)); - } else { - value = std::move(tuple_output); - } - - parser_.try_invoke(*value, std::forward(fun)); + if (parser_.valid()) { + return; } + + auto tuple_output = try_same(); + if (!parser_.valid()) { + return; + } + + if constexpr (!std::is_same_v) { + value = to_object(std::move(tuple_output)); + } else { + value = std::move(tuple_output); + } + + parser_.try_invoke(*value, std::forward(fun)); } template @@ -918,6 +920,7 @@ private: } size_t remove_eol(char*& buffer, size_t ssize) { + // TODO write unit tests if (buffer[ssize - 1] != '\n') { return ssize; } @@ -940,7 +943,8 @@ private: buffer_size = first_size + second_size + 3; auto new_first = static_cast( realloc(static_cast(first), buffer_size)); - if (!first) { + // TODO check + if (!new_first) { throw std::bad_alloc{}; } diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index d5afeae..8df69d9 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include 
#ifdef CMAKE_GITHUB_CI #include @@ -134,7 +136,7 @@ template for (const auto& i : v) { for (auto j : inner_combinations) { j.insert(j.begin(), i); - ret.push_back(move(j)); + ret.push_back(std::move(j)); } } return ret; From 5e32d722e81a5feca302fdaefb980d77e8a23875 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 02:19:14 +0100 Subject: [PATCH 28/57] [skip ci] Update coverage.yml, add unit test for files and buffers without new line at the end of the file --- .github/workflows/coverage.yml | 4 +++- include/ss/parser.hpp | 2 -- test/test_parser1.hpp | 8 ++++++-- test/test_parser1_1.cpp | 29 +++++++++++++++++++++++++++++ 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 6df81f4..fa59e39 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -60,7 +60,9 @@ jobs: - name: Generate coverage report run: | - lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external --filter branch --filter line + lcov --version + lcov --help + lcov -d . 
-c -o out.info --rc lcov_branch_coverage=1 --no-external lcov -e out.info '*include/ss*hpp' -o filtered.info - name: Invoke coveralls diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index ac6f204..72ed190 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -920,7 +920,6 @@ private: } size_t remove_eol(char*& buffer, size_t ssize) { - // TODO write unit tests if (buffer[ssize - 1] != '\n') { return ssize; } @@ -943,7 +942,6 @@ private: buffer_size = first_size + second_size + 3; auto new_first = static_cast( realloc(static_cast(first), buffer_size)); - // TODO check if (!new_first) { throw std::bad_alloc{}; } diff --git a/test/test_parser1.hpp b/test/test_parser1.hpp index a0fd9ce..90267c8 100644 --- a/test/test_parser1.hpp +++ b/test/test_parser1.hpp @@ -80,7 +80,8 @@ std::enable_if_t, bool> operator==(const T& lhs, template static void make_and_write(const std::string& file_name, const std::vector& data, - const std::vector& header = {}) { + const std::vector& header = {}, + bool new_line_eof = true) { std::ofstream out{file_name}; #ifdef _WIN32 @@ -101,7 +102,10 @@ static void make_and_write(const std::string& file_name, } for (size_t i = 0; i < data.size(); ++i) { - out << data[i].to_string() << new_lines[i % new_lines.size()]; + out << data[i].to_string(); + if (new_line_eof || i + 1 < data.size()) { + out << new_lines[i % new_lines.size()]; + } } } diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 26e463e..4c8d476 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -551,3 +551,32 @@ TEST_CASE("parser test composite conversion") { test_composite_conversion(); test_composite_conversion(); } + +template +void test_no_new_line_at_eof_impl(const std::vector& data) { + unique_file_name f{"test_parser"}; + make_and_write(f.name, data, {}, false); + + auto [p, _] = make_parser(f.name); + std::vector parsed_data; + + for (const auto& el : p.template iterate()) { + parsed_data.push_back(el); + } + + 
CHECK_EQ(data, parsed_data); +} + +template +void test_no_new_line_at_eof() { + test_no_new_line_at_eof_impl({}); + test_no_new_line_at_eof_impl({{1, 2, "X"}}); + test_no_new_line_at_eof_impl({{1, 2, "X"}, {3, 4, "YY"}}); + test_no_new_line_at_eof_impl( + {{1, 2, "X"}, {3, 4, "YY"}, {5, 6, "ZZZ"}, {7, 8, "UUU"}}); +} + +TEST_CASE("test no new line at end of data") { + test_no_new_line_at_eof(); + test_no_new_line_at_eof(); +} From b993eb8852a147ffa77b274b5873bbb9decf5350 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 19:41:06 +0100 Subject: [PATCH 29/57] [skip ci] Update coverage.yml to work with lcov2.0 --- .github/workflows/coverage.yml | 5 ++++- include/ss/common.hpp | 1 + include/ss/parser.hpp | 1 + script/ci_install_lcov.sh | 7 +++++++ test/test_parser2.hpp | 3 +++ 5 files changed, 16 insertions(+), 1 deletion(-) create mode 100755 script/ci_install_lcov.sh diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fa59e39..19869c8 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -46,7 +46,10 @@ jobs: - name: Install test coverage tools run: | apt update - apt install -y gcovr lcov + apt install -y gcovr + + - name: Install lcov2.0 + run: script/ci_install_lcov.sh - name: Configure run: cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug -D CMAKE_CXX_FLAGS="-Wall -fprofile-arcs -ftest-coverage --coverage -fno-elide-constructors -fno-default-inline" diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 70c8241..095561c 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -31,6 +31,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else +// TODO set ERRNO ? 
using ssize_t = int64_t; inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { size_t pos; diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 72ed190..b715e75 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -742,6 +742,7 @@ private: reader(const reader& other) = delete; reader& operator=(const reader& other) = delete; + // TODO set error numbers on error ssize_t get_line_buffer(char** lineptr, size_t* n, const char* const buffer, size_t csv_data_size, size_t& curr_char) { diff --git a/script/ci_install_lcov.sh b/script/ci_install_lcov.sh new file mode 100755 index 0000000..2dbe193 --- /dev/null +++ b/script/ci_install_lcov.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh + +echo yes | cpan DateTime Capture::Tiny + +wget -qO- https://github.com/linux-test-project/lcov/releases/download/v2.0/lcov-2.0.tar.gz | tar xvz +(cd lcov && make install) +lcov --version diff --git a/test/test_parser2.hpp b/test/test_parser2.hpp index e3e17db..1db140d 100644 --- a/test/test_parser2.hpp +++ b/test/test_parser2.hpp @@ -261,11 +261,14 @@ std::vector generate_csv_data(const std::vector& data, const std::string& file_name) { std::ofstream out{file_name, std::ios_base::app}; std::string line; + + // TODO remove new line at eof randomly for (size_t i = 0; i < data.size(); ++i) { line += data[i]; if (i != data.size() - 1) { line += delim; } + } out << line << std::endl; From cbbe0acb25dc95a0e483a95ab737296be0c60533 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 20:06:32 +0100 Subject: [PATCH 30/57] [skip ci] Fix ci_install_lcov.sh --- script/ci_install_lcov.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/ci_install_lcov.sh b/script/ci_install_lcov.sh index 2dbe193..1dda243 100755 --- a/script/ci_install_lcov.sh +++ b/script/ci_install_lcov.sh @@ -3,5 +3,5 @@ echo yes | cpan DateTime Capture::Tiny wget -qO- https://github.com/linux-test-project/lcov/releases/download/v2.0/lcov-2.0.tar.gz | tar xvz -(cd lcov && 
make install) +(cd lcov-2.0 && make install) lcov --version From 90a116ac7bb4fe82e3a816abac3d41110ad80b01 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 20:19:42 +0100 Subject: [PATCH 31/57] [skip ci] Update coverage.yml, add lcov filters --- .github/workflows/coverage.yml | 4 +--- script/ci_install_lcov.sh | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 19869c8..0b35c43 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -63,9 +63,7 @@ jobs: - name: Generate coverage report run: | - lcov --version - lcov --help - lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external + lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external --filter branch --filter line lcov -e out.info '*include/ss*hpp' -o filtered.info - name: Invoke coveralls diff --git a/script/ci_install_lcov.sh b/script/ci_install_lcov.sh index 1dda243..e1280cd 100755 --- a/script/ci_install_lcov.sh +++ b/script/ci_install_lcov.sh @@ -4,4 +4,3 @@ echo yes | cpan DateTime Capture::Tiny wget -qO- https://github.com/linux-test-project/lcov/releases/download/v2.0/lcov-2.0.tar.gz | tar xvz (cd lcov-2.0 && make install) -lcov --version From 1798b4c6f31248c125cfd79285d64d549e5a82d0 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 20:52:34 +0100 Subject: [PATCH 32/57] [skip ci] Update coverage.yml, fix lcov usage error --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 0b35c43..50c0cc3 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -63,7 +63,7 @@ jobs: - name: Generate coverage report run: | - lcov -d . -c -o out.info --rc lcov_branch_coverage=1 --no-external --filter branch --filter line + lcov -d . 
-c -o out.info --rc branch_coverage=1 --no-external --filter branch --filter line --ignore-erros mismatch lcov -e out.info '*include/ss*hpp' -o filtered.info - name: Invoke coveralls From cbb0a1ad8e2de2d77170c51c238fa9fb98d096c1 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 21:23:38 +0100 Subject: [PATCH 33/57] [skip ci] Fix coverage.yml typo, disable extended tests --- .github/workflows/coverage.yml | 2 +- test/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 50c0cc3..a0cc019 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -63,7 +63,7 @@ jobs: - name: Generate coverage report run: | - lcov -d . -c -o out.info --rc branch_coverage=1 --no-external --filter branch --filter line --ignore-erros mismatch + lcov -d . -c -o out.info --rc branch_coverage=1 --no-external --filter branch --filter line --ignore-errors mismatch lcov -e out.info '*include/ss*hpp' -o filtered.info - name: Invoke coveralls diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index be399a1..b92273e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -41,6 +41,6 @@ foreach(name IN ITEMS test_splitter test_parser1_1 test_parser1_2 target_link_libraries("${name}" PRIVATE ssp::ssp fast_float doctest::doctest) target_compile_definitions( - "${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI) + "${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN _CMAKE_GITHUB_CI) add_test(NAME "${name}" COMMAND "${name}") endforeach() From 6516c6cc94319a42602b403cbfcaa11ef079890f Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 21:38:27 +0100 Subject: [PATCH 34/57] [skip ci] Restore CMAKE_GITHUB_CI, disable some extended tests --- test/CMakeLists.txt | 2 +- test/test_parser2_1.cpp | 2 +- test/test_parser2_2.cpp | 2 +- test/test_parser2_3.cpp | 2 +- test/test_parser2_4.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff 
--git a/test/CMakeLists.txt b/test/CMakeLists.txt index b92273e..be399a1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -41,6 +41,6 @@ foreach(name IN ITEMS test_splitter test_parser1_1 test_parser1_2 target_link_libraries("${name}" PRIVATE ssp::ssp fast_float doctest::doctest) target_compile_definitions( - "${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN _CMAKE_GITHUB_CI) + "${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI) add_test(NAME "${name}" COMMAND "${name}") endforeach() diff --git a/test/test_parser2_1.cpp b/test/test_parser2_1.cpp index 55ec45a..746611e 100644 --- a/test/test_parser2_1.cpp +++ b/test/test_parser2_1.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 1") { -#ifdef CMAKE_GITHUB_CI +#ifdef CMAKE_GITHUB_CI_ using escape = ss::escape<'\\'>; test_option_combinations3<>(); diff --git a/test/test_parser2_2.cpp b/test/test_parser2_2.cpp index ec76aa9..761e2e7 100644 --- a/test/test_parser2_2.cpp +++ b/test/test_parser2_2.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 2") { -#ifdef CMAKE_GITHUB_CI +#ifdef CMAKE_GITHUB_CI_ using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; diff --git a/test/test_parser2_3.cpp b/test/test_parser2_3.cpp index aab905c..a0b7711 100644 --- a/test/test_parser2_3.cpp +++ b/test/test_parser2_3.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 3") { -#ifdef CMAKE_GITHUB_CI +#ifdef CMAKE_GITHUB_CI_ using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; diff --git a/test/test_parser2_4.cpp b/test/test_parser2_4.cpp index ea78aaf..db0581f 100644 --- a/test/test_parser2_4.cpp +++ b/test/test_parser2_4.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 4") { -#ifdef CMAKE_GITHUB_CI +#ifdef CMAKE_GITHUB_CI_ using quote = 
ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; From 57ba23c5742585a1f06f377b000a69c194a9e3df Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 21 Feb 2024 21:53:01 +0100 Subject: [PATCH 35/57] [skip ci] Restore extended tests --- test/test_parser2_1.cpp | 2 +- test/test_parser2_2.cpp | 2 +- test/test_parser2_3.cpp | 2 +- test/test_parser2_4.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_parser2_1.cpp b/test/test_parser2_1.cpp index 746611e..55ec45a 100644 --- a/test/test_parser2_1.cpp +++ b/test/test_parser2_1.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 1") { -#ifdef CMAKE_GITHUB_CI_ +#ifdef CMAKE_GITHUB_CI using escape = ss::escape<'\\'>; test_option_combinations3<>(); diff --git a/test/test_parser2_2.cpp b/test/test_parser2_2.cpp index 761e2e7..ec76aa9 100644 --- a/test/test_parser2_2.cpp +++ b/test/test_parser2_2.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 2") { -#ifdef CMAKE_GITHUB_CI_ +#ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; diff --git a/test/test_parser2_3.cpp b/test/test_parser2_3.cpp index a0b7711..aab905c 100644 --- a/test/test_parser2_3.cpp +++ b/test/test_parser2_3.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 3") { -#ifdef CMAKE_GITHUB_CI_ +#ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; diff --git a/test/test_parser2_4.cpp b/test/test_parser2_4.cpp index db0581f..ea78aaf 100644 --- a/test/test_parser2_4.cpp +++ b/test/test_parser2_4.cpp @@ -2,7 +2,7 @@ #include "test_parser2.hpp" TEST_CASE("parser test various cases version 2 segment 4") { -#ifdef CMAKE_GITHUB_CI_ +#ifdef CMAKE_GITHUB_CI using quote = ss::quote<'"'>; using escape = ss::escape<'\\'>; using multiline = ss::multiline; From 
3ea8adedfdbbf7d27df0422bae2d1c00808521df Mon Sep 17 00:00:00 2001 From: ado Date: Thu, 22 Feb 2024 00:30:05 +0100 Subject: [PATCH 36/57] Add extended tests with no new line at eof --- include/ss/common.hpp | 4 ++-- include/ss/parser.hpp | 5 ++++- ssp.hpp | 37 ++++++++++++++++++++++--------------- test/test_parser2.hpp | 38 +++++++++++++++----------------------- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 095561c..0a4f934 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -12,7 +12,7 @@ using string_range = std::pair; using split_data = std::vector; constexpr inline auto default_delimiter = ","; -constexpr static auto get_line_initial_buffer_size = 128; +constexpr inline auto get_line_initial_buffer_size = 128; template inline void assert_string_error_defined() { @@ -31,7 +31,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else -// TODO set ERRNO ? 
+ using ssize_t = int64_t; inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { size_t pos; diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index b715e75..e11969e 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -5,6 +5,7 @@ #include "exception.hpp" #include "extract.hpp" #include "restrictions.hpp" +#include #include #include #include @@ -742,7 +743,6 @@ private: reader(const reader& other) = delete; reader& operator=(const reader& other) = delete; - // TODO set error numbers on error ssize_t get_line_buffer(char** lineptr, size_t* n, const char* const buffer, size_t csv_data_size, size_t& curr_char) { @@ -812,6 +812,9 @@ private: } if (ssize == -1) { + if (errno == ENOMEM) { + throw std::bad_alloc{}; + } return false; } diff --git a/ssp.hpp b/ssp.hpp index 9b555ec..72e4c2f 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -625,7 +626,7 @@ using string_range = std::pair; using split_data = std::vector; constexpr inline auto default_delimiter = ","; -constexpr static auto get_line_initial_buffer_size = 128; +constexpr inline auto get_line_initial_buffer_size = 128; template inline void assert_string_error_defined() { @@ -644,6 +645,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else + using ssize_t = int64_t; inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { size_t pos; @@ -2495,20 +2497,22 @@ public: template void try_convert_and_invoke(std::optional& value, Fun&& fun) { - if (!parser_.valid()) { - auto tuple_output = try_same(); - if (!parser_.valid()) { - return; - } - - if constexpr (!std::is_same_v) { - value = to_object(std::move(tuple_output)); - } else { - value = std::move(tuple_output); - } - - parser_.try_invoke(*value, std::forward(fun)); + if (parser_.valid()) { + return; } + + auto tuple_output = try_same(); + if (!parser_.valid()) { + return; + } + + if 
constexpr (!std::is_same_v) { + value = to_object(std::move(tuple_output)); + } else { + value = std::move(tuple_output); + } + + parser_.try_invoke(*value, std::forward(fun)); } template @@ -2936,6 +2940,9 @@ private: } if (ssize == -1) { + if (errno == ENOMEM) { + throw std::bad_alloc{}; + } return false; } @@ -3067,7 +3074,7 @@ private: buffer_size = first_size + second_size + 3; auto new_first = static_cast( realloc(static_cast(first), buffer_size)); - if (!first) { + if (!new_first) { throw std::bad_alloc{}; } diff --git a/test/test_parser2.hpp b/test/test_parser2.hpp index 1db140d..9d29f2a 100644 --- a/test/test_parser2.hpp +++ b/test/test_parser2.hpp @@ -258,20 +258,22 @@ std::vector generate_csv_data(const std::vector& data, [[maybe_unused]] void write_to_file(const std::vector& data, const std::string& delim, - const std::string& file_name) { + const std::string& file_name, + bool add_new_line = true) { std::ofstream out{file_name, std::ios_base::app}; std::string line; - // TODO remove new line at eof randomly for (size_t i = 0; i < data.size(); ++i) { line += data[i]; if (i != data.size() - 1) { line += delim; } - } - out << line << std::endl; + out << line; + if (add_new_line) { + out << std::endl; + } } #define CHECK_EQ_CRLF(V1, V2) \ @@ -333,7 +335,11 @@ void test_data_combinations(const std::vector& input_data, if (include_header) { auto header_data = generate_csv_data(field_header, delim); - write_to_file(header_data, delim, f.name); + if (input_data.size() == 0 && rand() % 10 == 0) { + write_to_file(header_data, delim, f.name, false); + } else { + write_to_file(header_data, delim, f.name); + } } std::vector layout; @@ -358,15 +364,12 @@ void test_data_combinations(const std::vector& input_data, expected_data.push_back(raw_data); auto data = generate_csv_data(raw_data, delim); - write_to_file(data, delim, f.name); - /* - std::cout << "[."; - for (const auto& el : data) { - std::cout << el << '.'; + if (i + 1 == n && rand() % 10 == 0) { + 
write_to_file(data, delim, f.name, false); + } else { + write_to_file(data, delim, f.name); } - std::cout << "]" << std::endl; - */ } auto layout_combinations = include_header && !setup::ignore_header @@ -430,7 +433,6 @@ void test_data_combinations(const std::vector& input_data, auto s0 = p.template get_next(); if (i < n) { check_error(); - // std::cout << s0 << std::endl; CHECK_EQ_CRLF(s0, expected_data[i][layout[0]].value); } else { CHECK(p.eof()); @@ -443,7 +445,6 @@ void test_data_combinations(const std::vector& input_data, p.template get_next(); if (i < n) { check_error(); - // std::cout << s0 << ' ' << s1 << std::endl; CHECK_EQ_CRLF(s0, expected_data[i][layout[0]].value); CHECK_EQ_CRLF(s1, expected_data[i][layout[1]].value); } else { @@ -458,8 +459,6 @@ void test_data_combinations(const std::vector& input_data, std::string>(); if (i < n) { check_error(); - // std::cout << s0 << ' ' << s1 << ' ' << s2 << - // std::endl; CHECK_EQ_CRLF(s0, expected_data[i][layout[0]].value); CHECK_EQ_CRLF(s1, expected_data[i][layout[1]].value); CHECK_EQ_CRLF(s2, expected_data[i][layout[2]].value); @@ -475,10 +474,6 @@ void test_data_combinations(const std::vector& input_data, std::string, std::string>(); if (i < n) { check_error(); - /* - std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 - << std::endl; - */ CHECK_EQ_CRLF(s0, expected_data[i][layout[0]].value); CHECK_EQ_CRLF(s1, expected_data[i][layout[1]].value); CHECK_EQ_CRLF(s2, expected_data[i][layout[2]].value); @@ -496,9 +491,6 @@ void test_data_combinations(const std::vector& input_data, std::string>(); if (i < n) { check_error(); - // std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << - // s3 - // << ' ' << s4 << std::endl; CHECK_EQ_CRLF(s0, expected_data[i][layout[0]].value); CHECK_EQ_CRLF(s1, expected_data[i][layout[1]].value); CHECK_EQ_CRLF(s2, expected_data[i][layout[2]].value); From 8881649aca9d5d7efca9730dc8ed1098cd3c69d5 Mon Sep 17 00:00:00 2001 From: ado Date: Fri, 23 Feb 2024 23:25:03 +0100 Subject: [PATCH 
37/57] Update get_line_buffer, update new version of get_line_file to work with data that has no new line at eof --- include/ss/common.hpp | 9 ++++--- include/ss/parser.hpp | 54 +++++++++++++++++++++-------------------- test/test_parser1_1.cpp | 13 ++++++++++ 3 files changed, 46 insertions(+), 30 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 95953c6..4493cc3 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -58,14 +58,15 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { (*lineptr)[0] = '\0'; + size_t line_used = 0; while (fgets(buff, sizeof(buff), fp) != nullptr) { - size_t line_used = strlen(*lineptr); + line_used = strlen(*lineptr); size_t buff_used = strlen(buff); - if (*n < buff_used + line_used) { + if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - auto new_lineptr = static_cast(realloc(*lineptr, *n)); + auto new_lineptr = static_cast(realloc(*lineptr, new_n)); if (new_lineptr == nullptr) { errno = ENOMEM; return -1; @@ -84,7 +85,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { } } - return -1; + return (line_used != 0) ? 
line_used : -1; } #endif diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index e11969e..c19b834 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -744,50 +744,52 @@ private: reader& operator=(const reader& other) = delete; ssize_t get_line_buffer(char** lineptr, size_t* n, - const char* const buffer, size_t csv_data_size, - size_t& curr_char) { - size_t pos; - int c; + const char* const csv_data_buffer, + size_t csv_data_size, size_t& curr_char) { + if (lineptr == nullptr || n == nullptr || + csv_data_buffer == nullptr) { + errno = EINVAL; + return -1; + } if (curr_char >= csv_data_size) { return -1; } - c = buffer[curr_char++]; - if (*lineptr == nullptr) { - *lineptr = - static_cast(malloc(get_line_initial_buffer_size)); - if (*lineptr == nullptr) { + if (*lineptr == nullptr || *n < get_line_initial_buffer_size) { + auto new_lineptr = static_cast( + realloc(*lineptr, get_line_initial_buffer_size)); + if (new_lineptr == nullptr) { return -1; } - *n = 128; + *lineptr = new_lineptr; + *n = get_line_initial_buffer_size; } - pos = 0; + size_t line_used = 0; while (curr_char <= csv_data_size) { - if (pos + 1 >= *n) { - size_t new_size = *n + (*n >> 2); - if (new_size < get_line_initial_buffer_size) { - new_size = get_line_initial_buffer_size; - } - char* new_ptr = static_cast( - realloc(static_cast(*lineptr), new_size)); - if (new_ptr == nullptr) { + if (line_used + 1 >= *n) { + size_t new_n = *n * 2; + + char* new_lineptr = + static_cast(realloc(*lineptr, new_n)); + if (new_lineptr == nullptr) { + errno = ENOMEM; return -1; } - *n = new_size; - *lineptr = new_ptr; + *n = new_n; + *lineptr = new_lineptr; } - (*lineptr)[pos++] = c; + auto c = csv_data_buffer[curr_char++]; + (*lineptr)[line_used++] = c; if (c == '\n') { - break; + (*lineptr)[line_used] = '\0'; + return line_used; } - c = buffer[curr_char++]; } - (*lineptr)[pos] = '\0'; - return pos; + return (line_used != 0) ? 
line_used : -1; } // read next line each time in order to set eof_ diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 4c8d476..47adbd2 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -571,9 +571,22 @@ template void test_no_new_line_at_eof() { test_no_new_line_at_eof_impl({}); test_no_new_line_at_eof_impl({{1, 2, "X"}}); + test_no_new_line_at_eof_impl({{1, 2, "X"}, {}}); test_no_new_line_at_eof_impl({{1, 2, "X"}, {3, 4, "YY"}}); + test_no_new_line_at_eof_impl({{1, 2, "X"}, {3, 4, "YY"}, {}}); test_no_new_line_at_eof_impl( {{1, 2, "X"}, {3, 4, "YY"}, {5, 6, "ZZZ"}, {7, 8, "UUU"}}); + + for (size_t i = 0; i < 2 * ss::get_line_initial_buffer_size; ++i) { + test_no_new_line_at_eof_impl( + {{1, 2, std::string(i, 'X')}}); + + for (size_t j = 0; j < 2 * ss::get_line_initial_buffer_size; ++j) { + + test_no_new_line_at_eof_impl( + {{1, 2, std::string(i, 'X')}, {3, 4, std::string(j, 'Y')}}); + } + } } TEST_CASE("test no new line at end of data") { From 21b543ea4fa626d2fd18576b5132af148711dd37 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 24 Feb 2024 13:55:31 +0100 Subject: [PATCH 38/57] [skip ci] Disable buffer mode no new line at end of data test --- test/test_parser1_1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 47adbd2..4036a7e 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -591,5 +591,5 @@ void test_no_new_line_at_eof() { TEST_CASE("test no new line at end of data") { test_no_new_line_at_eof(); - test_no_new_line_at_eof(); + // test_no_new_line_at_eof(); } From c5e491041d829db467b5a1b6fac76e0517ca3d7b Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 24 Feb 2024 14:17:23 +0100 Subject: [PATCH 39/57] [skip ci] Disable file mode for no new line at end of data test --- test/test_parser1_1.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 4036a7e..d70b0d9 
100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -590,6 +590,6 @@ void test_no_new_line_at_eof() { } TEST_CASE("test no new line at end of data") { - test_no_new_line_at_eof(); - // test_no_new_line_at_eof(); + // test_no_new_line_at_eof(); + test_no_new_line_at_eof(); } From c6f6ba9821c0f58b4949ac265e23344c582a9a4b Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 24 Feb 2024 14:37:26 +0100 Subject: [PATCH 40/57] [skip ci] Reduce number of runs for no new line at end of data test --- test/test_parser1_1.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index d70b0d9..401d171 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -581,15 +581,18 @@ void test_no_new_line_at_eof() { test_no_new_line_at_eof_impl( {{1, 2, std::string(i, 'X')}}); - for (size_t j = 0; j < 2 * ss::get_line_initial_buffer_size; ++j) { + for (size_t j = 0; j < 2 * ss::get_line_initial_buffer_size; j += 13) { test_no_new_line_at_eof_impl( {{1, 2, std::string(i, 'X')}, {3, 4, std::string(j, 'Y')}}); + + test_no_new_line_at_eof_impl( + {{1, 2, std::string(j, 'X')}, {3, 4, std::string(i, 'Y')}}); } } } TEST_CASE("test no new line at end of data") { - // test_no_new_line_at_eof(); + test_no_new_line_at_eof(); test_no_new_line_at_eof(); } From 383de57f9a226f2d392c0f9f9c85babc2c4a0646 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 24 Feb 2024 19:09:41 +0100 Subject: [PATCH 41/57] Make ssize_t equal to intptr_t for non-POSIX environments --- include/ss/common.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 4493cc3..1b19e3f 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -34,7 +34,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { } #else -using ssize_t = int64_t; +using ssize_t = intptr_t; ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (lineptr == nullptr || n 
== nullptr || fp == nullptr) { From 88e711a5f74214a44fdcc8de03efe590449c025e Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 02:06:48 +0100 Subject: [PATCH 42/57] Add positions method to parser, write unit tests for it, update other parser tests --- include/ss/parser.hpp | 10 ++ include/ss/setup.hpp | 8 +- ssp.hpp | 83 ++++++++------- test/test_helpers.hpp | 61 ++++++++--- test/test_parser1_1.cpp | 201 +++++++++++++++++++++++++----------- test/test_parser1_2.cpp | 145 +++++++++----------------- test/test_parser1_3.cpp | 155 +++++++++++++--------------- test/test_parser1_4.cpp | 220 +++++++++++++++++----------------------- test/test_parser2.hpp | 2 +- 9 files changed, 460 insertions(+), 425 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index c19b834..50f61b0 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -106,6 +106,10 @@ public: : reader_.line_number_; } + size_t position() const { + return reader_.chars_read_; + } + template no_void_validator_tup_t get_next() { std::optional error; @@ -694,6 +698,7 @@ private: csv_data_size_{other.csv_data_size_}, curr_char_{other.curr_char_}, crlf_{other.crlf_}, line_number_{other.line_number_}, + chars_read_{other.chars_read_}, next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; @@ -718,12 +723,14 @@ private: curr_char_ = other.curr_char_; crlf_ = other.crlf_; line_number_ = other.line_number_; + chars_read_ = other.chars_read_; next_line_size_ = other.next_line_size_; other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; other.helper_buffer_ = nullptr; other.file_ = nullptr; + other.csv_data_buffer_ = nullptr; } return *this; @@ -803,9 +810,11 @@ private: next_line_buffer_[0] = '\0'; } + chars_read_ = curr_char_; if (file_) { ssize = get_line_file(&next_line_buffer_, &next_line_buffer_size_, file_); + curr_char_ = ftell(file_); } else { ssize = get_line_buffer(&next_line_buffer_, &next_line_buffer_size_, @@ -1009,6 
+1018,7 @@ private: bool crlf_{false}; size_t line_number_{0}; + size_t chars_read_{0}; size_t next_line_size_{0}; }; diff --git a/include/ss/setup.hpp b/include/ss/setup.hpp index 80cf4d5..2f298fc 100644 --- a/include/ss/setup.hpp +++ b/include/ss/setup.hpp @@ -165,25 +165,25 @@ using get_multiline_t = typename get_multiline::type; // string_error //////////////// -class string_error; +class string_error {}; //////////////// // ignore_header //////////////// -class ignore_header; +class ignore_header {}; //////////////// // ignore_empty //////////////// -class ignore_empty; +class ignore_empty {}; //////////////// // throw_on_error //////////////// -class throw_on_error; +class throw_on_error {}; //////////////// // setup implementation diff --git a/ssp.hpp b/ssp.hpp index c6e6620..4e7bd02 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -646,7 +646,7 @@ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { } #else -using ssize_t = int64_t; +using ssize_t = intptr_t; ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (lineptr == nullptr || n == nullptr || fp == nullptr) { @@ -670,14 +670,15 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { (*lineptr)[0] = '\0'; + size_t line_used = 0; while (fgets(buff, sizeof(buff), fp) != nullptr) { - size_t line_used = strlen(*lineptr); + line_used = strlen(*lineptr); size_t buff_used = strlen(buff); - if (*n < buff_used + line_used) { + if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - auto new_lineptr = static_cast(realloc(*lineptr, *n)); + auto new_lineptr = static_cast(realloc(*lineptr, new_n)); if (new_lineptr == nullptr) { errno = ENOMEM; return -1; @@ -696,7 +697,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { } } - return -1; + return (line_used != 0) ? 
line_used : -1; } #endif @@ -866,25 +867,25 @@ using get_multiline_t = typename get_multiline::type; // string_error //////////////// -class string_error; +class string_error {}; //////////////// // ignore_header //////////////// -class ignore_header; +class ignore_header {}; //////////////// // ignore_empty //////////////// -class ignore_empty; +class ignore_empty {}; //////////////// // throw_on_error //////////////// -class throw_on_error; +class throw_on_error {}; //////////////// // setup implementation @@ -2238,6 +2239,10 @@ public: : reader_.line_number_; } + size_t position() const { + return reader_.chars_read_; + } + template no_void_validator_tup_t get_next() { std::optional error; @@ -2826,6 +2831,7 @@ private: csv_data_size_{other.csv_data_size_}, curr_char_{other.curr_char_}, crlf_{other.crlf_}, line_number_{other.line_number_}, + chars_read_{other.chars_read_}, next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; @@ -2850,12 +2856,14 @@ private: curr_char_ = other.curr_char_; crlf_ = other.crlf_; line_number_ = other.line_number_; + chars_read_ = other.chars_read_; next_line_size_ = other.next_line_size_; other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; other.helper_buffer_ = nullptr; other.file_ = nullptr; + other.csv_data_buffer_ = nullptr; } return *this; @@ -2876,50 +2884,52 @@ private: reader& operator=(const reader& other) = delete; ssize_t get_line_buffer(char** lineptr, size_t* n, - const char* const buffer, size_t csv_data_size, - size_t& curr_char) { - size_t pos; - int c; + const char* const csv_data_buffer, + size_t csv_data_size, size_t& curr_char) { + if (lineptr == nullptr || n == nullptr || + csv_data_buffer == nullptr) { + errno = EINVAL; + return -1; + } if (curr_char >= csv_data_size) { return -1; } - c = buffer[curr_char++]; - if (*lineptr == nullptr) { - *lineptr = - static_cast(malloc(get_line_initial_buffer_size)); - if (*lineptr == nullptr) { + if (*lineptr == nullptr 
|| *n < get_line_initial_buffer_size) { + auto new_lineptr = static_cast( + realloc(*lineptr, get_line_initial_buffer_size)); + if (new_lineptr == nullptr) { return -1; } - *n = 128; + *lineptr = new_lineptr; + *n = get_line_initial_buffer_size; } - pos = 0; + size_t line_used = 0; while (curr_char <= csv_data_size) { - if (pos + 1 >= *n) { - size_t new_size = *n + (*n >> 2); - if (new_size < get_line_initial_buffer_size) { - new_size = get_line_initial_buffer_size; - } - char* new_ptr = static_cast( - realloc(static_cast(*lineptr), new_size)); - if (new_ptr == nullptr) { + if (line_used + 1 >= *n) { + size_t new_n = *n * 2; + + char* new_lineptr = + static_cast(realloc(*lineptr, new_n)); + if (new_lineptr == nullptr) { + errno = ENOMEM; return -1; } - *n = new_size; - *lineptr = new_ptr; + *n = new_n; + *lineptr = new_lineptr; } - (*lineptr)[pos++] = c; + auto c = csv_data_buffer[curr_char++]; + (*lineptr)[line_used++] = c; if (c == '\n') { - break; + (*lineptr)[line_used] = '\0'; + return line_used; } - c = buffer[curr_char++]; } - (*lineptr)[pos] = '\0'; - return pos; + return (line_used != 0) ? 
line_used : -1; } // read next line each time in order to set eof_ @@ -2933,9 +2943,11 @@ private: next_line_buffer_[0] = '\0'; } + chars_read_ = curr_char_; if (file_) { ssize = get_line_file(&next_line_buffer_, &next_line_buffer_size_, file_); + curr_char_ = ftell(file_); } else { ssize = get_line_buffer(&next_line_buffer_, &next_line_buffer_size_, @@ -3139,6 +3151,7 @@ private: bool crlf_{false}; size_t line_number_{0}; + size_t chars_read_{0}; size_t next_line_size_{0}; }; diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 8df69d9..89477c8 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -1,12 +1,14 @@ #pragma once +#include #include #include +#include #include +#include +#include #include #include #include -#include -#include #ifdef CMAKE_GITHUB_CI #include @@ -20,6 +22,24 @@ class parser; } /* ss */ namespace { + +struct bool_error {}; + +template +struct config { + using BufferMode = T; + using ErrorMode = U; + + constexpr static auto ThrowOnError = std::is_same_v; + constexpr static auto StringError = std::is_same_v; +}; + +#define ParserOptionCombinations \ + config, config, \ + config, config, \ + config, \ + config + struct buffer { std::string data_; @@ -172,23 +192,32 @@ template } template -[[maybe_unused]] std::tuple, std::string> make_parser( - const std::string& file_name, const std::string& delim = "") { +std::tuple, std::string> make_parser_impl( + const std::string& file_name, std::string delim = ss::default_delimiter) { if (buffer_mode) { auto buffer = make_buffer(file_name); - if (delim.empty()) { - return {ss::parser{buffer.data(), buffer.size()}, - std::move(buffer)}; - } else { - return {ss::parser{buffer.data(), buffer.size(), delim}, - std::move(buffer)}; - } + return {ss::parser{buffer.data(), buffer.size(), delim}, + std::move(buffer)}; } else { - if (delim.empty()) { - return {ss::parser{file_name}, std::string{}}; - } else { - return {ss::parser{file_name, delim}, std::string{}}; - } + return 
{ss::parser{file_name, delim}, std::string{}}; } } + +template +[[maybe_unused]] std::enable_if_t< + !std::is_same_v, + std::tuple, std::string>> +make_parser(const std::string& file_name, + std::string delim = ss::default_delimiter) { + return make_parser_impl(file_name, delim); +} + +template +[[maybe_unused]] std::enable_if_t, + std::tuple, std::string>> +make_parser(const std::string& file_name, + std::string delim = ss::default_delimiter) { + return make_parser_impl(file_name, delim); +} + } /* namespace */ diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 401d171..681877f 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -1,7 +1,7 @@ #include "test_parser1.hpp" TEST_CASE("test file not found") { - unique_file_name f{"test_parser"}; + unique_file_name f{"file_not_found"}; { ss::parser p{f.name, ","}; @@ -11,6 +11,7 @@ TEST_CASE("test file not found") { { ss::parser p{f.name, ","}; CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); } try { @@ -30,6 +31,7 @@ TEST_CASE("test null buffer") { { ss::parser p{nullptr, 10, ","}; CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); } try { @@ -40,20 +42,105 @@ TEST_CASE("test null buffer") { } } -template -void test_various_cases() { +struct Y { + constexpr static auto delim = ","; + std::string s1; + std::string s2; + std::string s3; + + std::string to_string() const { + return std::string{} + .append(s1) + .append(delim) + .append(s2) + .append(delim) + .append(s3); + } + + auto tied() const { + return std::tie(s1, s2, s3); + } +}; + +TEST_CASE_TEMPLATE("test position method", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"position_method"}; + std::vector data = {{"1", "21", "x"}, {"321", "4", "y"}, + {"54", "6", "zz"}, {"7", "876", "uuuu"}, + {"910", "10", "v"}, {"10", "321", "ww"}}; + make_and_write(f.name, data); + + auto [p, buff] = make_parser(f.name); 
+ auto data_at = [&buff = buff, &f = f](auto n) { + if (!buff.empty()) { + return buff[n]; + } else { + auto file = fopen(f.name.c_str(), "r"); + fseek(file, n, SEEK_SET); + return static_cast(fgetc(file)); + } + }; + + while (!p.eof()) { + auto curr_char = p.position(); + const auto& [s1, s2, s3] = + p.template get_next(); + + auto s = s1 + "," + s2 + "," + s3; + + for (size_t i = 0; i < s1.size(); ++i) { + CHECK_EQ(data_at(curr_char + i), s[i]); + } + + auto last_char = data_at(curr_char + s.size()); + CHECK((last_char == '\n' || last_char == '\r')); + } +} + +// TODO uncomment +/* +TEST_CASE_TEMPLATE("test line method", BufferMode, std::true_type, + std::false_type) { unique_file_name f{"test_parser"}; + std::vector data = {{"1", "21", "x"}, {"321", "4", "y"}, + {"54", "6", "zz"}, {"7", "876", "uuuu"}, + {"910", "10", "v"}, {"10", "321", "ww"}}; + make_and_write(f.name, data); + + auto [p, buff] = make_parser(f.name); + + auto expected_line = 0; + CHECK_EQ(p.line(), expected_line); + + while (!p.eof()) { + auto _ = p.template get_next(); + ++expected_line; + CHECK_EQ(p.line(), expected_line); + } + + CHECK_EQ(p.line(), data.size()); +} +*/ + +TEST_CASE_TEMPLATE("parser test various valid cases", T, + ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"various_valid_cases"}; std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; make_and_write(f.name, data); auto csv_data_buffer = make_buffer(f.name); { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); ss::parser p0{std::move(p)}; p = std::move(p0); std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; auto move_rotate = [&p = p, &p0 = p0] { @@ -77,13 +164,13 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); 
std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; - auto [p3, ___] = make_parser(f.name, ","); + auto [p3, ___] = make_parser(f.name, ","); std::vector i3; std::vector expected = {std::begin(data) + 1, std::end(data)}; @@ -112,9 +199,9 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { @@ -131,7 +218,7 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; for (auto&& a : @@ -143,10 +230,10 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; using tup = std::tuple; @@ -164,7 +251,7 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; using tup = std::tuple; @@ -176,7 +263,7 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; while (!p.eof()) { @@ -187,7 +274,7 @@ void test_various_cases() { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; for (auto&& a : p.template iterate()) { @@ -199,10 +286,10 @@ void test_various_cases() { { constexpr int excluded = 3; - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { @@ -217,7 +304,7 @@ void test_various_cases() { }; } - if (!ss::setup::throw_on_error) { + if (!T::ThrowOnError) { for (auto&& a : p2.template 
iterate_object, double, std::string>()) { if (p2.valid()) { @@ -237,16 +324,16 @@ void test_various_cases() { [&](const X& x) { return x.i != excluded; }); CHECK_EQ(i, expected); - if (!ss::setup::throw_on_error) { + if (!T::ThrowOnError) { CHECK_EQ(i2, expected); } } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; - auto [p2, __] = make_parser(f.name, ","); + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { @@ -261,7 +348,7 @@ void test_various_cases() { } } - if (!ss::setup::throw_on_error) { + if (!T::ThrowOnError) { for (auto&& a : p2.template iterate_object, double, std::string>()) { if (p2.valid()) { @@ -272,21 +359,21 @@ void test_various_cases() { std::vector expected = {{3, 4, "y"}}; CHECK_EQ(i, expected); - if (!ss::setup::throw_on_error) { + if (!T::ThrowOnError) { CHECK_EQ(i2, expected); } } { - unique_file_name empty_f{"test_parser"}; + unique_file_name empty_f{"various_valid_cases"}; std::vector empty_data = {}; make_and_write(empty_f.name, empty_data); - auto [p, _] = make_parser(empty_f.name, ","); + auto [p, _] = make_parser(empty_f.name, ","); std::vector i; - auto [p2, __] = make_parser(empty_f.name, ","); + auto [p2, __] = make_parser(empty_f.name, ","); std::vector i2; while (!p.eof()) { @@ -302,15 +389,6 @@ void test_various_cases() { } } -TEST_CASE("parser test various cases") { - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); -} - using test_tuple = std::tuple; struct test_struct { int i; @@ -324,9 +402,10 @@ struct test_struct { static inline void expect_test_struct(const test_struct&) { } -template -void test_composite_conversion() { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("parser test composite conversion", BufferMode, + std::true_type, std::false_type) { + constexpr auto buffer_mode = BufferMode::value; + unique_file_name f{"composite_conversion"}; 
{ std::ofstream out{f.name}; for (auto& i : @@ -336,7 +415,7 @@ void test_composite_conversion() { } } - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto fail = [] { FAIL(""); }; auto expect_error = [](auto error) { CHECK(!error.empty()); }; auto ignore_error = [] {}; @@ -546,18 +625,12 @@ void test_composite_conversion() { CHECK(p.eof()); } -// various scenarios -TEST_CASE("parser test composite conversion") { - test_composite_conversion(); - test_composite_conversion(); -} - -template +template void test_no_new_line_at_eof_impl(const std::vector& data) { - unique_file_name f{"test_parser"}; + unique_file_name f{"no_new_line_at_eof"}; make_and_write(f.name, data, {}, false); - auto [p, _] = make_parser(f.name); + auto [p, _] = make_parser(f.name); std::vector parsed_data; for (const auto& el : p.template iterate()) { @@ -567,32 +640,36 @@ void test_no_new_line_at_eof_impl(const std::vector& data) { CHECK_EQ(data, parsed_data); } -template +template void test_no_new_line_at_eof() { - test_no_new_line_at_eof_impl({}); - test_no_new_line_at_eof_impl({{1, 2, "X"}}); - test_no_new_line_at_eof_impl({{1, 2, "X"}, {}}); - test_no_new_line_at_eof_impl({{1, 2, "X"}, {3, 4, "YY"}}); - test_no_new_line_at_eof_impl({{1, 2, "X"}, {3, 4, "YY"}, {}}); - test_no_new_line_at_eof_impl( + test_no_new_line_at_eof_impl({}); + test_no_new_line_at_eof_impl({{1, 2, "X"}}); + test_no_new_line_at_eof_impl({{1, 2, "X"}, {}}); + test_no_new_line_at_eof_impl( + {{1, 2, "X"}, {3, 4, "YY"}}); + test_no_new_line_at_eof_impl( + {{1, 2, "X"}, {3, 4, "YY"}, {}}); + test_no_new_line_at_eof_impl( {{1, 2, "X"}, {3, 4, "YY"}, {5, 6, "ZZZ"}, {7, 8, "UUU"}}); for (size_t i = 0; i < 2 * ss::get_line_initial_buffer_size; ++i) { - test_no_new_line_at_eof_impl( + test_no_new_line_at_eof_impl( {{1, 2, std::string(i, 'X')}}); for (size_t j = 0; j < 2 * ss::get_line_initial_buffer_size; j += 13) { - test_no_new_line_at_eof_impl( + test_no_new_line_at_eof_impl( {{1, 2, 
std::string(i, 'X')}, {3, 4, std::string(j, 'Y')}}); - test_no_new_line_at_eof_impl( + test_no_new_line_at_eof_impl( {{1, 2, std::string(j, 'X')}, {3, 4, std::string(i, 'Y')}}); } } } -TEST_CASE("test no new line at end of data") { - test_no_new_line_at_eof(); - test_no_new_line_at_eof(); +TEST_CASE_TEMPLATE("test no new line at end of data", T, + ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + test_no_new_line_at_eof(); } diff --git a/test/test_parser1_2.cpp b/test/test_parser1_2.cpp index 6ca7a0b..eaf9516 100644 --- a/test/test_parser1_2.cpp +++ b/test/test_parser1_2.cpp @@ -41,11 +41,16 @@ struct xyz { } }; -template -void test_moving_of_parsed_composite_values() { +TEST_CASE_TEMPLATE("test moving of parsed composite values", T, + config, config, + config, + config) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + // to compile is enough return; - auto [p, _] = make_parser("", ""); + auto [p, _] = make_parser("", ""); p.template try_next() .template or_else( [](auto&&) {}) @@ -56,70 +61,41 @@ void test_moving_of_parsed_composite_values() { [](auto&, auto&, auto&) {}); } -TEST_CASE("parser test the moving of parsed composite values") { - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); -} - -TEST_CASE("parser test error mode") { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("parser test string error mode", BufferMode, std::true_type, + std::false_type) { + unique_file_name f{"string_error"}; { std::ofstream out{f.name}; out << "junk" << std::endl; out << "junk" << std::endl; } - { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } - - { - auto [p, _] = 
make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } + REQUIRE_FALSE(p.eof()); + p.template get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); } -TEST_CASE("parser throw on error mode") { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("parser throw on error mode", BufferMode, std::true_type, + std::false_type) { + unique_file_name f{"throw_on_error"}; { std::ofstream out{f.name}; out << "junk" << std::endl; out << "junk" << std::endl; } - { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = + make_parser(f.name, ","); - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } + REQUIRE_FALSE(p.eof()); + try { + p.template get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); } } @@ -130,9 +106,11 @@ static inline std::string no_quote(const std::string& s) { return s; } -template -void test_quote_multiline() { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("test quote multiline", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"quote_multiline"}; std::vector data = {{1, 2, "\"x\r\nx\nx\""}, {3, 4, "\"y\ny\r\ny\""}, {5, 6, "\"z\nz\""}, @@ -151,9 +129,8 @@ void test_quote_multiline() { } } - auto [p, _] = - make_parser, Ts...>(f.name, - ","); + auto [p, _] = make_parser>(f.name, ","); std::vector i; @@ -168,7 +145,7 @@ void test_quote_multiline() { CHECK_EQ(i, data); auto [p_no_multiline, __] = - make_parser, Ts...>(f.name, ","); 
+ make_parser>(f.name, ","); while (!p.eof()) { auto command = [&p_no_multiline = p_no_multiline] { p_no_multiline.template get_next(); @@ -177,23 +154,16 @@ void test_quote_multiline() { } } -TEST_CASE("parser test csv on multiple lines with quotes") { - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); -} - static inline std::string no_escape(std::string& s) { s.erase(std::remove(begin(s), end(s), '\\'), end(s)); return s; } -template -void test_escape_multiline() { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("test escape multiline", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"escape_multiline"}; std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, {5, 6, "z\\\nz\\\nz"}, {7, 8, "u"}, @@ -212,9 +182,8 @@ void test_escape_multiline() { } } - auto [p, _] = - make_parser, Ts...>(f.name, - ","); + auto [p, _] = make_parser>(f.name, ","); std::vector i; while (!p.eof()) { @@ -228,7 +197,7 @@ void test_escape_multiline() { CHECK_EQ(i, data); auto [p_no_multiline, __] = - make_parser, Ts...>(f.name, ","); + make_parser>(f.name, ","); while (!p.eof()) { auto command = [&p_no_multiline = p_no_multiline] { auto a = @@ -238,18 +207,11 @@ void test_escape_multiline() { } } -TEST_CASE("parser test csv on multiple lines with escapes") { - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); -} +TEST_CASE_TEMPLATE("test quote escape multiline", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; -template -void test_quote_escape_multiline() { - unique_file_name f{"test_parser"}; + unique_file_name f{"quote_escape_multiline"}; { std::ofstream out{f.name}; out << "1,2,\"just\\\n\nstrings\"" 
<< std::endl; @@ -266,8 +228,8 @@ void test_quote_escape_multiline() { size_t bad_lines = 1; auto num_errors = 0; - auto [p, _] = make_parser, - ss::quote<'"'>, Ts...>(f.name); + auto [p, _] = make_parser, ss::quote<'"'>>(f.name); std::vector i; while (!p.eof()) { @@ -298,12 +260,3 @@ void test_quote_escape_multiline() { } CHECK_EQ(i, data); } - -TEST_CASE("parser test csv on multiple lines with quotes and escapes") { - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); -} diff --git a/test/test_parser1_3.cpp b/test/test_parser1_3.cpp index 612d17d..07147b6 100644 --- a/test/test_parser1_3.cpp +++ b/test/test_parser1_3.cpp @@ -1,8 +1,10 @@ #include "test_parser1.hpp" -template -void test_multiline_restricted() { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("test multiline restricted", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"multiline_restricted"}; { std::ofstream out{f.name}; out << "1,2,\"just\n\nstrings\"" << std::endl; @@ -24,8 +26,8 @@ void test_multiline_restricted() { auto num_errors = 0; auto [p, _] = - make_parser, ss::quote<'"'>, - ss::escape<'\\'>, Ts...>(f.name, ","); + make_parser, + ss::quote<'"'>, ss::escape<'\\'>>(f.name, ","); std::vector i; while (!p.eof()) { @@ -63,26 +65,20 @@ void test_multiline_restricted() { CHECK_EQ(i, data); } -TEST_CASE("parser test multiline restricted") { - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); -} +template +void test_unterminated_line(const std::vector& lines, + size_t bad_line) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; -template -void 
test_unterminated_line_impl(const std::vector& lines, - size_t bad_line) { - unique_file_name f{"test_parser"}; + unique_file_name f{"unterminated_line"}; std::ofstream out{f.name}; for (const auto& line : lines) { out << line << std::endl; } out.close(); - auto [p, _] = make_parser(f.name); + auto [p, _] = make_parser(f.name); size_t line = 0; while (!p.eof()) { auto command = [&p = p] { @@ -100,21 +96,8 @@ void test_unterminated_line_impl(const std::vector& lines, } } -template -void test_unterminated_line(const std::vector& lines, - size_t bad_line) { - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, - bad_line); - test_unterminated_line_impl(lines, - bad_line); - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, - bad_line); -} - -TEST_CASE("parser test csv on multiline with errors") { +TEST_CASE_TEMPLATE("parser test csv on multiline with errors", T, + ParserOptionCombinations) { using multiline = ss::multiline_restricted<3>; using escape = ss::escape<'\\'>; using quote = ss::quote<'"'>; @@ -122,209 +105,209 @@ TEST_CASE("parser test csv on multiline with errors") { // unterminated escape { const std::vector lines{"1,2,just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"1,2,just\\", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,just\\"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "1,2,just\\", "3,4,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + 
test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "1,2,just\\\nstrings\\", "3,4,th\\\nird"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "3,4,second", "1,2,just\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); } { const std::vector lines{"9,8,\\first", "3,4,second", "1,2,jus\\t\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); } // unterminated quote { const std::vector lines{"1,2,\"just"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"1,2,\"just", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,\"just"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "1,2,\"just", "3,4,th\\,ird"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "3,4,second", "1,2,\"just"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); } { const std::vector lines{"9,8,\"first\"", "\"3\",4,\"sec,ond\"", "1,2,\"ju\"\"st"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); } // unterminated quote and escape 
{ const std::vector lines{"1,2,\"just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"1,2,\"just\\\n\\"}; - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"1,2,\"just\n\\"}; - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,\"just\n\\"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "1,2,\"just\n\\", "4,3,thrid"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,f\\\nirst", "1,2,\"just\n\\", "4,3,thrid"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,\"f\ni\nrst\"", "1,2,\"just\n\\", "4,3,thrid"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } // multiline limmit reached escape { const std::vector lines{"1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,fi\\\nrs\\\nt", "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,first", "1,2,\\\n\\\n\\\n\\\njust", "4,3,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } // multiline limmit reached quote { const std::vector lines{"1,2,\"\n\n\n\n\njust\""}; - 
test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,\"fir\nst\"", "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } // multiline limmit reached quote and escape { const std::vector lines{"1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); } { const std::vector lines{"9,8,first", "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,fi\\\nrst", "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,\"fi\nrst\"", "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } { const std::vector lines{"9,8,\"fi\nr\\\nst\"", "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); } } diff --git a/test/test_parser1_4.cpp b/test/test_parser1_4.cpp index 82233c6..993e74b 100644 --- a/test/test_parser1_4.cpp +++ b/test/test_parser1_4.cpp @@ -7,13 +7,14 @@ template struct has_type> : std::disjunction...> {}; -template -static void test_fields_impl(const std::string file_name, - const std::vector& data, - const std::vector& fields) { +template +static void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; using CaseType = std::tuple; - auto [p, _] = make_parser(file_name, ","); 
+ auto [p, _] = make_parser(file_name, ","); CHECK_FALSE(p.field_exists("Unknown")); p.use_fields(fields); std::vector i; @@ -36,23 +37,9 @@ static void test_fields_impl(const std::string file_name, } } -template -static void test_fields(const std::string file_name, const std::vector& data, - const std::vector& fields) { - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, - data, fields); - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, - data, fields); -} - -TEST_CASE("parser test various cases with header") { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("test various cases with header", T, + ParserOptionCombinations) { + unique_file_name f{"various_cases_with_header"}; constexpr static auto Int = "Int"; constexpr static auto Dbl = "Double"; constexpr static auto Str = "String"; @@ -180,27 +167,30 @@ TEST_CASE("parser test various cases with header") { print(call) */ - test_fields(o, d, {Str}); - test_fields(o, d, {Int}); - test_fields(o, d, {Dbl}); - test_fields(o, d, {Str, Int}); - test_fields(o, d, {Str, Dbl}); - test_fields(o, d, {Int, Str}); - test_fields(o, d, {Int, Dbl}); - test_fields(o, d, {Dbl, Str}); - test_fields(o, d, {Dbl, Int}); - test_fields(o, d, {Str, Int, Dbl}); - test_fields(o, d, {Str, Dbl, Int}); - test_fields(o, d, {Int, Str, Dbl}); - test_fields(o, d, {Int, Dbl, Str}); - test_fields(o, d, {Dbl, Str, Int}); - test_fields(o, d, {Dbl, Int, Str}); + test_fields(o, d, {Str}); + test_fields(o, d, {Int}); + test_fields(o, d, {Dbl}); + test_fields(o, d, {Str, Int}); + test_fields(o, d, {Str, Dbl}); + test_fields(o, d, {Int, Str}); + test_fields(o, d, {Int, Dbl}); + test_fields(o, d, {Dbl, Str}); + test_fields(o, d, {Dbl, Int}); + test_fields(o, d, {Str, Int, Dbl}); + test_fields(o, d, {Str, Dbl, Int}); + test_fields(o, d, {Int, Str, Dbl}); + 
test_fields(o, d, {Int, Dbl, Str}); + test_fields(o, d, {Dbl, Str, Int}); + test_fields(o, d, {Dbl, Int, Str}); } -template -void test_invalid_fields_impl(const std::vector& lines, - const std::vector& fields) { - unique_file_name f{"test_parser"}; +template +void test_invalid_fields(const std::vector& lines, + const std::vector& fields) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"invalid_fields"}; { std::ofstream out{f.name}; for (const auto& line : lines) { @@ -210,21 +200,21 @@ void test_invalid_fields_impl(const std::vector& lines, { // No fields specified - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto command = [&p = p] { p.use_fields(); }; expect_error_on_command(p, command); } { // Unknown field - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto command = [&p = p] { p.use_fields("Unknown"); }; expect_error_on_command(p, command); } { // Field used multiple times - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto command = [&p = p, &fields = fields] { p.use_fields(fields.at(0), fields.at(0)); }; @@ -235,7 +225,7 @@ void test_invalid_fields_impl(const std::vector& lines, { // Mapping out of range - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto command = [&p = p, &fields = fields] { p.use_fields(fields.at(0)); p.template get_next(); @@ -247,7 +237,7 @@ void test_invalid_fields_impl(const std::vector& lines, { // Invalid header - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); auto command = [&p = p, &fields = fields] { p.use_fields(fields); }; if (!fields.empty()) { @@ -259,7 +249,7 @@ void test_invalid_fields_impl(const std::vector& lines, command(); CHECK(p.valid()); if (!p.valid()) { - if constexpr (ss::setup::string_error) { + if constexpr (T::StringError) { std::cout << p.error_msg() << 
std::endl; } } @@ -268,44 +258,38 @@ void test_invalid_fields_impl(const std::vector& lines, } } -template -void test_invalid_fields(const std::vector& lines, - const std::vector& fields) { - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); +TEST_CASE_TEMPLATE("test invalid fheader fields usage", T, + ParserOptionCombinations) { + test_invalid_fields({}, {}); + + test_invalid_fields({"Int"}, {"Int"}); + test_invalid_fields({"Int", "1"}, {"Int"}); + test_invalid_fields({"Int", "1", "2"}, {"Int"}); + + test_invalid_fields({"Int,String"}, {"Int", "String"}); + test_invalid_fields({"Int,String", "1,hi"}, {"Int", "String"}); + test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); + + test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, + {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, + {"Int", "String", "Double"}); + + test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); + test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); + + test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); + test_invalid_fields({"Int,String,Int", "1,hi,3"}, + {"Int", "String", "Int"}); } -TEST_CASE("parser test invalid header fields usage") { - test_invalid_fields({}, {}); +TEST_CASE_TEMPLATE("test invalid rows with header", T, + ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; - test_invalid_fields({"Int"}, {"Int"}); - test_invalid_fields({"Int", "1"}, {"Int"}); - test_invalid_fields({"Int", "1", "2"}, {"Int"}); - - test_invalid_fields({"Int,String"}, {"Int", "String"}); - test_invalid_fields({"Int,String", "1,hi"}, {"Int", 
"String"}); - test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); - - test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, - {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, - {"Int", "String", "Double"}); - - test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); - test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); - - test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); - test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); -} - -template -void test_invalid_rows_with_header() { - unique_file_name f{"test_parser"}; + unique_file_name f{"invalid rows with header"}; { std::ofstream out{f.name}; out << "Int,String,Double" << std::endl; @@ -318,7 +302,7 @@ void test_invalid_rows_with_header() { } { - auto [p, _] = make_parser(f.name); + auto [p, _] = make_parser(f.name); p.use_fields("Int", "String", "Double"); using data = std::tuple; @@ -344,7 +328,7 @@ void test_invalid_rows_with_header() { } { - auto [p, _] = make_parser(f.name); + auto [p, _] = make_parser(f.name); p.use_fields("Double", "Int"); using data = std::tuple; @@ -368,7 +352,7 @@ void test_invalid_rows_with_header() { } { - auto [p, _] = make_parser(f.name); + auto [p, _] = make_parser(f.name); p.use_fields("String", "Double"); using data = std::tuple; @@ -395,18 +379,12 @@ void test_invalid_rows_with_header() { } } -TEST_CASE("parser test invalid rows with header") { - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); -} +template +void test_ignore_empty(const std::vector& data) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; -template -void test_ignore_empty_impl(const std::vector& data) { 
- unique_file_name f{"test_parser"}; + unique_file_name f{"ignore_empty"}; make_and_write(f.name, data); std::vector expected; @@ -418,7 +396,7 @@ void test_ignore_empty_impl(const std::vector& data) { { auto [p, _] = - make_parser(f.name, ","); + make_parser(f.name, ","); std::vector i; for (const auto& a : p.template iterate()) { @@ -429,7 +407,7 @@ void test_ignore_empty_impl(const std::vector& data) { } { - auto [p, _] = make_parser(f.name, ","); + auto [p, _] = make_parser(f.name, ","); std::vector i; size_t n = 0; while (!p.eof()) { @@ -450,52 +428,44 @@ void test_ignore_empty_impl(const std::vector& data) { } } -template -void test_ignore_empty(const std::vector& data) { - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); -} +TEST_CASE_TEMPLATE("test various cases with empty lines", T, + ParserOptionCombinations) { + test_ignore_empty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); -TEST_CASE("parser test various cases with empty lines") { - test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( + test_ignore_empty( {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, "x"}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, X::empty}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, "x"}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - test_ignore_empty({{1, 2, X::empty}, - {3, 
4, X::empty}, - {9, 10, X::empty}, - {11, 12, X::empty}}); + test_ignore_empty({{1, 2, X::empty}, + {3, 4, X::empty}, + {9, 10, X::empty}, + {11, 12, X::empty}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, "x"}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, X::empty}}); - test_ignore_empty( + test_ignore_empty( {{1, 2, X::empty}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, "w"}}); - test_ignore_empty({{11, 12, X::empty}}); + test_ignore_empty({{11, 12, X::empty}}); - test_ignore_empty({}); + test_ignore_empty({}); } diff --git a/test/test_parser2.hpp b/test/test_parser2.hpp index 9d29f2a..6048a33 100644 --- a/test/test_parser2.hpp +++ b/test/test_parser2.hpp @@ -314,7 +314,7 @@ void test_data_combinations(const std::vector& input_data, return; } - unique_file_name f{"test_parser2" + std::string{SEGMENT_NAME}}; + unique_file_name f{"parser_data_combinations" + std::string{SEGMENT_NAME}}; std::vector> expected_data; std::vector header; std::vector field_header; From 05f87bc78b62bfc226cf91a11a904cee2cfb5241 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 02:57:46 +0100 Subject: [PATCH 43/57] [skip ci] Fix line method --- include/ss/parser.hpp | 5 ++--- test/test_helpers.hpp | 4 ++-- test/test_parser1_1.cpp | 13 ++++++------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 50f61b0..5e6d370 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -102,7 +102,7 @@ public: } size_t line() const { - return reader_.line_number_ > 1 ? reader_.line_number_ - 1 + return reader_.line_number_ > 0 ? 
reader_.line_number_ - 1 : reader_.line_number_; } @@ -697,8 +697,7 @@ private: csv_data_buffer_{other.csv_data_buffer_}, csv_data_size_{other.csv_data_size_}, curr_char_{other.curr_char_}, crlf_{other.crlf_}, - line_number_{other.line_number_}, - chars_read_{other.chars_read_}, + line_number_{other.line_number_}, chars_read_{other.chars_read_}, next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 89477c8..f6a4b34 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -78,8 +78,8 @@ struct unique_file_name { unique_file_name(const std::string& test) { do { - name = "random_" + test + "_" + std::to_string(i++) + "_" + - time_now_rand() + "_file.csv"; + name = "random_file_test_" + test + "_" + std::to_string(i++) + + "_" + time_now_rand() + "_file.csv"; } while (std::filesystem::exists(name)); } diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 681877f..42642d1 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -99,17 +99,17 @@ TEST_CASE_TEMPLATE("test position method", T, ParserOptionCombinations) { } } -// TODO uncomment -/* -TEST_CASE_TEMPLATE("test line method", BufferMode, std::true_type, - std::false_type) { - unique_file_name f{"test_parser"}; +TEST_CASE_TEMPLATE("test line method", T, ParserOptionCombinations) { + constexpr auto buffer_mode = T::BufferMode::value; + using ErrorMode = typename T::ErrorMode; + + unique_file_name f{"line_method"}; std::vector data = {{"1", "21", "x"}, {"321", "4", "y"}, {"54", "6", "zz"}, {"7", "876", "uuuu"}, {"910", "10", "v"}, {"10", "321", "ww"}}; make_and_write(f.name, data); - auto [p, buff] = make_parser(f.name); + auto [p, buff] = make_parser(f.name); auto expected_line = 0; CHECK_EQ(p.line(), expected_line); @@ -122,7 +122,6 @@ TEST_CASE_TEMPLATE("test line method", BufferMode, std::true_type, CHECK_EQ(p.line(), data.size()); } -*/ TEST_CASE_TEMPLATE("parser 
test various valid cases", T, ParserOptionCombinations) { From 110ee840ccd1ac53e2caabcd17b905c92a93c634 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 03:54:33 +0100 Subject: [PATCH 44/57] Fix header usage functionality --- include/ss/parser.hpp | 2 +- test/test_helpers.hpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 5e6d370..c655d38 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -214,7 +214,7 @@ public: reader_.next_line_converter_.set_column_mapping(column_mappings, header_.size()); - if (line() == 1) { + if (line() == 0) { ignore_next(); } } diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index f6a4b34..c345883 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -78,13 +78,17 @@ struct unique_file_name { unique_file_name(const std::string& test) { do { - name = "random_file_test_" + test + "_" + std::to_string(i++) + + name = "ssp_test_" + test + "_" + std::to_string(i++) + "_" + time_now_rand() + "_file.csv"; } while (std::filesystem::exists(name)); } ~unique_file_name() { - std::filesystem::remove(name); + try { + std::filesystem::remove(name); + } catch (const std::filesystem::filesystem_error& e) { + std::cerr << e.what() << std::endl; + } } }; From f2ff40a6256dd3212b25fb77095c284c9180829a Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 10:42:11 +0100 Subject: [PATCH 45/57] Add strict_realloc --- include/ss/common.hpp | 25 ++++++++++------------ include/ss/parser.hpp | 16 +++----------- ssp.hpp | 49 +++++++++++++++---------------------------- 3 files changed, 31 insertions(+), 59 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 1b19e3f..97b55b4 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -28,6 +28,15 @@ inline void assert_throw_on_error_not_defined() { "'throw_on_error' is enabled"); } +inline void* strict_realloc(void* ptr, size_t size) { + ptr = realloc(ptr, 
size); + if (!ptr) { + throw std::bad_alloc{}; + } + + return ptr; +} + #if __unix__ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); @@ -46,13 +55,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*lineptr == nullptr || *n < sizeof(buff)) { size_t new_n = sizeof(buff); - auto new_lineptr = static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } - - *lineptr = new_lineptr; + lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } @@ -66,13 +69,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - auto new_lineptr = static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } - - *lineptr = new_lineptr; + lineptr = static_cast(realloc(*lineptr, new_n)); *n = new_n; } diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index c655d38..ec470d2 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -764,10 +764,7 @@ private: if (*lineptr == nullptr || *n < get_line_initial_buffer_size) { auto new_lineptr = static_cast( - realloc(*lineptr, get_line_initial_buffer_size)); - if (new_lineptr == nullptr) { - return -1; - } + strict_realloc(*lineptr, get_line_initial_buffer_size)); *lineptr = new_lineptr; *n = get_line_initial_buffer_size; } @@ -778,11 +775,7 @@ private: size_t new_n = *n * 2; char* new_lineptr = - static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } + static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; *lineptr = new_lineptr; } @@ -955,10 +948,7 @@ private: size_t second_size) { buffer_size = first_size + second_size + 3; auto new_first = static_cast( - realloc(static_cast(first), buffer_size)); - if (!new_first) { - throw std::bad_alloc{}; - } + strict_realloc(static_cast(first), buffer_size)); first = new_first; 
std::copy_n(second, second_size + 1, first + first_size); diff --git a/ssp.hpp b/ssp.hpp index 4e7bd02..d478afe 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -640,6 +640,15 @@ inline void assert_throw_on_error_not_defined() { "'throw_on_error' is enabled"); } +inline void* strict_realloc(void* ptr, size_t size) { + ptr = realloc(ptr, size); + if (!ptr) { + throw std::bad_alloc{}; + } + + return ptr; +} + #if __unix__ inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); @@ -658,13 +667,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*lineptr == nullptr || *n < sizeof(buff)) { size_t new_n = sizeof(buff); - auto new_lineptr = static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } - - *lineptr = new_lineptr; + lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } @@ -677,14 +680,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - - auto new_lineptr = static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } - - *lineptr = new_lineptr; + lineptr = static_cast(realloc(*lineptr, new_n)); *n = new_n; } @@ -2235,7 +2231,7 @@ public: } size_t line() const { - return reader_.line_number_ > 1 ? reader_.line_number_ - 1 + return reader_.line_number_ > 0 ? 
reader_.line_number_ - 1 : reader_.line_number_; } @@ -2347,7 +2343,7 @@ public: reader_.next_line_converter_.set_column_mapping(column_mappings, header_.size()); - if (line() == 1) { + if (line() == 0) { ignore_next(); } } @@ -2830,8 +2826,7 @@ private: csv_data_buffer_{other.csv_data_buffer_}, csv_data_size_{other.csv_data_size_}, curr_char_{other.curr_char_}, crlf_{other.crlf_}, - line_number_{other.line_number_}, - chars_read_{other.chars_read_}, + line_number_{other.line_number_}, chars_read_{other.chars_read_}, next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; @@ -2898,10 +2893,7 @@ private: if (*lineptr == nullptr || *n < get_line_initial_buffer_size) { auto new_lineptr = static_cast( - realloc(*lineptr, get_line_initial_buffer_size)); - if (new_lineptr == nullptr) { - return -1; - } + strict_realloc(*lineptr, get_line_initial_buffer_size)); *lineptr = new_lineptr; *n = get_line_initial_buffer_size; } @@ -2912,11 +2904,7 @@ private: size_t new_n = *n * 2; char* new_lineptr = - static_cast(realloc(*lineptr, new_n)); - if (new_lineptr == nullptr) { - errno = ENOMEM; - return -1; - } + static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; *lineptr = new_lineptr; } @@ -3089,10 +3077,7 @@ private: size_t second_size) { buffer_size = first_size + second_size + 3; auto new_first = static_cast( - realloc(static_cast(first), buffer_size)); - if (!new_first) { - throw std::bad_alloc{}; - } + strict_realloc(static_cast(first), buffer_size)); first = new_first; std::copy_n(second, second_size + 1, first + first_size); From f4a06d40e7ee3b7b0b680012fafeab6269fb1c15 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 10:53:21 +0100 Subject: [PATCH 46/57] Fix non-POSIX get_line --- include/ss/common.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 97b55b4..00ae9fb 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -55,7 
+55,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*lineptr == nullptr || *n < sizeof(buff)) { size_t new_n = sizeof(buff); - lineptr = static_cast(strict_realloc(*lineptr, new_n)); + *lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } @@ -69,7 +69,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - lineptr = static_cast(realloc(*lineptr, new_n)); + *lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } From b3f3bdf8d129fb7de38f150f68a5ed59a4d21a25 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 10:54:56 +0100 Subject: [PATCH 47/57] [skip ci] Update ssp.hpp --- ssp.hpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ssp.hpp b/ssp.hpp index d478afe..7054d36 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -667,7 +667,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*lineptr == nullptr || *n < sizeof(buff)) { size_t new_n = sizeof(buff); - lineptr = static_cast(strict_realloc(*lineptr, new_n)); + *lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } @@ -680,7 +680,8 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { if (*n <= buff_used + line_used) { size_t new_n = *n * 2; - lineptr = static_cast(realloc(*lineptr, new_n)); + + *lineptr = static_cast(strict_realloc(*lineptr, new_n)); *n = new_n; } From 0ebbee1174be4e07c10012c76cff2fb33f10af63 Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 12:10:46 +0100 Subject: [PATCH 48/57] [skip ci] Remove obsolete check from get_line_buffer --- include/ss/parser.hpp | 6 ------ ssp.hpp | 6 ------ 2 files changed, 12 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index ec470d2..2629403 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -752,12 +752,6 @@ private: ssize_t get_line_buffer(char** lineptr, size_t* n, const char* const csv_data_buffer, size_t csv_data_size, size_t& 
curr_char) { - if (lineptr == nullptr || n == nullptr || - csv_data_buffer == nullptr) { - errno = EINVAL; - return -1; - } - if (curr_char >= csv_data_size) { return -1; } diff --git a/ssp.hpp b/ssp.hpp index 7054d36..5fa3db0 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -2882,12 +2882,6 @@ private: ssize_t get_line_buffer(char** lineptr, size_t* n, const char* const csv_data_buffer, size_t csv_data_size, size_t& curr_char) { - if (lineptr == nullptr || n == nullptr || - csv_data_buffer == nullptr) { - errno = EINVAL; - return -1; - } - if (curr_char >= csv_data_size) { return -1; } From f8e14b1fcfcf68eb619841c49404f47566380d1d Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 13:02:34 +0100 Subject: [PATCH 49/57] [skip ci] Add std:: to invoked C std lib functions --- include/ss/common.hpp | 10 +++++----- include/ss/extract.hpp | 4 ++-- include/ss/parser.hpp | 12 ++++++------ include/ss/splitter.hpp | 2 +- ssp.hpp | 28 ++++++++++++++-------------- test/test_helpers.hpp | 8 ++++---- test/test_parser1_1.cpp | 6 +++--- 7 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 00ae9fb..7ecdb33 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -29,7 +29,7 @@ inline void assert_throw_on_error_not_defined() { } inline void* strict_realloc(void* ptr, size_t size) { - ptr = realloc(ptr, size); + ptr = std::realloc(ptr, size); if (!ptr) { throw std::bad_alloc{}; } @@ -62,9 +62,9 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { (*lineptr)[0] = '\0'; size_t line_used = 0; - while (fgets(buff, sizeof(buff), fp) != nullptr) { - line_used = strlen(*lineptr); - size_t buff_used = strlen(buff); + while (std::fgets(buff, sizeof(buff), fp) != nullptr) { + line_used = std::strlen(*lineptr); + size_t buff_used = std::strlen(buff); if (*n <= buff_used + line_used) { size_t new_n = *n * 2; @@ -73,7 +73,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { *n = new_n; } - memcpy(*lineptr 
+ line_used, buff, buff_used); + std::memcpy(*lineptr + line_used, buff, buff_used); line_used += buff_used; (*lineptr)[line_used] = '\0'; diff --git a/include/ss/extract.hpp b/include/ss/extract.hpp index a26828d..b49a6d8 100644 --- a/include/ss/extract.hpp +++ b/include/ss/extract.hpp @@ -169,9 +169,9 @@ inline bool extract(const char* begin, const char* end, bool& value) { } } else { size_t size = end - begin; - if (size == 4 && strncmp(begin, "true", size) == 0) { + if (size == 4 && std::strncmp(begin, "true", size) == 0) { value = true; - } else if (size == 5 && strncmp(begin, "false", size) == 0) { + } else if (size == 5 && std::strncmp(begin, "false", size) == 0) { value = false; } else { return false; diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 2629403..154a881 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -675,7 +675,7 @@ private: struct reader { reader(const std::string& file_name_, const std::string& delim) - : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { + : delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} { } reader(const char* const buffer, size_t csv_data_size, @@ -736,12 +736,12 @@ private: } ~reader() { - free(buffer_); - free(next_line_buffer_); - free(helper_buffer_); + std::free(buffer_); + std::free(next_line_buffer_); + std::free(helper_buffer_); if (file_) { - fclose(file_); + std::fclose(file_); } } @@ -800,7 +800,7 @@ private: if (file_) { ssize = get_line_file(&next_line_buffer_, &next_line_buffer_size_, file_); - curr_char_ = ftell(file_); + curr_char_ = std::ftell(file_); } else { ssize = get_line_buffer(&next_line_buffer_, &next_line_buffer_size_, diff --git a/include/ss/splitter.hpp b/include/ss/splitter.hpp index dd8f365..d974e83 100644 --- a/include/ss/splitter.hpp +++ b/include/ss/splitter.hpp @@ -199,7 +199,7 @@ private: }; bool match(const char* const curr, const std::string& delim) { - return strncmp(curr, delim.c_str(), delim.size()) == 0; + return std::strncmp(curr, 
delim.c_str(), delim.size()) == 0; }; size_t delimiter_size(char) { diff --git a/ssp.hpp b/ssp.hpp index 5fa3db0..6f0ea8d 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -641,7 +641,7 @@ inline void assert_throw_on_error_not_defined() { } inline void* strict_realloc(void* ptr, size_t size) { - ptr = realloc(ptr, size); + ptr = std::realloc(ptr, size); if (!ptr) { throw std::bad_alloc{}; } @@ -674,9 +674,9 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { (*lineptr)[0] = '\0'; size_t line_used = 0; - while (fgets(buff, sizeof(buff), fp) != nullptr) { - line_used = strlen(*lineptr); - size_t buff_used = strlen(buff); + while (std::fgets(buff, sizeof(buff), fp) != nullptr) { + line_used = std::strlen(*lineptr); + size_t buff_used = std::strlen(buff); if (*n <= buff_used + line_used) { size_t new_n = *n * 2; @@ -685,7 +685,7 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { *n = new_n; } - memcpy(*lineptr + line_used, buff, buff_used); + std::memcpy(*lineptr + line_used, buff, buff_used); line_used += buff_used; (*lineptr)[line_used] = '\0'; @@ -1183,7 +1183,7 @@ private: }; bool match(const char* const curr, const std::string& delim) { - return strncmp(curr, delim.c_str(), delim.size()) == 0; + return std::strncmp(curr, delim.c_str(), delim.size()) == 0; }; size_t delimiter_size(char) { @@ -1623,9 +1623,9 @@ inline bool extract(const char* begin, const char* end, bool& value) { } } else { size_t size = end - begin; - if (size == 4 && strncmp(begin, "true", size) == 0) { + if (size == 4 && std::strncmp(begin, "true", size) == 0) { value = true; - } else if (size == 5 && strncmp(begin, "false", size) == 0) { + } else if (size == 5 && std::strncmp(begin, "false", size) == 0) { value = false; } else { return false; @@ -2805,7 +2805,7 @@ private: struct reader { reader(const std::string& file_name_, const std::string& delim) - : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { + : delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} { } 
reader(const char* const buffer, size_t csv_data_size, @@ -2866,12 +2866,12 @@ private: } ~reader() { - free(buffer_); - free(next_line_buffer_); - free(helper_buffer_); + std::free(buffer_); + std::free(next_line_buffer_); + std::free(helper_buffer_); if (file_) { - fclose(file_); + std::fclose(file_); } } @@ -2930,7 +2930,7 @@ private: if (file_) { ssize = get_line_file(&next_line_buffer_, &next_line_buffer_size_, file_); - curr_char_ = ftell(file_); + curr_char_ = std::ftell(file_); } else { ssize = get_line_buffer(&next_line_buffer_, &next_line_buffer_size_, diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index c345883..188e731 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -62,12 +62,12 @@ struct buffer { [[maybe_unused]] inline buffer buff; [[maybe_unused]] std::string time_now_rand() { - srand(time(nullptr)); + std::srand(std::time(nullptr)); std::stringstream ss; auto t = std::time(nullptr); auto tm = *std::localtime(&t); ss << std::put_time(&tm, "%d%m%Y%H%M%S"); - srand(time(nullptr)); + std::srand(std::time(nullptr)); return ss.str() + std::to_string(rand()); } @@ -78,8 +78,8 @@ struct unique_file_name { unique_file_name(const std::string& test) { do { - name = "ssp_test_" + test + "_" + std::to_string(i++) + - "_" + time_now_rand() + "_file.csv"; + name = "ssp_test_" + test + "_" + std::to_string(i++) + "_" + + time_now_rand() + "_file.csv"; } while (std::filesystem::exists(name)); } diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp index 42642d1..650393b 100644 --- a/test/test_parser1_1.cpp +++ b/test/test_parser1_1.cpp @@ -77,9 +77,9 @@ TEST_CASE_TEMPLATE("test position method", T, ParserOptionCombinations) { if (!buff.empty()) { return buff[n]; } else { - auto file = fopen(f.name.c_str(), "r"); - fseek(file, n, SEEK_SET); - return static_cast(fgetc(file)); + auto file = std::fopen(f.name.c_str(), "r"); + std::fseek(file, n, SEEK_SET); + return static_cast(std::fgetc(file)); } }; From 
0a695cf09ef48650e925045809218e3ec720aeee Mon Sep 17 00:00:00 2001 From: ado Date: Sun, 25 Feb 2024 17:46:35 +0100 Subject: [PATCH 50/57] Add ss::uint8 and ss::int8, add unit tests for them --- include/ss/extract.hpp | 51 +++++++++- ssp.hpp | 51 +++++++++- test/test_converter.cpp | 205 +++++++++++++++++++------------------- test/test_extractions.cpp | 176 ++++++++++++++++---------------- 4 files changed, 294 insertions(+), 189 deletions(-) diff --git a/include/ss/extract.hpp b/include/ss/extract.hpp index b49a6d8..64b6b1b 100644 --- a/include/ss/extract.hpp +++ b/include/ss/extract.hpp @@ -77,6 +77,38 @@ std::enable_if_t, std::optional> to_num( #endif +//////////////// +// numeric_wrapper +//////////////// + +template +struct numeric_wrapper { + using type = T; + + numeric_wrapper() = default; + numeric_wrapper(numeric_wrapper&&) = default; + numeric_wrapper(const numeric_wrapper&) = default; + + numeric_wrapper& operator=(numeric_wrapper&&) = default; + numeric_wrapper& operator=(const numeric_wrapper&) = default; + + numeric_wrapper(T other) : value{other} { + } + + operator T() { + return value; + } + + operator T() const { + return value; + } + + T value; +}; + +using int8 = numeric_wrapper; +using uint8 = numeric_wrapper; + template std::enable_if_t, std::optional> to_num( const char* const begin, const char* const end) { @@ -89,6 +121,18 @@ std::enable_if_t, std::optional> to_num( return ret; } +template +std::enable_if_t, std::optional> to_num( + const char* const begin, const char* const end) { + T ret; + auto [ptr, ec] = std::from_chars(begin, end, ret.value); + + if (ec != std::errc() || ptr != end) { + return std::nullopt; + } + return ret; +} + //////////////// // extract //////////////// @@ -103,7 +147,8 @@ struct unsupported_type { template std::enable_if_t && !std::is_floating_point_v && !is_instance_of_v && - !is_instance_of_v, + !is_instance_of_v && + !is_instance_of_v, bool> extract(const char*, const char*, T&) { 
static_assert(error::unsupported_type::value, @@ -112,7 +157,9 @@ extract(const char*, const char*, T&) { } template -std::enable_if_t || std::is_floating_point_v, bool> +std::enable_if_t || std::is_floating_point_v || + is_instance_of_v, + bool> extract(const char* begin, const char* end, T& value) { auto optional_value = to_num(begin, end); if (!optional_value) { diff --git a/ssp.hpp b/ssp.hpp index 6f0ea8d..e6578b5 100644 --- a/ssp.hpp +++ b/ssp.hpp @@ -1531,6 +1531,38 @@ std::enable_if_t, std::optional> to_num( #endif +//////////////// +// numeric_wrapper +//////////////// + +template +struct numeric_wrapper { + using type = T; + + numeric_wrapper() = default; + numeric_wrapper(numeric_wrapper&&) = default; + numeric_wrapper(const numeric_wrapper&) = default; + + numeric_wrapper& operator=(numeric_wrapper&&) = default; + numeric_wrapper& operator=(const numeric_wrapper&) = default; + + numeric_wrapper(T other) : value{other} { + } + + operator T() { + return value; + } + + operator T() const { + return value; + } + + T value; +}; + +using int8 = numeric_wrapper; +using uint8 = numeric_wrapper; + template std::enable_if_t, std::optional> to_num( const char* const begin, const char* const end) { @@ -1543,6 +1575,18 @@ std::enable_if_t, std::optional> to_num( return ret; } +template +std::enable_if_t, std::optional> to_num( + const char* const begin, const char* const end) { + T ret; + auto [ptr, ec] = std::from_chars(begin, end, ret.value); + + if (ec != std::errc() || ptr != end) { + return std::nullopt; + } + return ret; +} + //////////////// // extract //////////////// @@ -1557,7 +1601,8 @@ struct unsupported_type { template std::enable_if_t && !std::is_floating_point_v && !is_instance_of_v && - !is_instance_of_v, + !is_instance_of_v && + !is_instance_of_v, bool> extract(const char*, const char*, T&) { static_assert(error::unsupported_type::value, @@ -1566,7 +1611,9 @@ extract(const char*, const char*, T&) { } template -std::enable_if_t || 
std::is_floating_point_v, bool> +std::enable_if_t || std::is_floating_point_v || + is_instance_of_v, + bool> extract(const char* begin, const char* end, T& value) { auto optional_value = to_num(begin, end); if (!optional_value) { diff --git a/test/test_converter.cpp b/test/test_converter.cpp index 6dd4562..29312b7 100644 --- a/test/test_converter.cpp +++ b/test/test_converter.cpp @@ -46,90 +46,88 @@ TEST_CASE("converter test split with exceptions") { } } -TEST_CASE("converter test valid conversions") { +TEST_CASE_TEMPLATE("converter test valid conversions", T, int, ss::uint8) { ss::converter c; { - auto tup = c.convert("5"); + auto tup = c.convert("5"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("5,junk"); + auto tup = c.convert("5,junk"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("junk,5"); + auto tup = c.convert("junk,5"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("5\njunk\njunk", "\n"); + auto tup = c.convert("5\njunk\njunk", "\n"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("junk 5 junk", " "); + auto tup = c.convert("junk 5 junk", " "); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("junk\tjunk\t5", "\t"); + auto tup = c.convert("junk\tjunk\t5", "\t"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } { auto tup = - c.convert>("junk\tjunk\t5", "\t"); + c.convert>("junk\tjunk\t5", "\t"); REQUIRE(c.valid()); REQUIRE(tup.has_value()); CHECK_EQ(tup, 5); } { - auto tup = c.convert("5,6.6,junk"); + auto tup = c.convert("5,6.6,junk"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } { - auto tup = c.convert("5,junk,6.6"); + auto tup = c.convert("5,junk,6.6"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } { - auto tup = c.convert("junk;5;6.6", ";"); + auto tup = c.convert("junk;5;6.6", ";"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } { - auto tup = - c.convert, double>("junk;5;6.6", ";"); + auto tup = c.convert, 
double>("junk;5;6.6", ";"); REQUIRE(c.valid()); REQUIRE(std::get<0>(tup).has_value()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } { auto tup = - c.convert, double>("junk;5.4;6.6", ";"); + c.convert, double>("junk;5.4;6.6", ";"); REQUIRE(c.valid()); REQUIRE_FALSE(std::get<0>(tup).has_value()); - CHECK_EQ(tup, std::make_tuple(std::optional{}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::optional{}, 6.6)); } { auto tup = - c.convert, double>("junk;5;6.6", - ";"); + c.convert, double>("junk;5;6.6", ";"); REQUIRE(c.valid()); - REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); + REQUIRE(std::holds_alternative(std::get<0>(tup))); + CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); } { auto tup = - c.convert, double>("junk;5.5;6.6", - ";"); + c.convert, double>("junk;5.5;6.6", + ";"); REQUIRE(c.valid()); REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5.5}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::variant{5.5}, 6.6)); } { auto tup = c.convert c; try { - auto tup = c.convert("5"); + auto tup = c.convert("5"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -152,7 +151,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("5,junk"); + auto tup = c.convert("5,junk"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -160,7 +159,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("junk,5"); + auto tup = c.convert("junk,5"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -168,7 +167,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("5\njunk\njunk", "\n"); + auto tup = c.convert("5\njunk\njunk", "\n"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -176,7 +175,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto 
tup = c.convert("junk 5 junk", " "); + auto tup = c.convert("junk 5 junk", " "); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -184,7 +183,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("junk\tjunk\t5", "\t"); + auto tup = c.convert("junk\tjunk\t5", "\t"); REQUIRE(c.valid()); CHECK_EQ(tup, 5); } catch (ss::exception& e) { @@ -193,7 +192,7 @@ TEST_CASE("converter test valid conversions with exceptions") { try { auto tup = - c.convert>("junk\tjunk\t5", "\t"); + c.convert>("junk\tjunk\t5", "\t"); REQUIRE(c.valid()); REQUIRE(tup.has_value()); CHECK_EQ(tup, 5); @@ -202,7 +201,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("5,6.6,junk"); + auto tup = c.convert("5,6.6,junk"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } catch (ss::exception& e) { @@ -210,7 +209,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("5,junk,6.6"); + auto tup = c.convert("5,junk,6.6"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } catch (ss::exception& e) { @@ -218,7 +217,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = c.convert("junk;5;6.6", ";"); + auto tup = c.convert("junk;5;6.6", ";"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); } catch (ss::exception& e) { @@ -226,8 +225,7 @@ TEST_CASE("converter test valid conversions with exceptions") { } try { - auto tup = - c.convert, double>("junk;5;6.6", ";"); + auto tup = c.convert, double>("junk;5;6.6", ";"); REQUIRE(c.valid()); REQUIRE(std::get<0>(tup).has_value()); CHECK_EQ(tup, std::make_tuple(5, 6.6)); @@ -237,32 +235,31 @@ TEST_CASE("converter test valid conversions with exceptions") { try { auto tup = - c.convert, double>("junk;5.4;6.6", ";"); + c.convert, double>("junk;5.4;6.6", ";"); REQUIRE(c.valid()); REQUIRE_FALSE(std::get<0>(tup).has_value()); - CHECK_EQ(tup, 
std::make_tuple(std::optional{}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::optional{}, 6.6)); } catch (ss::exception& e) { FAIL(std::string{e.what()}); } try { auto tup = - c.convert, double>("junk;5;6.6", - ";"); + c.convert, double>("junk;5;6.6", ";"); REQUIRE(c.valid()); - REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); + REQUIRE(std::holds_alternative(std::get<0>(tup))); + CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); } catch (ss::exception& e) { FAIL(std::string{e.what()}); } try { auto tup = - c.convert, double>("junk;5.5;6.6", - ";"); + c.convert, double>("junk;5.5;6.6", + ";"); REQUIRE(c.valid()); REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5.5}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::variant{5.5}, 6.6)); } catch (ss::exception& e) { FAIL(std::string{e.what()}); } @@ -278,110 +275,114 @@ TEST_CASE("converter test valid conversions with exceptions") { } } -TEST_CASE("converter test invalid conversions") { +TEST_CASE_TEMPLATE("converter test invalid conversions", T, int, ss::uint8) { ss::converter c; - c.convert(""); + c.convert(""); REQUIRE_FALSE(c.valid()); - c.convert("1", ""); + c.convert("1", ""); REQUIRE_FALSE(c.valid()); - c.convert("10", ""); + c.convert("10", ""); REQUIRE_FALSE(c.valid()); - c.convert(""); + c.convert(""); REQUIRE_FALSE(c.valid()); - c.convert(",junk"); + c.convert(",junk"); REQUIRE_FALSE(c.valid()); - c.convert("junk,"); + c.convert("junk,"); REQUIRE_FALSE(c.valid()); - c.convert("x"); + c.convert("x"); REQUIRE_FALSE(c.valid()); - c.convert("x"); + c.convert("x"); REQUIRE_FALSE(c.valid()); - c.convert("x,junk"); + c.convert("x,junk"); REQUIRE_FALSE(c.valid()); - c.convert("junk,x"); + c.convert("junk,x"); REQUIRE_FALSE(c.valid()); - c.convert, double>("junk;.5.5;6", ";"); + c.convert, double>("junk;.5.5;6", ";"); REQUIRE_FALSE(c.valid()); } -TEST_CASE("converter test invalid conversions with exceptions") { 
+TEST_CASE_TEMPLATE("converter test invalid conversions with exceptions", T, int, + ss::uint8) { ss::converter c; - REQUIRE_EXCEPTION(c.convert("")); - REQUIRE_EXCEPTION(c.convert("1", "")); - REQUIRE_EXCEPTION(c.convert("10", "")); - REQUIRE_EXCEPTION(c.convert("")); - REQUIRE_EXCEPTION(c.convert(",junk")); - REQUIRE_EXCEPTION(c.convert("junk,")); - REQUIRE_EXCEPTION(c.convert("x")); - REQUIRE_EXCEPTION(c.convert("x")); - REQUIRE_EXCEPTION(c.convert("x,junk")); - REQUIRE_EXCEPTION(c.convert("junk,x")); + REQUIRE_EXCEPTION(c.convert("")); + REQUIRE_EXCEPTION(c.convert("1", "")); + REQUIRE_EXCEPTION(c.convert("10", "")); + REQUIRE_EXCEPTION(c.convert("")); + REQUIRE_EXCEPTION(c.convert(",junk")); + REQUIRE_EXCEPTION(c.convert("junk,")); + REQUIRE_EXCEPTION(c.convert("x")); + REQUIRE_EXCEPTION(c.convert("x")); + REQUIRE_EXCEPTION(c.convert("x,junk")); + REQUIRE_EXCEPTION(c.convert("junk,x")); REQUIRE_EXCEPTION( - c.convert, double>("junk;.5.5;6", ";")); + c.convert, double>("junk;.5.5;6", ";")); } -TEST_CASE("converter test ss:ax restriction (all except)") { +TEST_CASE_TEMPLATE("converter test ss:ax restriction (all except)", T, int, + ss::uint8) { ss::converter c; - c.convert>("0"); + c.convert>("0"); REQUIRE_FALSE(c.valid()); - c.convert>("1"); + c.convert>("1"); REQUIRE_FALSE(c.valid()); - c.convert>("junk,c,1"); + c.convert>("junk,c,1"); REQUIRE_FALSE(c.valid()); - c.convert, char>("1,c"); + c.convert, char>("1,c"); REQUIRE_FALSE(c.valid()); { - int tup = c.convert>("3"); + T tup = c.convert>("3"); REQUIRE(c.valid()); CHECK_EQ(tup, 3); } { - std::tuple tup = c.convert>("c,3"); + std::tuple tup = c.convert>("c,3"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple('c', 3)); } { - std::tuple tup = c.convert, char>("3,c"); + std::tuple tup = c.convert, char>("3,c"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(3, 'c')); } } -TEST_CASE("converter test ss:ax restriction (all except) with exceptions") { +TEST_CASE_TEMPLATE( + "converter test ss:ax restriction (all 
except) with exceptions", T, int, + ss::uint8) { ss::converter c; - REQUIRE_EXCEPTION(c.convert>("0")); - REQUIRE_EXCEPTION(c.convert>("1")); - REQUIRE_EXCEPTION(c.convert>("junk,c,1")); - REQUIRE_EXCEPTION(c.convert, char>("1,c")); + REQUIRE_EXCEPTION(c.convert>("0")); + REQUIRE_EXCEPTION(c.convert>("1")); + REQUIRE_EXCEPTION(c.convert>("junk,c,1")); + REQUIRE_EXCEPTION(c.convert, char>("1,c")); try { { - int tup = c.convert>("3"); + T tup = c.convert>("3"); CHECK_EQ(tup, 3); } { - std::tuple tup = c.convert>("c,3"); + std::tuple tup = c.convert>("c,3"); CHECK_EQ(tup, std::make_tuple('c', 3)); } { - std::tuple tup = c.convert, char>("3,c"); + std::tuple tup = c.convert, char>("3,c"); CHECK_EQ(tup, std::make_tuple(3, 'c')); } } catch (ss::exception& e) { @@ -456,65 +457,68 @@ TEST_CASE("converter test ss:nx restriction (none except) with exceptions") { } } -TEST_CASE("converter test ss:ir restriction (in range)") { +TEST_CASE_TEMPLATE("converter test ss:ir restriction (in range)", T, int, + ss::uint8) { ss::converter c; - c.convert>("3"); + c.convert>("3"); REQUIRE_FALSE(c.valid()); - c.convert>("c,3"); + c.convert>("c,3"); REQUIRE_FALSE(c.valid()); - c.convert, char>("3,c"); + c.convert, char>("3,c"); REQUIRE_FALSE(c.valid()); { - auto tup = c.convert>("3"); + auto tup = c.convert>("3"); REQUIRE(c.valid()); CHECK_EQ(tup, 3); } { - auto tup = c.convert>("2"); + auto tup = c.convert>("2"); REQUIRE(c.valid()); CHECK_EQ(tup, 2); } { - auto tup = c.convert>("c,junk,1"); + auto tup = c.convert>("c,junk,1"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple('c', 1)); } { - auto tup = c.convert, char>("1,c"); + auto tup = c.convert, char>("1,c"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(1, 'c')); } } -TEST_CASE("converter test ss:ir restriction (in range) with exceptions") { +TEST_CASE_TEMPLATE( + "converter test ss:ir restriction (in range) with exceptions", T, int, + ss::uint8) { ss::converter c; - REQUIRE_EXCEPTION(c.convert>("3")); - 
REQUIRE_EXCEPTION(c.convert>("c,3")); - REQUIRE_EXCEPTION(c.convert, char>("3,c")); + REQUIRE_EXCEPTION(c.convert>("3")); + REQUIRE_EXCEPTION(c.convert>("c,3")); + REQUIRE_EXCEPTION(c.convert, char>("3,c")); try { { - auto tup = c.convert>("3"); + auto tup = c.convert>("3"); REQUIRE(c.valid()); CHECK_EQ(tup, 3); } { - auto tup = c.convert>("2"); + auto tup = c.convert>("2"); REQUIRE(c.valid()); CHECK_EQ(tup, 2); } { - auto tup = c.convert>("c,junk,1"); + auto tup = c.convert>("c,junk,1"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple('c', 1)); } { - auto tup = c.convert, char>("1,c"); + auto tup = c.convert, char>("1,c"); REQUIRE(c.valid()); CHECK_EQ(tup, std::make_tuple(1, 'c')); } @@ -978,4 +982,3 @@ TEST_CASE("converter test invalid split conversions with exceptions") { buff(R"(just,some,2,"strings\")"))); CHECK(c.unterminated_quote()); } - diff --git a/test/test_extractions.cpp b/test/test_extractions.cpp index 4cab853..1765809 100644 --- a/test/test_extractions.cpp +++ b/test/test_extractions.cpp @@ -2,6 +2,16 @@ #include #include +template +struct std::numeric_limits> + : public std::numeric_limits {}; + +template +struct std::is_signed> : public std::is_signed {}; + +template +struct std::is_unsigned> : public std::is_unsigned {}; + TEST_CASE("testing extract functions for floating point values") { CHECK_FLOATING_CONVERSION(123.456, float); CHECK_FLOATING_CONVERSION(123.456, double); @@ -22,18 +32,18 @@ TEST_CASE("testing extract functions for floating point values") { #define CHECK_DECIMAL_CONVERSION(input, type) \ { \ std::string s = #input; \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - REQUIRE(t.has_value()); \ - CHECK_EQ(t.value(), type(input)); \ + type value; \ + bool valid = ss::extract(s.c_str(), s.c_str() + s.size(), value); \ + REQUIRE(valid); \ + CHECK_EQ(value, type(input)); \ } \ - { \ - /* check negative too */ \ - if (std::is_signed_v) { \ - auto s = std::string("-") + #input; \ - auto t = ss::to_num(s.c_str(), s.c_str() + 
s.size()); \ - REQUIRE(t.has_value()); \ - CHECK_EQ(t.value(), type(-input)); \ - } \ + /* check negative too */ \ + if (std::is_signed_v) { \ + std::string s = std::string("-") + #input; \ + type value; \ + bool valid = ss::extract(s.c_str(), s.c_str() + s.size(), value); \ + REQUIRE(valid); \ + CHECK_EQ(value, type(-input)); \ } using us = unsigned short; @@ -43,6 +53,8 @@ using ll = long long; using ull = unsigned long long; TEST_CASE("extract test functions for decimal values") { + CHECK_DECIMAL_CONVERSION(12, ss::int8); + CHECK_DECIMAL_CONVERSION(12, ss::uint8); CHECK_DECIMAL_CONVERSION(1234, short); CHECK_DECIMAL_CONVERSION(1234, us); CHECK_DECIMAL_CONVERSION(1234, int); @@ -54,6 +66,9 @@ TEST_CASE("extract test functions for decimal values") { } TEST_CASE("extract test functions for numbers with invalid inputs") { + // negative unsigned value for numeric_wrapper + CHECK_INVALID_CONVERSION("-12", ss::uint8); + // negative unsigned value CHECK_INVALID_CONVERSION("-1234", ul); @@ -70,46 +85,38 @@ TEST_CASE("extract test functions for numbers with invalid inputs") { CHECK_INVALID_CONVERSION("", int); } -#define CHECK_OUT_OF_RANGE_CONVERSION(type) \ - { \ - std::string s = std::to_string(std::numeric_limits::max()); \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - CHECK(t.has_value()); \ - for (auto& i : s) { \ - if (i != '9' && i != '.') { \ - i = '9'; \ - break; \ - } \ - } \ - t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - CHECK_FALSE(t.has_value()); \ - } \ - { \ - std::string s = std::to_string(std::numeric_limits::min()); \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - CHECK(t.has_value()); \ - for (auto& i : s) { \ - if (std::is_signed_v && i != '9' && i != '.') { \ - i = '9'; \ - break; \ - } else if (std::is_unsigned_v) { \ - s = "-1"; \ - break; \ - } \ - } \ - t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - CHECK_FALSE(t.has_value()); \ +TEST_CASE_TEMPLATE( + "extract test functions for numbers with out of range 
inputs", T, short, us, + int, ui, long, ul, ll, ull, ss::uint8) { + { + std::string s = std::to_string(std::numeric_limits::max()); + auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); + CHECK(t.has_value()); + for (auto& i : s) { + if (i != '9' && i != '.') { + i = '9'; + break; + } + } + t = ss::to_num(s.c_str(), s.c_str() + s.size()); + CHECK_FALSE(t.has_value()); + } + { + std::string s = std::to_string(std::numeric_limits::min()); + auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); + CHECK(t.has_value()); + for (auto& i : s) { + if (std::is_signed_v && i != '9' && i != '.') { + i = '9'; + break; + } else if (std::is_unsigned_v) { + s = "-1"; + break; + } + } + t = ss::to_num(s.c_str(), s.c_str() + s.size()); + CHECK_FALSE(t.has_value()); } - -TEST_CASE("extract test functions for numbers with out of range inputs") { - CHECK_OUT_OF_RANGE_CONVERSION(short); - CHECK_OUT_OF_RANGE_CONVERSION(us); - CHECK_OUT_OF_RANGE_CONVERSION(int); - CHECK_OUT_OF_RANGE_CONVERSION(ui); - CHECK_OUT_OF_RANGE_CONVERSION(long); - CHECK_OUT_OF_RANGE_CONVERSION(ul); - CHECK_OUT_OF_RANGE_CONVERSION(ll); - CHECK_OUT_OF_RANGE_CONVERSION(ull); } TEST_CASE("extract test functions for boolean values") { @@ -142,12 +149,12 @@ TEST_CASE("extract test functions for char values") { } } -TEST_CASE("extract test functions for std::optional") { - for (const auto& [i, s] : - {std::pair, std::string>{1, "1"}, - {69, "69"}, - {-4, "-4"}}) { - std::optional v; +TEST_CASE_TEMPLATE("extract test functions for std::optional", T, int, + ss::int8) { + for (const auto& [i, s] : {std::pair, std::string>{1, "1"}, + {69, "69"}, + {-4, "-4"}}) { + std::optional v; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), v)); REQUIRE(v.has_value()); CHECK_EQ(*v, i); @@ -164,7 +171,7 @@ TEST_CASE("extract test functions for std::optional") { } for (const std::string s : {"aa", "xxx", ""}) { - std::optional v; + std::optional v; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), v)); CHECK_FALSE(v.has_value()); 
} @@ -176,56 +183,57 @@ TEST_CASE("extract test functions for std::optional") { } } -TEST_CASE("extract test functions for std::variant") { +TEST_CASE_TEMPLATE("extract test functions for std::variant", T, int, + ss::uint8) { { std::string s = "22"; { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); CHECK_NOT_VARIANT(var, double); CHECK_NOT_VARIANT(var, std::string); - REQUIRE_VARIANT(var, 22, int); + REQUIRE_VARIANT(var, 22, T); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, std::string); REQUIRE_VARIANT(var, 22, double); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, double); REQUIRE_VARIANT(var, "22", std::string); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - REQUIRE_VARIANT(var, 22, int); + REQUIRE_VARIANT(var, 22, T); } } { std::string s = "22.2"; { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, std::string); REQUIRE_VARIANT(var, 22.2, double); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, std::string); REQUIRE_VARIANT(var, 22.2, double); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, double); REQUIRE_VARIANT(var, "22.2", std::string); } @@ -233,45 +241,45 @@ TEST_CASE("extract test functions for std::variant") { { std::string s = "2.2.2"; { - std::variant var; + 
std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, double); REQUIRE_VARIANT(var, "2.2.2", std::string); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, double); REQUIRE_VARIANT(var, "2.2.2", std::string); } { - std::variant var; + std::variant var; REQUIRE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); CHECK_NOT_VARIANT(var, double); REQUIRE_VARIANT(var, "2.2.2", std::string); } { - std::variant var; + std::variant var; REQUIRE_FALSE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - REQUIRE_VARIANT(var, int{}, int); + REQUIRE_VARIANT(var, T{}, T); CHECK_NOT_VARIANT(var, double); } { - std::variant var; + std::variant var; REQUIRE_FALSE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); REQUIRE_VARIANT(var, double{}, double); - CHECK_NOT_VARIANT(var, int); + CHECK_NOT_VARIANT(var, T); } { - std::variant var; + std::variant var; REQUIRE_FALSE(ss::extract(s.c_str(), s.c_str() + s.size(), var)); - REQUIRE_VARIANT(var, int{}, int); + REQUIRE_VARIANT(var, T{}, T); } } } From c516a6f826c31ebaf8969501296ae6a2ac423176 Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 26 Feb 2024 02:37:30 +0100 Subject: [PATCH 51/57] Fix extraction tests --- test/test_extractions.cpp | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/test/test_extractions.cpp b/test/test_extractions.cpp index 1765809..88ec317 100644 --- a/test/test_extractions.cpp +++ b/test/test_extractions.cpp @@ -2,15 +2,31 @@ #include #include -template -struct std::numeric_limits> - : public std::numeric_limits {}; +namespace { template -struct std::is_signed> : public std::is_signed {}; +struct numeric_limits : public std::numeric_limits {}; template 
-struct std::is_unsigned> : public std::is_unsigned {}; +struct numeric_limits> : public std::numeric_limits { +}; + +template +struct is_signed : public std::is_signed {}; + +template <> +struct is_signed : public std::true_type {}; + +template +struct is_unsigned : public std::is_unsigned {}; + +template <> +struct is_unsigned : public std::true_type {}; + +} /* namespace */ + +static_assert(is_signed::value); +static_assert(is_unsigned::value); TEST_CASE("testing extract functions for floating point values") { CHECK_FLOATING_CONVERSION(123.456, float); @@ -38,7 +54,7 @@ TEST_CASE("testing extract functions for floating point values") { CHECK_EQ(value, type(input)); \ } \ /* check negative too */ \ - if (std::is_signed_v) { \ + if (is_signed::value) { \ std::string s = std::string("-") + #input; \ type value; \ bool valid = ss::extract(s.c_str(), s.c_str() + s.size(), value); \ @@ -89,7 +105,7 @@ TEST_CASE_TEMPLATE( "extract test functions for numbers with out of range inputs", T, short, us, int, ui, long, ul, ll, ull, ss::uint8) { { - std::string s = std::to_string(std::numeric_limits::max()); + std::string s = std::to_string(numeric_limits::max()); auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); CHECK(t.has_value()); for (auto& i : s) { @@ -102,14 +118,14 @@ TEST_CASE_TEMPLATE( CHECK_FALSE(t.has_value()); } { - std::string s = std::to_string(std::numeric_limits::min()); + std::string s = std::to_string(numeric_limits::min()); auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); CHECK(t.has_value()); for (auto& i : s) { - if (std::is_signed_v && i != '9' && i != '.') { + if (is_signed::value && i != '9' && i != '.') { i = '9'; break; - } else if (std::is_unsigned_v) { + } else if (is_unsigned::value) { s = "-1"; break; } From 852481d233fdbd7c1f1d99d1fd092a2bac54376f Mon Sep 17 00:00:00 2001 From: ado Date: Tue, 27 Feb 2024 02:49:50 +0100 Subject: [PATCH 52/57] Fix converter unit tests --- test/test_converter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/test/test_converter.cpp b/test/test_converter.cpp index 29312b7..659e78f 100644 --- a/test/test_converter.cpp +++ b/test/test_converter.cpp @@ -119,7 +119,7 @@ TEST_CASE_TEMPLATE("converter test valid conversions", T, int, ss::uint8) { c.convert, double>("junk;5;6.6", ";"); REQUIRE(c.valid()); REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::variant{T(5)}, 6.6)); } { auto tup = @@ -248,7 +248,7 @@ TEST_CASE_TEMPLATE("converter test valid conversions with exceptions", T, int, c.convert, double>("junk;5;6.6", ";"); REQUIRE(c.valid()); REQUIRE(std::holds_alternative(std::get<0>(tup))); - CHECK_EQ(tup, std::make_tuple(std::variant{5}, 6.6)); + CHECK_EQ(tup, std::make_tuple(std::variant{T(5)}, 6.6)); } catch (ss::exception& e) { FAIL(std::string{e.what()}); } From df78865f046b9ba87914395d02a19e9b0400ea2d Mon Sep 17 00:00:00 2001 From: ado Date: Tue, 27 Feb 2024 23:56:13 +0100 Subject: [PATCH 53/57] [skip ci] Update README --- README.md | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 148792c..4dca2ef 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ Bill (Heath) Gates 65 3.3 * Can work without exceptions * [Works with headers](#headers) * [Works with quotes, escapes and spacings](#setup) + * [Works with csv data stored in buffers](#buffer-mode) * [Works with values containing new lines](#multiline) * [Columns and rows can be ignored](#special-types) * [Works with any type of delimiter](#delimiter) @@ -210,6 +211,19 @@ auto s = p.get_next(); This works with the iteration loop too. *Note, the order in which the members of the tied method are returned must match the order of the elements in the csv*. +## Buffer mode +The parser also works with buffers containing csv data instead of files. 
To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** which represents the buffer, and its size. The initial example using a buffer instead of a file would look like this: +```cpp +std::string buffer = "James Bailey,65,2.5\nBrian S. Wolfe,40,1.9\n"; + +ss::parser p{buffer.c_str(), buffer.size()}; + +for (const auto& [id, age, grade] : p.iterate()) { + std::cout << id << ' ' << age << ' ' << grade << std::endl; +} + +return 0; +``` ## Setup By default, many of the features supported by the parser are disabled. They can be enabled within the template parameters of the parser. For example, to enable quoting and escaping the parser would look like: ```cpp @@ -383,6 +397,12 @@ if (std::holds_alternative(grade)) { // grade set as char } ``` +Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** makes the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser it will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character. +```cpp +// returns std::tuple +auto [id, age, grade] = p.get_next(); +uint8_t age_copy = age; +``` ## Restrictions Custom **`restrictions`** can be used to narrow down the conversions of unwanted values. **`ss::ir`** (in range) and **`ss::ne`** (none empty) are some of those: @@ -454,12 +474,13 @@ The **`eof`** method can be used to detect if the end of the file was reached. Detailed error messages can be accessed via the **`error_msg`** method, and to enable them **`ss::string_error`** needs to be included in the setup. 
If **`ss::string_error`** is not defined, the **`error_msg`** method will not be defined either. The line number can be fetched using the **`line`** method. - +The cursor position can be fetched using the **`position`** method. ```cpp -const std::string& parser::error_msg(); -bool parser::valid(); -bool parser::eof(); -size_t parser::line(); +const std::string& parser::error_msg() const; +bool parser::valid() const; +bool parser::eof() const; +size_t parser::line() const; +size_t parser::position() const; // ... ss::parser parser; From 063d56fad9fa60c517a3559061afad822c287412 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 28 Feb 2024 00:01:37 +0100 Subject: [PATCH 54/57] [skip ci] Update README --- README.md | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 4dca2ef..a936e0e 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ [![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml) [![windows-msvc](https://github.com/red0124/ssp/workflows/win-msvc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml) -A header only "csv" parser which is fast and versatile with modern C++ api. Requires compiler with C++17 support. [Can also be used to convert strings to specific types.](#the-converter) +A header only CSV parser which is fast and versatile with modern C++ API. Requires compiler with C++17 support. [Can also be used to efficiently convert strings to specific types.](#the-converter) Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) . \ Function traits taken from *qt-creator* . 
# Example -Lets say we have a csv file containing students in a given format \ and we want to parse and print all the valid values: +Lets say we have a CSV file containing students in a given format \ and we want to parse and print all the valid values: ```shell $ cat students.csv @@ -58,7 +58,7 @@ Bill (Heath) Gates 65 3.3 * Can work without exceptions * [Works with headers](#headers) * [Works with quotes, escapes and spacings](#setup) - * [Works with csv data stored in buffers](#buffer-mode) + * [Works with CSV data stored in buffers](#buffer-mode) * [Works with values containing new lines](#multiline) * [Columns and rows can be ignored](#special-types) * [Works with any type of delimiter](#delimiter) @@ -159,7 +159,7 @@ while (!p.eof()) { The alternate example with exceptions disabled will be used to show some of the features of the library. The **`get_next`** method returns a tuple of objects specified inside the template type list. -If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **`valid`** method would return **`false`**, for example if the third (grade) column in our csv could not be converted to a float the conversion would fail. +If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **`valid`** method would return **`false`**, for example if the third (grade) column in our CSV could not be converted to a float the conversion would fail. If **`get_next`** is called with a **`tuple`** as template parameter it would behave identically to passing the same tuple parameters to **`get_next`**: ```cpp @@ -203,16 +203,16 @@ struct student { auto tied() { return std::tie(id, age, grade); } }; ``` -The method can be used to compare the object, serialize it, deserialize it, etc. Now **`get_next`** can accept such a struct and deduce the types to which to convert the csv. +The method can be used to compare the object, serialize it, deserialize it, etc. 
Now **`get_next`** can accept such a struct and deduce the types to which to convert the CSV. ```cpp // returns student auto s = p.get_next(); ``` This works with the iteration loop too. -*Note, the order in which the members of the tied method are returned must match the order of the elements in the csv*. +*Note, the order in which the members of the tied method are returned must match the order of the elements in the CSV*. ## Buffer mode -The parser also works with buffers containing csv data instead of files. To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** which represents the buffer, and its size. The initial example using a buffer instead of a file would look like this: +The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** which represents the buffer, and its size. The initial example using a buffer instead of a file would look like this: ```cpp std::string buffer = "James Bailey,65,2.5\nBrian S. Wolfe,40,1.9\n"; @@ -255,7 +255,7 @@ Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup p ```cpp ss::parser p{file_name}; ``` -If this setup option is not set then reading an empty line will result in an error (unless only one column is present within the csv). +If this setup option is not set then reading an empty line will result in an error (unless only one column is present within the CSV). ### Quoting Quoting can be enabled by defining **`ss::quote`** within the setup parameters. A single character can be defined as the quoting character, for example to use **`"`** as a quoting character: @@ -304,7 +304,7 @@ Escaping and quoting can be used to leave the space if needed. ``` ### Multiline -Multiline can be enabled by defining **`ss::multilne`** within the setup parameters. It enables the possibility to have the new line characters within rows. 
The new line character needs to be either escaped or within quotes so either **`ss::escape`** or **`ss::quote`** need to be enabled. There is a specific problem when using multiline, for example, if a row had an unterminated quote, the parser would assume it to be a new line within the row, so until another quote is found, it will treat it as one line which is fine usually, but it can cause the whole csv file to be treated as a single line by mistake. To prevent this **`ss::multiline_restricted`** can be used which accepts an unsigned number representing the maximum number of lines which can be allowed as a single multiline. Examples: +Multiline can be enabled by defining **`ss::multiline`** within the setup parameters. It enables the possibility to have the new line characters within rows. The new line character needs to be either escaped or within quotes so either **`ss::escape`** or **`ss::quote`** need to be enabled. There is a specific problem when using multiline, for example, if a row had an unterminated quote, the parser would assume it to be a new line within the row, so until another quote is found, it will treat it as one line which is fine usually, but it can cause the whole CSV file to be treated as a single line by mistake. To prevent this **`ss::multiline_restricted`** can be used which accepts an unsigned number representing the maximum number of lines which can be allowed as a single multiline. Examples: ```cpp ss::parser, ss::escape<'\\'>> p{file_name}; @@ -355,7 +355,7 @@ Gates 65 3.3' ``` ## Special types -Passing **`void`** makes the parser ignore a column. In the initial example **`void`** could be given as the second template parameter to ignore the second (age) column in the csv, a tuple of only 2 parameters would be retuned: +Passing **`void`** makes the parser ignore a column. 
In the initial example **`void`** could be given as the second template parameter to ignore the second (age) column in the CSV, a tuple of only 2 parameters would be returned: ```cpp // returns std::tuple auto [id, grade] = p.get_next(); @@ -495,7 +495,7 @@ ss::parser parser; ## Substitute conversions -The parser can also be used to effectively parse files whose rows are not always in the same format (not a classical csv but still csv-like). A more complicated example would be the best way to demonstrate such a scenario.\ +The parser can also be used to effectively parse files whose rows are not always in the same format (not a classical CSV but still CSV-like). A more complicated example would be the best way to demonstrate such a scenario.\ ***Important, substitute conversions do not work when throw_on_error is enabled.*** Supposing we have a file containing different shapes in given formats: From 7bbe2879cd44b3c27797972aad64d8c8d1ee1cfb Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 28 Feb 2024 00:02:58 +0100 Subject: [PATCH 55/57] [skip ci] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a936e0e..d4c6617 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ Conversion for floating point values invoked using [fast-float](https://github.c Function traits taken from *qt-creator* . 
# Example -Lets say we have a CSV file containing students in a given format \ and we want to parse and print all the valid values: +Let's say we have a CSV file containing students in a given format (Id,Age,Grade) and we want to parse and print all the valid values: ```shell $ cat students.csv From 0d3d8fa83ec4d29e9830a4e32f48e2c443e297b6 Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 28 Feb 2024 00:04:59 +0100 Subject: [PATCH 56/57] [skip ci] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d4c6617..0334dd1 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ This works with the iteration loop too. *Note, the order in which the members of the tied method are returned must match the order of the elements in the CSV*. ## Buffer mode -The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** which represents the buffer, and its size. The initial example using a buffer instead of a file would look like this: +The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** and its size. The initial example using a buffer instead of a file would look like this: ```cpp std::string buffer = "James Bailey,65,2.5\nBrian S. Wolfe,40,1.9\n"; From 899a6e6f5e82402fa781b8ac09ffc3589cbdd92e Mon Sep 17 00:00:00 2001 From: ado Date: Wed, 28 Feb 2024 00:08:04 +0100 Subject: [PATCH 57/57] [skip ci] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0334dd1..6f00e9c 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ This works with the iteration loop too. *Note, the order in which the members of the tied method are returned must match the order of the elements in the CSV*. 
## Buffer mode -The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create it by passing it the buffer as a **`const char*`** and its size. The initial example using a buffer instead of a file would look like this: +The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create the parser by giving it the buffer, as **`const char*`**, and its size. The initial example using a buffer instead of a file would look similar to this: ```cpp std::string buffer = "James Bailey,65,2.5\nBrian S. Wolfe,40,1.9\n";