From f04ede3a497a7a88dda91ab622ace5d91037705a Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 00:55:36 +0100 Subject: [PATCH] Add option to read csv data from a buffer, add some unit tests for the new feature --- include/ss/common.hpp | 2 +- include/ss/parser.hpp | 127 +++++++++++++++++++++++--- test/test_parser.cpp | 204 +++++++++++++++++++++++++++--------------- 3 files changed, 250 insertions(+), 83 deletions(-) diff --git a/include/ss/common.hpp b/include/ss/common.hpp index 7531e29..d16475f 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -26,7 +26,7 @@ inline void assert_throw_on_error_not_defined() { } #if __unix__ -inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { +inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { return getline(lineptr, n, stream); } #else diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index db80dd3..31907fe 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -48,6 +48,18 @@ public: } } + parser(const char* const csv_data_buffer, size_t csv_data_size, + const std::string& delim = ss::default_delimiter) + : file_name_{"buffer line"}, + reader_{csv_data_buffer, csv_data_size, delim} { + read_line(); + if constexpr (ignore_header) { + ignore_next(); + } else { + raw_header_ = reader_.get_buffer(); + } + } + parser(parser&& other) = default; parser& operator=(parser&& other) = default; @@ -641,18 +653,27 @@ private: : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { } + reader(const char* const buffer, size_t csv_data_size, + const std::string& delim) + : delim_{delim}, csv_data_buffer_{buffer}, + csv_data_size_{csv_data_size} { + } + reader(reader&& other) : buffer_{other.buffer_}, next_line_buffer_{other.next_line_buffer_}, - helper_buffer_{other.helper_buffer_}, converter_{std::move( - other.converter_)}, + helper_buffer_{other.helper_buffer_}, + converter_{std::move(other.converter_)}, next_line_converter_{std::move(other.next_line_converter_)}, buffer_size_{other.buffer_size_}, next_line_buffer_size_{other.next_line_buffer_size_}, - helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, - file_{other.file_}, crlf_{other.crlf_}, - line_number_{other.line_number_}, next_line_size_{ - other.next_line_size_} { + helper_buffer_size{other.helper_buffer_size}, + delim_{std::move(other.delim_)}, file_{other.file_}, + csv_data_buffer_{other.csv_data_buffer_}, + csv_data_size_{other.csv_data_size_}, + curr_char_{other.curr_char_}, crlf_{other.crlf_}, + line_number_{other.line_number_}, + next_line_size_{other.next_line_size_} { other.buffer_ = nullptr; other.next_line_buffer_ = nullptr; other.helper_buffer_ = nullptr; @@ -668,9 +689,12 @@ private: next_line_converter_ = std::move(other.next_line_converter_); buffer_size_ = other.buffer_size_; next_line_buffer_size_ = other.next_line_buffer_size_; - helper_size_ = other.helper_size_; + helper_buffer_size = other.helper_buffer_size; delim_ = std::move(other.delim_); file_ = other.file_; + csv_data_buffer_ = other.csv_data_buffer_; + csv_data_size_ = other.csv_data_size_; + curr_char_ = other.curr_char_; crlf_ = other.crlf_; line_number_ = other.line_number_; next_line_size_ = other.next_line_size_; @@ -698,6 +722,60 @@ private: reader(const reader& other) = delete; reader& operator=(const reader& other) = delete; + ssize_t get_line_buffer(char** lineptr, size_t* n, + const char* const buffer, size_t csv_data_size, + size_t& curr_char) { + size_t pos; + int c; + + // TODO remove check + if (lineptr == nullptr || buffer == nullptr || n == nullptr) { + return -1; + } + + c = buffer[curr_char++]; + if (curr_char >= csv_data_size) { + return -1; + } + + // TODO maybe remove this too + if (*lineptr == nullptr) { + *lineptr = static_cast(malloc(128)); + if (*lineptr == nullptr) { + return -1; + } + *n = 128; + } + + pos = 0; + while (curr_char <= csv_data_size) { + if (pos + 1 >= *n) { + size_t new_size = *n + (*n >> 2); + // TODO maybe remove this too + if (new_size < 128) { + new_size = 128; + } + char* new_ptr = static_cast( + realloc(static_cast(*lineptr), new_size)); + // TODO check for failed malloc in the callee + if (new_ptr == nullptr) { + return -1; + } + *n = new_size; + *lineptr = new_ptr; + } + + (*lineptr)[pos++] = c; + if (c == '\n') { + break; + } + c = buffer[curr_char++]; + } + + (*lineptr)[pos] = '\0'; + return pos; + } + // read next line each time in order to set eof_ bool read_next() { next_line_converter_.clear_error(); @@ -708,8 +786,16 @@ private: if (next_line_buffer_size_ > 0) { next_line_buffer_[0] = '\0'; } - ssize = get_line(&next_line_buffer_, &next_line_buffer_size_, - file_); + + if (file_) { + ssize = get_line_file(&next_line_buffer_, + &next_line_buffer_size_, file_); + } else { + ssize = get_line_buffer(&next_line_buffer_, + &next_line_buffer_size_, + csv_data_buffer_, csv_data_size_, + curr_char_); + } if (ssize == -1) { return false; @@ -821,6 +907,10 @@ private: } size_t remove_eol(char*& buffer, size_t ssize) { + if (buffer[ssize - 1] != '\n') { + return ssize; + } + size_t size = ssize - 1; if (ssize >= 2 && buffer[ssize - 2] == '\r') { crlf_ = true; @@ -851,8 +941,17 @@ private: bool append_next_line_to_buffer(char*& buffer, size_t& size) { undo_remove_eol(buffer, size); - ssize_t next_ssize = - get_line(&helper_buffer_, &helper_size_, file_); + ssize_t next_ssize; + if (file_) { + next_ssize = + get_line_file(&helper_buffer_, &helper_buffer_size, file_); + } else { + next_ssize = + get_line_buffer(&helper_buffer_, &helper_buffer_size, + csv_data_buffer_, csv_data_size_, + curr_char_); + } + if (next_ssize == -1) { return false; } @@ -879,11 +978,15 @@ private: size_t buffer_size_{0}; size_t next_line_buffer_size_{0}; - size_t helper_size_{0}; + size_t helper_buffer_size{0}; std::string delim_; FILE* file_{nullptr}; + const char* csv_data_buffer_{nullptr}; + size_t csv_data_size_{0}; + size_t curr_char_{0}; + bool crlf_{false}; size_t line_number_{0}; diff --git a/test/test_parser.cpp b/test/test_parser.cpp index e7089ff..865c3de 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -38,7 +38,7 @@ void expect_error_on_command(ss::parser& p, } } -void update_if_crlf(std::string& s) { +[[maybe_unused]] void update_if_crlf(std::string& s) { #ifdef _WIN32 replace_all(s, "\r\n", "\n"); #else @@ -102,6 +102,31 @@ static void make_and_write(const std::string& file_name, out << data[i].to_string() << new_lines[i % new_lines.size()]; } } + +std::string make_buffer(const std::string& file_name) { + std::ifstream in{file_name, std::ios::binary}; + std::string tmp; + std::string out; + out.reserve(sizeof(out) + 1); + while (in >> tmp) { + out += tmp; + out.append("\n"); + } + return out; +} + +template +std::tuple, std::string> make_parser( + const std::string& file_name, const std::string& delim) { + if (buffer_mode) { + auto buffer = make_buffer(file_name); + return {ss::parser{buffer.data(), buffer.size(), delim}, + std::move(buffer)}; + } else { + return {ss::parser{file_name, delim}, std::string{}}; + } +} + } /* namespace */ TEST_CASE("test file not found") { @@ -125,22 +150,23 @@ TEST_CASE("test file not found") { } } -template +template void test_various_cases() { unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; make_and_write(f.name, data); + auto csv_data_buffer = make_buffer(f.name); { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); ss::parser p0{std::move(p)}; p = std::move(p0); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; - auto move_rotate = [&] { + auto move_rotate = [&p = p, &p0 = p0] { auto p1 = std::move(p); p0 = std::move(p1); p = std::move(p0); @@ -152,7 +178,7 @@ void test_various_cases() { i.emplace_back(ss::to_object(a)); } - for (const auto& a : p2.iterate()) { + for (const auto& a : p2.template iterate()) { i2.emplace_back(ss::to_object(a)); } @@ -161,13 +187,13 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; - ss::parser p3{f.name, ","}; + auto [p3, ___] = make_parser(f.name, ","); std::vector i3; std::vector expected = {std::begin(data) + 1, std::end(data)}; @@ -175,18 +201,18 @@ void test_various_cases() { p.ignore_next(); while (!p.eof()) { - auto a = p.get_next(); + auto a = p.template get_next(); i.emplace_back(ss::to_object(a)); } p2.ignore_next(); - for (const auto& a : p2.iterate()) { + for (const auto& a : p2.template iterate()) { i2.emplace_back(ss::to_object(a)); } p3.ignore_next(); - for (auto it = p3.iterate().begin(); it != p3.iterate().end(); - ++it) { + for (auto it = p3.template iterate().begin(); + it != p3.template iterate().end(); ++it) { i3.emplace_back(ss::to_object(*it)); } @@ -196,16 +222,17 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - i.push_back(p.get_object()); + i.push_back(p.template get_object()); } - for (auto&& a : p2.iterate_object()) { + for (auto&& a : + p2.template iterate_object()) { i2.push_back(std::move(a)); } @@ -214,10 +241,11 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - for (auto&& a : p.iterate_object()) { + for (auto&& a : + p.template iterate_object()) { i.push_back(std::move(a)); } @@ -225,19 +253,19 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; using tup = std::tuple; while (!p.eof()) { - i.push_back(p.get_object()); + i.push_back(p.template get_object()); } - for (auto it = p2.iterate_object().begin(); - it != p2.iterate_object().end(); it++) { + for (auto it = p2.template iterate_object().begin(); + it != p2.template iterate_object().end(); it++) { i2.push_back({it->i, it->d, it->s}); } @@ -246,11 +274,11 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; using tup = std::tuple; - for (auto&& a : p.iterate_object()) { + for (auto&& a : p.template iterate_object()) { i.push_back(std::move(a)); } @@ -258,21 +286,21 @@ void test_various_cases() { } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; while (!p.eof()) { - i.push_back(p.get_next()); + i.push_back(p.template get_next()); } CHECK_EQ(i, data); } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - for (auto&& a : p.iterate()) { + for (auto&& a : p.template iterate()) { i.push_back(std::move(a)); } @@ -281,24 +309,30 @@ void test_various_cases() { { constexpr int excluded = 3; - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - auto a = - p.get_object, double, std::string>(); - if (p.valid()) { - i.push_back(a); - } + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) { + // ignore + }; } - for (auto&& a : p2.iterate_object, double, - std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } } } @@ -312,33 +346,45 @@ void test_various_cases() { std::copy_if(data.begin(), data.end(), expected.begin(), [&](const X& x) { return x.i != excluded; }); CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); + + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } } { - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); std::vector i; - ss::parser p2{f.name, ","}; + auto [p2, __] = make_parser(f.name, ","); std::vector i2; while (!p.eof()) { - auto a = p.get_object, double, std::string>(); - if (p.valid()) { - i.push_back(a); + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) { + // ignore } } - for (auto&& a : - p2.iterate_object, double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } } } std::vector expected = {{3, 4, "y"}}; CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } } { @@ -347,17 +393,17 @@ void test_various_cases() { make_and_write(empty_f.name, empty_data); - ss::parser p{empty_f.name, ","}; + auto [p, _] = make_parser(empty_f.name, ","); std::vector i; - ss::parser p2{empty_f.name, ","}; + auto [p2, __] = make_parser(empty_f.name, ","); std::vector i2; while (!p.eof()) { - i.push_back(p.get_next()); + i.push_back(p.template get_next()); } - for (auto&& a : p2.iterate()) { + for (auto&& a : p2.template iterate()) { i2.push_back(std::move(a)); } @@ -367,9 +413,12 @@ void test_various_cases() { } TEST_CASE("parser test various cases") { - test_various_cases(); - test_various_cases(); - test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); } using test_tuple = std::tuple; @@ -385,7 +434,7 @@ struct test_struct { static inline void expect_test_struct(const test_struct&) { } -template +template void test_composite_conversion() { unique_file_name f{"test_parser"}; { @@ -397,7 +446,7 @@ void test_composite_conversion() { } } - ss::parser p{f.name, ","}; + auto [p, _] = make_parser(f.name, ","); auto fail = [] { FAIL(""); }; auto expect_error = [](auto error) { CHECK(!error.empty()); }; auto ignore_error = [] {}; @@ -609,7 +658,8 @@ void test_composite_conversion() { // various scenarios TEST_CASE("parser test composite conversion") { - test_composite_conversion(); + test_composite_conversion(); + test_composite_conversion(); } struct my_string { @@ -653,7 +703,7 @@ struct xyz { } }; -template +template void test_moving_of_parsed_composite_values() { // to compile is enough return; @@ -669,8 +719,10 @@ void test_moving_of_parsed_composite_values() { } TEST_CASE("parser test the moving of parsed composite values") { - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); } TEST_CASE("parser test error mode") { @@ -681,12 +733,23 @@ TEST_CASE("parser test error mode") { out << "junk" << std::endl; } - ss::parser p(f.name, ","); + { + auto [p, _] = make_parser(f.name, ","); - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } } TEST_CASE("parser throw on error mode") { @@ -1680,3 +1743,4 @@ TEST_CASE("parser test various cases with empty lines") { test_ignore_empty({}); } +