From a6db4a7ad2924e547f9c9ad58a72e5b110f8ee1d Mon Sep 17 00:00:00 2001 From: ado Date: Tue, 25 Jul 2023 00:56:38 +0200 Subject: [PATCH] WIP, Write additional parser tests --- include/ss/parser.hpp | 5 + test/test_parser.cpp | 500 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 450 insertions(+), 55 deletions(-) diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 55d77e9..0f6bf02 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -681,8 +681,13 @@ private: } } +<<<<<<< Updated upstream next_line_converter_.resplit(next_line_buffer_, size, delim_); +======= + next_line_converter_.resplit(next_line_buffer_, + next_line_size_, delim_); +>>>>>>> Stashed changes } } } diff --git a/test/test_parser.cpp b/test/test_parser.cpp index 0005dac..8f47481 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include std::string time_now_rand() { std::stringstream ss; @@ -25,7 +27,7 @@ struct unique_file_name { } ~unique_file_name() { - std::filesystem::remove(name); + // std::filesystem::remove(name); } }; @@ -104,6 +106,7 @@ static void make_and_write(const std::string& file_name, } } +#if 0 #include TEST_CASE("parser test various cases") { @@ -868,13 +871,13 @@ template struct has_type> : std::disjunction...> {}; -void checkSize(size_t size1, size_t size2) { +void check_size(size_t size1, size_t size2) { CHECK_EQ(size1, size2); } template -void testFields(const std::string file_name, const std::vector& data, - const std::vector& fields) { +void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { using CaseType = std::tuple; ss::parser p{file_name, ","}; @@ -886,7 +889,7 @@ void testFields(const std::string file_name, const std::vector& data, i.push_back(a); } - checkSize(i.size(), data.size()); + check_size(i.size(), data.size()); for (size_t j = 0; j < i.size(); ++j) { if constexpr (has_type::value) { CHECK_EQ(std::get(i[j]), data[j].i); @@ -1029,24 +1032,24 @@ TEST_CASE("parser test various cases with header") { print(call) */ - testFields(o, d, {Str}); - testFields(o, d, {Int}); - testFields(o, d, {Dbl}); - testFields(o, d, {Str, Int}); - testFields(o, d, {Str, Dbl}); - testFields(o, d, {Int, Str}); - testFields(o, d, {Int, Dbl}); - testFields(o, d, {Dbl, Str}); - testFields(o, d, {Dbl, Int}); - testFields(o, d, {Str, Int, Dbl}); - testFields(o, d, {Str, Dbl, Int}); - testFields(o, d, {Int, Str, Dbl}); - testFields(o, d, {Int, Dbl, Str}); - testFields(o, d, {Dbl, Str, Int}); - testFields(o, d, {Dbl, Int, Str}); + test_fields(o, d, {Str}); + test_fields(o, d, {Int}); + test_fields(o, d, {Dbl}); + test_fields(o, d, {Str, Int}); + test_fields(o, d, {Str, Dbl}); + test_fields(o, d, {Int, Str}); + test_fields(o, d, {Int, Dbl}); + test_fields(o, d, {Dbl, Str}); + test_fields(o, d, {Dbl, Int}); + test_fields(o, d, {Str, Int, Dbl}); + test_fields(o, d, {Str, Dbl, Int}); + test_fields(o, d, {Int, Str, Dbl}); + test_fields(o, d, {Int, Dbl, Str}); + test_fields(o, d, {Dbl, Str, Int}); + test_fields(o, d, {Dbl, Int, Str}); } -void testIgnoreEmpty(const std::vector& data) { +void test_ignore_empty(const std::vector& data) { unique_file_name f; make_and_write(f.name, data); @@ -1087,53 +1090,440 @@ void testIgnoreEmpty(const std::vector& data) { } TEST_CASE("parser test various cases with empty lines") { - testIgnoreEmpty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - testIgnoreEmpty( + test_ignore_empty( {{1, 2, X::make_empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - testIgnoreEmpty( + test_ignore_empty( {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::make_empty}}); - testIgnoreEmpty( + test_ignore_empty( {{1, 2, "x"}, {5, 6, X::make_empty}, {9, 10, "v"}, {11, 12, "w"}}); - testIgnoreEmpty({{1, 2, X::make_empty}, - {5, 6, X::make_empty}, - {9, 10, "v"}, - {11, 12, "w"}}); + test_ignore_empty({{1, 2, X::make_empty}, + {5, 6, X::make_empty}, + {9, 10, "v"}, + {11, 12, "w"}}); - testIgnoreEmpty({{1, 2, X::make_empty}, - {3, 4, "y"}, - {9, 10, "v"}, - {11, 12, X::make_empty}}); + test_ignore_empty({{1, 2, X::make_empty}, + {3, 4, "y"}, + {9, 10, "v"}, + {11, 12, X::make_empty}}); - testIgnoreEmpty({{1, 2, "x"}, - {3, 4, "y"}, - {9, 10, X::make_empty}, - {11, 12, X::make_empty}}); + test_ignore_empty({{1, 2, "x"}, + {3, 4, "y"}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); - testIgnoreEmpty({{1, 2, X::make_empty}, - {3, 4, "y"}, - {9, 10, X::make_empty}, - {11, 12, X::make_empty}}); + test_ignore_empty({{1, 2, X::make_empty}, + {3, 4, "y"}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); - testIgnoreEmpty({{1, 2, X::make_empty}, - {3, 4, X::make_empty}, - {9, 10, X::make_empty}, - {11, 12, X::make_empty}}); + test_ignore_empty({{1, 2, X::make_empty}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); - testIgnoreEmpty({{1, 2, "x"}, - {3, 4, X::make_empty}, - {9, 10, X::make_empty}, - {11, 12, X::make_empty}}); + test_ignore_empty({{1, 2, "x"}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); - testIgnoreEmpty({{1, 2, X::make_empty}, - {3, 4, X::make_empty}, - {9, 10, X::make_empty}, - {11, 12, "w"}}); + test_ignore_empty({{1, 2, X::make_empty}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, "w"}}); - testIgnoreEmpty({{11, 12, X::make_empty}}); + test_ignore_empty({{11, 12, X::make_empty}}); - testIgnoreEmpty({}); + test_ignore_empty({}); +} +#endif + +//////////////// +// parser tests v2 +//////////////// + +#include +#include +struct random_number_generator { + size_t z1 = 12341; + size_t z2 = 12342; + size_t z3 = 12343; + size_t z4 = 12344; + + size_t rand() { + unsigned int b; + b = ((z1 << 6) ^ z1) >> 13; + z1 = ((z1 & 4294967294U) << 18) ^ b; + b = ((z2 << 2) ^ z2) >> 27; + z2 = ((z2 & 4294967288U) << 2) ^ b; + b = ((z3 << 13) ^ z3) >> 21; + z3 = ((z3 & 4294967280U) << 7) ^ b; + b = ((z4 << 3) ^ z4) >> 12; + z4 = ((z4 & 4294967168U) << 13) ^ b; + return (z1 ^ z2 ^ z3 ^ z4); + } + + template + size_t rand_index(const T& s) { + REQUIRE(!s.empty()); + return rand() % s.size(); + } + + bool rand_bool() { + return rand() % 4 == 0; + } + + template + void rand_insert(std::string& dst, const T& src) { + dst.insert(rand_index(dst), std::string{src}); + } + + template + void rand_insert_n(std::string& dst, const T& src, size_t n_max) { + size_t n = rand() % n_max; + for (size_t i = 0; i < n; ++i) { + rand_insert(dst, src); + } + } +} rng; + +struct field { + std::string value; + bool is_string = false; + bool has_spaces_left = false; + bool has_spaces_right = false; + bool has_new_line = false; + + field(const std::string& input) { + value = input; + is_string = true; + + has_spaces_left = !input.empty() && input.front() == ' '; + has_spaces_right = !input.empty() && input.back() == ' '; + has_new_line = input.find_first_of('\n') != std::string::npos; + } + + field(int input) { + value = std::to_string(input); + } + + field(double input) { + value = std::to_string(input); + } +}; + +struct column { + std::string header; + std::vector fields; +}; + +template +column make_column(const std::string& input_header, + const std::vector& input_fields) { + using setup = ss::setup; + std::vector filtered_fields; + + for (const auto& el : input_fields) { + if (!setup::multiline::enabled && el.has_new_line) { + continue; + } + + if (!setup::escape::enabled && !setup::quote::enabled) { + if (!setup::trim_left::enabled && el.has_spaces_left) { + continue; + } + + if (!setup::trim_right::enabled && el.has_spaces_right) { + continue; + } + } + + filtered_fields.push_back(el); + } + + return column{.header = input_header, .fields = filtered_fields}; +} + +void replace_all2(std::string& s, const std::string& old_value, + const std::string& new_value) { + while (true) { + size_t pos = s.find(old_value); + if (pos == std::string::npos) { + return; + } + s.replace(pos, old_value.size(), new_value); + } +} + +template +std::vector generate_csv_data(const std::vector& data, + const std::string& delim) { + (void)delim; + using setup = ss::setup; + constexpr static auto escape = '\\'; + constexpr static auto quote = '"'; + constexpr static auto helper0 = '#'; + constexpr static auto helper1 = '$'; + constexpr static auto new_line = '\n'; + + std::vector output; + + if (setup::escape::enabled && setup::quote::enabled) { + for (const auto& el : data) { + auto value = el.value; + + replace_all2(value, {escape, quote}, {helper1}); + + bool quote_newline = rng.rand_bool(); + + // handle escape + replace_all2(value, {escape}, {helper0}); + rng.rand_insert_n(value, escape, 2); + if (!quote_newline) { + replace_all2(value, {new_line}, {helper1}); + replace_all2(value, {helper1}, {escape, new_line}); + } + replace_all2(value, {escape, escape}, {escape}); + replace_all2(value, {escape, helper0}, {helper0}); + replace_all2(value, {helper0, escape}, {helper0}); + replace_all2(value, {helper0}, {escape, escape}); + + replace_all2(value, {helper1}, {escape, quote}); + + replace_all2(value, {escape, quote}, {helper1}); + + if (rng.rand_bool() || quote_newline) { + replace_all2(value, {quote}, {helper0}); + if (rng.rand_bool()) { + replace_all2(value, {helper0}, {escape, quote}); + } else { + replace_all2(value, {helper0}, {quote, quote}); + } + value = std::string{quote} + value + std::string{quote}; + } + + replace_all2(value, {helper1}, {escape, quote}); + + output.push_back(value); + } + } else if (setup::escape::enabled) { + for (const auto& el : data) { + auto value = el.value; + replace_all2(value, {escape}, {helper0}); + rng.rand_insert_n(value, escape, 3); + replace_all2(value, {new_line}, {helper1}); + replace_all2(value, {helper1}, {escape, new_line}); + replace_all2(value, {escape, escape}, {escape}); + replace_all2(value, {escape, helper0}, {helper0}); + replace_all2(value, {helper0, escape}, {helper0}); + replace_all2(value, {helper0}, {escape, escape}); + output.push_back(value); + } + } else if (setup::quote::enabled) { + for (const auto& el : data) { + auto value = el.value; + if (rng.rand_bool() || el.has_new_line) { + replace_all2(value, {quote}, {helper0}); + replace_all2(value, {helper0}, {quote, quote}); + value = std::string{quote} + value + std::string{quote}; + } + output.push_back(value); + } + } else { + for (const auto& el : data) { + output.push_back(el.value); + } + } + + if (setup::trim_right::enabled) { + for (auto& el : output) { + size_t n = rng.rand(); + for (size_t i = 0; i < n % 3; ++i) { + el = el + " "; + } + } + } + + if (setup::trim_left::enabled) { + for (auto& el : output) { + size_t n = rng.rand(); + for (size_t i = 0; i < n % 3; ++i) { + el = " " + el; + } + } + } + + return output; +} + +void write_to_file(const std::vector& data, + const std::string& delim, const std::string& file_name) { + std::ofstream out{file_name, std::ios_base::app}; + for (size_t i = 0; i < data.size(); ++i) { + out << data[i]; + if (i != data.size() - 1) { + out << delim; + } + } + out << std::endl; + out.close(); +} + +template +void test_combinations(const std::vector& input_data, + const std::string& delim) { + // TODO test without string_error + using setup = ss::setup; + + unique_file_name f; + std::vector> expected_data; + size_t n = rng.rand() % 10; + + for (size_t i = 0; i < n; ++i) { + std::vector raw_data; + for (const auto& el : input_data) { + const auto& fields = el.fields; + if (fields.empty()) { + continue; + } + + raw_data.push_back(fields[rng.rand_index(fields)]); + } + + expected_data.push_back(raw_data); + auto data = generate_csv_data(raw_data, delim); + write_to_file(data, delim, f.name); + + // TODO remove + std::cout << "[."; + for (const auto& el : data) { + std::cout << el << '.'; + } + std::cout << "]" << std::endl; + } + + std::cout << delim << std::endl; + ss::parser p{f.name, delim}; + + auto check_error = [&p] { + CHECK(p.valid()); + if (!p.valid()) { + std::cout << p.error_msg() << std::endl; + } + }; + + for (size_t i = 0; i < n; ++i) { + switch (expected_data[i].size()) { + case 0: + // TODO handle; + break; + case 1: { + auto s0 = p.template get_next(); + check_error(); + std::cout << s0 << std::endl; + CHECK(s0 == expected_data[i][0].value); + break; + } + case 2: { + auto [s0, s1] = p.template get_next(); + check_error(); + std::cout << s0 << ' ' << s1 << std::endl; + CHECK(s0 == expected_data[i][0].value); + CHECK(s1 == expected_data[i][1].value); + break; + } + case 3: { + auto [s0, s1, s2] = + p.template get_next(); + check_error(); + std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl; + CHECK(s0 == expected_data[i][0].value); + CHECK(s1 == expected_data[i][1].value); + CHECK(s2 == expected_data[i][2].value); + break; + } + case 4: { + auto [s0, s1, s2, s3] = + p.template get_next(); + check_error(); + std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 << std::endl; + CHECK(s0 == expected_data[i][0].value); + CHECK(s1 == expected_data[i][1].value); + CHECK(s2 == expected_data[i][2].value); + CHECK(s3 == expected_data[i][3].value); + break; + } + case 5: { + auto [s0, s1, s2, s3, s4] = + p.template get_next(); + check_error(); + std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 << ' ' << s4 + << std::endl; + CHECK(s0 == expected_data[i][0].value); + CHECK(s1 == expected_data[i][1].value); + CHECK(s2 == expected_data[i][2].value); + CHECK(s3 == expected_data[i][3].value); + CHECK(s4 == expected_data[i][4].value); + break; + } + // ... + default: + // TODO handle + break; + } + } +} + +// TODO rename +template +void test_combinations_impl() { + column data0 = + make_column("data0", {field{111}, field{11}, field{1}}); + + column data1 = make_column("data1", {field{"hel\\lo"}, field{"h\ni"}, + field{"new\nline"}}); + + column data2 = + make_column("data2", {field{222}, field{22}, field{12345}}); + + column data3 = + make_column("data3", {field{"h\"mm"}, field{"::::::::"}}); + + column data4 = + make_column("data4", {field{"h\"\"e\\llloooo"}, field{":D"}}); + + auto columns0 = std::vector{data0, data1, data2, data3, data4}; + auto columns1 = std::vector{data4, data3, data2, data1, data0}; + auto columns2 = std::vector{data2, data3, data0, data4, data1}; + + for (size_t i = 0; i < 2; ++i) { + for (const auto& delimiter: {",", "-", "--"}) { + test_combinations(columns0, delimiter); + test_combinations(columns1, delimiter); + test_combinations(columns2, delimiter); + } + } +} + +TEST_CASE("parser test various cases version 2") { + using quote = ss::quote<'"'>; + using escape = ss::escape<'\\'>; + using trim = ss::trim<' '>; + using triml = ss::trim_left<' '>; + using trimr = ss::trim_right<' '>; + using multiline = ss::multiline; + + test_combinations_impl<>(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); }