From d6cf9bd00685da21364795c953272e0f1f75f275 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 29 Jul 2023 20:41:31 +0200 Subject: [PATCH] WIP, Move additional parser tests to separate file --- test/CMakeLists.txt | 24 +- test/meson.build | 1 + test/test_converter.cpp | 2 +- test/test_helpers.hpp | 113 ++++--- test/test_parser.cpp | 631 ++-------------------------------------- test/test_parser2.cpp | 578 ++++++++++++++++++++++++++++++++++++ test/test_splitter.cpp | 6 +- 7 files changed, 687 insertions(+), 668 deletions(-) create mode 100644 test/test_parser2.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 60078c0..47c2998 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,37 +5,33 @@ project(ssp_tests CXX) # ---- Dependencies ---- include(FetchContent) -fetchcontent_declare(ssp SOURCE_DIR "${PROJECT_SOURCE_DIR}/..") -fetchcontent_makeavailable(ssp) +FetchContent_Declare(ssp SOURCE_DIR "${PROJECT_SOURCE_DIR}/..") +FetchContent_MakeAvailable(ssp) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") target_compile_options(ssp INTERFACE -Wall -Wextra) endif() include(FetchContent) -fetchcontent_declare( +FetchContent_Declare( DOCTEST GIT_REPOSITORY https://github.com/red0124/doctest GIT_TAG origin/master - GIT_SHALLOW TRUE -) + GIT_SHALLOW TRUE) -fetchcontent_makeavailable(DOCTEST) +FetchContent_MakeAvailable(DOCTEST) set(DOCTEST "${FETCHCONTENT_BASE_DIR}/doctest-src") # ---- Test ---- enable_testing() -foreach(name IN ITEMS test_splitter test_parser test_converter test_extractions) +foreach(name IN ITEMS test_splitter test_parser test_parser2 test_converter + test_extractions) add_executable("${name}" "${name}.cpp") - target_link_libraries( - "${name}" - PRIVATE ssp::ssp fast_float doctest::doctest - ) + target_link_libraries("${name}" PRIVATE ssp::ssp fast_float + doctest::doctest) target_compile_definitions( - "${name}" - PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI - ) + "${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI) add_test(NAME "${name}" COMMAND "${name}") endforeach() diff --git a/test/meson.build b/test/meson.build index ddfc5d4..9e6ec07 100644 --- a/test/meson.build +++ b/test/meson.build @@ -3,6 +3,7 @@ test_sources = files([ 'test_splitter.cpp', 'test_converter.cpp', 'test_parser.cpp', + 'test_parser2.cpp', 'test_extractions.cpp', 'test_extractions_without_fast_float.cpp', ]) diff --git a/test/test_converter.cpp b/test/test_converter.cpp index a4e6f79..d0d18af 100644 --- a/test/test_converter.cpp +++ b/test/test_converter.cpp @@ -588,7 +588,7 @@ TEST_CASE("converter test ss:oor restriction (out of range) with exceptions") { } } -const std::vector extracted_vector = {1, 2, 3}; +const inline std::vector extracted_vector = {1, 2, 3}; // custom extract template <> diff --git a/test/test_helpers.hpp b/test/test_helpers.hpp index 7d158a4..07ded4a 100644 --- a/test/test_helpers.hpp +++ b/test/test_helpers.hpp @@ -1,6 +1,9 @@ #pragma once -#include #include +#include +#include +#include +#include #ifdef CMAKE_GITHUB_CI #include @@ -8,57 +11,83 @@ #include #endif +namespace { struct buffer { - std::string data_; + std::string data_; - char *operator()(const std::string &data) { - data_ = data; - return data_.data(); - } + char* operator()(const std::string& data) { + data_ = data; + return data_.data(); + } - char *append(const std::string &data) { - data_ += data; - return data_.data(); - } + char* append(const std::string& data) { + data_ += data; + return data_.data(); + } - char *append_overwrite_last(const std::string &data, size_t size) { - data_.resize(data_.size() - size); - return append(data); - } + char* append_overwrite_last(const std::string& data, size_t size) { + data_.resize(data_.size() - size); + return append(data); + } }; [[maybe_unused]] inline buffer buff; +std::string time_now_rand() { + std::stringstream ss; + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + ss << std::put_time(&tm, "%d%m%Y%H%M%S"); + srand(time(nullptr)); + return ss.str() + std::to_string(rand()); +} + +struct unique_file_name { + static inline int i = 0; + + const std::string name; + + unique_file_name(const std::string& test) + : name{"random_" + test + "_" + std::to_string(i++) + "_" + time_now_rand() + + "_file.csv"} { + } + + ~unique_file_name() { + // TODO uncomment + // std::filesystem::remove(name); + } +}; + #define CHECK_FLOATING_CONVERSION(input, type) \ - { \ - auto eps = std::numeric_limits::min(); \ - std::string s = #input; \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - REQUIRE(t.has_value()); \ - CHECK_LT(std::abs(t.value() - type(input)), eps); \ - } \ - { \ - /* check negative too */ \ - auto eps = std::numeric_limits::min(); \ - auto s = std::string("-") + #input; \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - REQUIRE(t.has_value()); \ - CHECK_LT(std::abs(t.value() - type(-input)), eps); \ - } + { \ + auto eps = std::numeric_limits::min(); \ + std::string s = #input; \ + auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ + REQUIRE(t.has_value()); \ + CHECK_LT(std::abs(t.value() - type(input)), eps); \ + } \ + { \ + /* check negative too */ \ + auto eps = std::numeric_limits::min(); \ + auto s = std::string("-") + #input; \ + auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ + REQUIRE(t.has_value()); \ + CHECK_LT(std::abs(t.value() - type(-input)), eps); \ + } #define CHECK_INVALID_CONVERSION(input, type) \ - { \ - std::string s = input; \ - auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ - CHECK_FALSE(t.has_value()); \ - } + { \ + std::string s = input; \ + auto t = ss::to_num(s.c_str(), s.c_str() + s.size()); \ + CHECK_FALSE(t.has_value()); \ + } #define REQUIRE_VARIANT(var, el, type) \ - { \ - auto ptr = std::get_if(&var); \ - REQUIRE(ptr); \ - REQUIRE_EQ(el, *ptr); \ - } + { \ + auto ptr = std::get_if(&var); \ + REQUIRE(ptr); \ + REQUIRE_EQ(el, *ptr); \ + } #define CHECK_NOT_VARIANT(var, type) CHECK(!std::holds_alternative(var)); @@ -71,8 +100,8 @@ struct buffer { } template -std::vector> vector_combinations( - const std::vector& v, size_t n) { +std::vector> vector_combinations(const std::vector& v, + size_t n) { std::vector> ret; if (n <= 1) { for (const auto& i : v) { @@ -90,4 +119,4 @@ std::vector> vector_combinations( } return ret; } - +} /* namespace */ diff --git a/test/test_parser.cpp b/test/test_parser.cpp index 6243d1b..980bec9 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -3,38 +3,15 @@ #include #include #include +#include #include #include #include #include -// TODO add single header tests -std::string time_now_rand() { - std::stringstream ss; - auto t = std::time(nullptr); - auto tm = *std::localtime(&t); - ss << std::put_time(&tm, "%d%m%Y%H%M%S"); - srand(time(nullptr)); - return ss.str() + std::to_string(rand()); -} - -inline int i = 0; -struct unique_file_name { - const std::string name; - - unique_file_name() - : name{"random_" + std::to_string(i++) + time_now_rand() + - "_file.csv"} { - } - - ~unique_file_name() { - // TODO uncomment - // std::filesystem::remove(name); - } -}; - -void replace_all(std::string& s, const std::string& from, - const std::string& to) { +namespace { +[[maybe_unused]] void replace_all(std::string& s, const std::string& from, + const std::string& to) { if (from.empty()) return; size_t start_pos = 0; while ((start_pos = s.find(from, start_pos)) != std::string::npos) { @@ -107,12 +84,10 @@ static void make_and_write(const std::string& file_name, out << data[i].to_string() << new_lines[i % new_lines.size()]; } } - -#if 0 -#include +} /* namespace */ TEST_CASE("parser test various cases") { - unique_file_name f; + unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; make_and_write(f.name, data); @@ -320,7 +295,7 @@ TEST_CASE("parser test various cases") { } { - unique_file_name empty_f; + unique_file_name empty_f{"test_parser"}; std::vector empty_data = {}; make_and_write(empty_f.name, empty_data); @@ -354,12 +329,12 @@ struct test_struct { } }; -void expect_test_struct(const test_struct&) { +static inline void expect_test_struct(const test_struct&) { } // various scenarios TEST_CASE("parser test composite conversion") { - unique_file_name f; + unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; for (auto& i : @@ -624,7 +599,7 @@ TEST_CASE("parser test the moving of parsed composite values") { } TEST_CASE("parser test error mode") { - unique_file_name f; + unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; out << "junk" << std::endl; @@ -639,7 +614,7 @@ TEST_CASE("parser test error mode") { CHECK_FALSE(p.error_msg().empty()); } -std::string no_quote(const std::string& s) { +static inline std::string no_quote(const std::string& s) { if (!s.empty() && s[0] == '"') { return {std::next(begin(s)), std::prev(end(s))}; } @@ -647,7 +622,7 @@ std::string no_quote(const std::string& s) { } TEST_CASE("parser test csv on multiple lines with quotes") { - unique_file_name f; + unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "\"x\r\nx\nx\""}, {3, 4, "\"y\ny\r\ny\""}, {5, 6, "\"z\nz\""}, @@ -686,13 +661,13 @@ TEST_CASE("parser test csv on multiple lines with quotes") { } } -std::string no_escape(std::string& s) { +static inline std::string no_escape(std::string& s) { s.erase(std::remove(begin(s), end(s), '\\'), end(s)); return s; } TEST_CASE("parser test csv on multiple lines with escapes") { - unique_file_name f; + unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, {5, 6, "z\\\nz\\\nz"}, {7, 8, "u"}, @@ -732,7 +707,7 @@ TEST_CASE("parser test csv on multiple lines with escapes") { } TEST_CASE("parser test csv on multiple lines with quotes and escapes") { - unique_file_name f; + unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; out << "1,2,\"just\\\n\nstrings\"" << std::endl; @@ -772,7 +747,7 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") { } TEST_CASE("parser test multiline restricted") { - unique_file_name f; + unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; out << "1,2,\"just\n\nstrings\"" << std::endl; @@ -825,13 +800,13 @@ template struct has_type> : std::disjunction...> {}; -void check_size(size_t size1, size_t size2) { +static inline void check_size(size_t size1, size_t size2) { CHECK_EQ(size1, size2); } template -void test_fields(const std::string file_name, const std::vector& data, - const std::vector& fields) { +static void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { using CaseType = std::tuple; ss::parser p{file_name, ","}; @@ -858,7 +833,7 @@ void test_fields(const std::string file_name, const std::vector& data, } TEST_CASE("parser test various cases with header") { - unique_file_name f; + unique_file_name f{"test_parser"}; constexpr static auto Int = "Int"; constexpr static auto Dbl = "Double"; constexpr static auto Str = "String"; @@ -1003,8 +978,8 @@ TEST_CASE("parser test various cases with header") { test_fields(o, d, {Dbl, Int, Str}); } -void test_ignore_empty(const std::vector& data) { - unique_file_name f; +static inline void test_ignore_empty(const std::vector& data) { + unique_file_name f{"test_parser"}; make_and_write(f.name, data); std::vector expected; @@ -1094,565 +1069,3 @@ TEST_CASE("parser test various cases with empty lines") { test_ignore_empty({}); } -#endif - -//////////////// -// parser tests v2 -//////////////// - -#include -#include -struct random_number_generator { - size_t z1 = 12341; - size_t z2 = 12342; - size_t z3 = 12343; - size_t z4 = 12344; - - size_t rand() { - unsigned int b; - b = ((z1 << 6) ^ z1) >> 13; - z1 = ((z1 & 4294967294U) << 18) ^ b; - b = ((z2 << 2) ^ z2) >> 27; - z2 = ((z2 & 4294967288U) << 2) ^ b; - b = ((z3 << 13) ^ z3) >> 21; - z3 = ((z3 & 4294967280U) << 7) ^ b; - b = ((z4 << 3) ^ z4) >> 12; - z4 = ((z4 & 4294967168U) << 13) ^ b; - return (z1 ^ z2 ^ z3 ^ z4); - } - - template - size_t rand_index(const T& s) { - REQUIRE(!s.empty()); - return rand() % s.size(); - } - - bool rand_bool() { - return (rand() % 100) > 50; - } - - template - void rand_insert(std::string& dst, const T& src) { - dst.insert(rand_index(dst), std::string{src}); - } - - template - void rand_insert_n(std::string& dst, const T& src, size_t n_max) { - size_t n = rand() % n_max; - for (size_t i = 0; i < n; ++i) { - rand_insert(dst, src); - } - } -} rng; - -struct field { - std::string value; - bool is_string = false; - bool has_spaces_left = false; - bool has_spaces_right = false; - bool has_new_line = false; - - field(const std::string& input) { - value = input; - is_string = true; - - has_spaces_left = !input.empty() && input.front() == ' '; - has_spaces_right = !input.empty() && input.back() == ' '; - has_new_line = input.find_first_of('\n') != std::string::npos; - } - - field(int input) { - value = std::to_string(input); - } - - field(double input) { - value = std::to_string(input); - } -}; - -struct column { - std::string header; - std::vector fields; -}; - -template -column make_column(const std::string& input_header, - const std::vector& input_fields) { - using setup = ss::setup; - std::vector filtered_fields; - - for (const auto& el : input_fields) { - if (!setup::multiline::enabled && el.has_new_line) { - continue; - } - - if (!setup::escape::enabled && !setup::quote::enabled) { - if (setup::trim_left::enabled && el.has_spaces_left) { - continue; - } - - if (setup::trim_right::enabled && el.has_spaces_right) { - continue; - } - } - - filtered_fields.push_back(el); - } - - column c; - c.header = input_header; - c.fields = filtered_fields; - return c; -} - -void replace_all2(std::string& s, const std::string& old_value, - const std::string& new_value) { - for (size_t i = 0; i < 999; ++i) { - size_t pos = s.find(old_value); - if (pos == std::string::npos) { - return; - } - s.replace(pos, old_value.size(), new_value); - } - FAIL("bad replace"); -} - -template -std::vector generate_csv_data(const std::vector& data, - const std::string& delim) { - (void)delim; - using setup = ss::setup; - constexpr static auto escape = '\\'; - constexpr static auto quote = '"'; - constexpr static auto space = ' '; - constexpr static auto new_line = '\n'; - constexpr static auto helper0 = '#'; - constexpr static auto helper1 = '$'; - // constexpr static auto helper3 = '&'; - - std::vector output; - - if (setup::escape::enabled && setup::quote::enabled) { - for (const auto& el : data) { - auto value = el.value; - - replace_all2(value, {escape, quote}, {helper1}); - - bool quote_newline = rng.rand_bool(); - bool quote_spacings = rng.rand_bool(); - bool has_spaces = el.has_spaces_right || el.has_spaces_left; - - // handle escape - replace_all2(value, {escape}, {helper0}); - rng.rand_insert_n(value, escape, 2); - if (!quote_newline) { - replace_all2(value, {new_line}, {helper1}); - replace_all2(value, {helper1}, {escape, new_line}); - } - replace_all2(value, {escape, escape}, {escape}); - replace_all2(value, {escape, helper0}, {helper0}); - replace_all2(value, {helper0, escape}, {helper0}); - replace_all2(value, {helper0}, {escape, escape}); - - replace_all2(value, {helper1}, {escape, quote}); - - replace_all2(value, {escape, quote}, {helper1}); - - if (rng.rand_bool() || quote_newline || - (quote_spacings && has_spaces)) { - replace_all2(value, {quote}, {helper0}); - if (rng.rand_bool()) { - replace_all2(value, {helper0}, {escape, quote}); - } else { - replace_all2(value, {helper0}, {quote, quote}); - } - value = std::string{quote} + value + std::string{quote}; - } - - replace_all2(value, {helper1}, {escape, quote}); - - if (!quote_spacings && has_spaces) { - replace_all2(value, {escape, space}, {helper0}); - replace_all2(value, {space}, {helper0}); - replace_all2(value, {helper0}, {escape, space}); - } - - output.push_back(value); - } - } else if (setup::escape::enabled) { - for (const auto& el : data) { - auto value = el.value; - - replace_all2(value, {escape}, {helper0}); - rng.rand_insert_n(value, escape, 3); - replace_all2(value, {new_line}, {helper1}); - replace_all2(value, {helper1}, {escape, new_line}); - - replace_all2(value, {escape, escape}, {escape}); - replace_all2(value, {escape, helper0}, {helper0}); - - replace_all2(value, {helper0, escape}, {helper0}); - replace_all2(value, {helper0}, {escape, escape}); - - if (setup::trim_right::enabled || setup::trim_left::enabled) { - // escape space - replace_all2(value, {escape, space}, {helper0}); - replace_all2(value, {space}, {helper0}); - replace_all2(value, {helper0}, {escape, space}); - } - - output.push_back(value); - } - } else if (setup::quote::enabled) { - for (const auto& el : data) { - auto value = el.value; - if (rng.rand_bool() || el.has_new_line || el.has_spaces_left || - el.has_spaces_right) { - replace_all2(value, {quote}, {helper0}); - replace_all2(value, {helper0}, {quote, quote}); - value = std::string{quote} + value + std::string{quote}; - } - output.push_back(value); - } - } else { - for (const auto& el : data) { - output.push_back(el.value); - } - } - - if (setup::trim_right::enabled) { - for (auto& el : output) { - size_t n = rng.rand(); - for (size_t i = 0; i < n % 3; ++i) { - el = el + " "; - } - } - } - - if (setup::trim_left::enabled) { - for (auto& el : output) { - size_t n = rng.rand(); - for (size_t i = 0; i < n % 3; ++i) { - el = " " + el; - } - } - } - - return output; -} - -void write_to_file(const std::vector& data, - const std::string& delim, const std::string& file_name) { - std::ofstream out{file_name, std::ios_base::app}; - std::string line; - for (size_t i = 0; i < data.size(); ++i) { - line += data[i]; - if (i != data.size() - 1) { - line += delim; - } - } - -#ifdef _WIN32 - line += "\n"; -#else - line += "\r\n"; -#endif - - out << line; -} - -template -void test_combinations(const std::vector& input_data, - const std::string& delim, bool include_header) { - // TODO test without string_error - using setup = ss::setup; - - unique_file_name f; - std::vector> expected_data; - std::vector header; - std::vector field_header; - - for (const auto& el : input_data) { - header.push_back(el.header); - field_header.push_back(field{el.header}); - } - - if (include_header) { - auto header_data = generate_csv_data(field_header, delim); - write_to_file(header_data, delim, f.name); - } - - std::vector layout; - size_t n = 1 + rng.rand() % 10; - - for (size_t i = 0; i < input_data.size(); ++i) { - layout.push_back(i); - } - - for (size_t i = 0; i < n; ++i) { - std::vector raw_data; - for (const auto& el : input_data) { - const auto& fields = el.fields; - if (fields.empty()) { - continue; - } - - raw_data.push_back(fields[rng.rand_index(fields)]); - } - - expected_data.push_back(raw_data); - auto data = generate_csv_data(raw_data, delim); - write_to_file(data, delim, f.name); - - /* - std::cout << "[."; - for (const auto& el : data) { - std::cout << el << '.'; - } - std::cout << "]" << std::endl; - */ - } - - auto layout_combinations = vector_combinations(layout, layout.size()); - - auto remove_duplicates = [](const auto& vec) { - std::vector unique_vec; - std::unordered_set vec_set; - for (const auto& el : vec) { - if (vec_set.find(el) == vec_set.end()) { - vec_set.insert(el); - unique_vec.push_back(el); - } - } - - return unique_vec; - }; - - std::vector> unique_layout_combinations; - for (const auto& layout : layout_combinations) { - unique_layout_combinations.push_back(remove_duplicates(layout)); - } - - if (!include_header) { - unique_layout_combinations.clear(); - unique_layout_combinations.push_back(layout); - } - - for (const auto& layout : unique_layout_combinations) { - ss::parser p{f.name, delim}; - - if (include_header) { - std::vector fields; - for (const auto& index : layout) { - fields.push_back(header[index]); - } - - p.use_fields(fields); - - if (!p.valid()) { - std::cout << p.error_msg() << std::endl; - } - - REQUIRE(p.valid()); - } - - auto check_error = [&p] { - CHECK(p.valid()); - if (!p.valid()) { - std::cout << p.error_msg() << std::endl; - } - }; - - int num_columns = layout.size(); - for (size_t i = 0; i < n + 1; ++i) { - switch (num_columns) { - case 1: { - auto s0 = p.template get_next(); - if (i < n) { - check_error(); - // std::cout << s0 << std::endl; - CHECK(s0 == expected_data[i][layout[0]].value); - } else { - CHECK(p.eof()); - CHECK(!p.valid()); - } - break; - } - case 2: { - auto [s0, s1] = p.template get_next(); - if (i < n) { - check_error(); - // std::cout << s0 << ' ' << s1 << std::endl; - CHECK(s0 == expected_data[i][layout[0]].value); - CHECK(s1 == expected_data[i][layout[1]].value); - } else { - CHECK(p.eof()); - CHECK(!p.valid()); - } - break; - } - case 3: { - auto [s0, s1, s2] = - p.template get_next(); - if (i < n) { - check_error(); - // std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl; - CHECK(s0 == expected_data[i][layout[0]].value); - CHECK(s1 == expected_data[i][layout[1]].value); - CHECK(s2 == expected_data[i][layout[2]].value); - } else { - CHECK(p.eof()); - CHECK(!p.valid()); - } - break; - } - case 4: { - auto [s0, s1, s2, s3] = - p.template get_next(); - if (i < n) { - check_error(); - /* - std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 - << std::endl; - */ - CHECK(s0 == expected_data[i][layout[0]].value); - CHECK(s1 == expected_data[i][layout[1]].value); - CHECK(s2 == expected_data[i][layout[2]].value); - CHECK(s3 == expected_data[i][layout[3]].value); - } else { - CHECK(p.eof()); - CHECK(!p.valid()); - } - break; - } - case 5: { - auto [s0, s1, s2, s3, s4] = - p.template get_next(); - if (i < n) { - check_error(); - // std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 - // << ' ' << s4 << std::endl; - CHECK(s0 == expected_data[i][layout[0]].value); - CHECK(s1 == expected_data[i][layout[1]].value); - CHECK(s2 == expected_data[i][layout[2]].value); - CHECK(s3 == expected_data[i][layout[3]].value); - CHECK(s4 == expected_data[i][layout[4]].value); - } else { - CHECK(p.eof()); - CHECK(!p.valid()); - } - break; - } - default: - FAIL(("Invalid number of columns: " + - std::to_string(num_columns))); - break; - } - } - } -} - -// TODO rename -template -void test_combinations_impl() { - column ints0 = - make_column("ints0", {field{123}, field{45}, field{6}}); - column ints1 = - make_column("ints1", {field{123}, field{45}, field{6}}); - column ints2 = - make_column("ints2", {field{123}, field{45}, field{6}}); - - column floats0 = - make_column("floats0", {field{1.23}, field{456.7}, field{0.8}, - field{910}, field{123456789.987654321}}); - column floats1 = - make_column("floats1", {field{1.23}, field{456.7}, field{0.8}, - field{910}, field{123456789.987654321}}); - column floats2 = - make_column("floats2", {field{1.23}, field{456.7}, field{0.8}, - field{910}, field{123456789.987654321}}); - - column strings0 = - make_column("strings0", {field{"just"}, field{"some"}, - field{"random"}, field{"string"}}); - - column strings1 = - make_column("strings1", {field{"st\"rings"}, field{"w\"\"ith"}, - field{"qu\"otes\\"}, field{"\\a\\n\\d"}, - field{"escapes\""}}); - - column strings2 = - make_column("strings2", - {field{" with "}, field{" spaces"}, - field{"and "}, field{"\nnew"}, field{" \nlines"}, - field{" a\n\nn\n\nd "}, field{" \nso\n "}, - field{"on"}}); - - auto columns0 = std::vector{ints0, strings0, floats0, strings1, strings2}; - auto columns1 = std::vector{strings2, strings1, floats0, strings0, ints0}; - auto columns2 = std::vector{floats0, strings1, ints0, strings2, strings0}; - auto columns3 = std::vector{ints0, ints1, ints2}; - auto columns4 = std::vector{floats0, floats1, floats2}; - auto columns5 = std::vector{strings1, strings2}; - auto columns6 = std::vector{strings1}; - auto columns7 = std::vector{strings2}; - - for (size_t i = 0; i < 3; ++i) { - for (const auto& delimiter : {",", "-", "--"}) { - for (const auto& columns : - {columns0, columns1, columns2, columns3, columns4, columns5, - columns6, columns7}) { - test_combinations(columns, delimiter, false); - test_combinations(columns, delimiter, true); - } - } - } -} - -TEST_CASE("parser test various cases version 2") { - // TODO handle crlf - using quote = ss::quote<'"'>; - using escape = ss::escape<'\\'>; - using trim = ss::trim<' '>; - using triml = ss::trim_left<' '>; - using trimr = ss::trim_right<' '>; - using multiline = ss::multiline; - - test_combinations_impl<>(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); - test_combinations_impl(); -} diff --git a/test/test_parser2.cpp b/test/test_parser2.cpp new file mode 100644 index 0000000..2115e4b --- /dev/null +++ b/test/test_parser2.cpp @@ -0,0 +1,578 @@ +#include "test_helpers.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// parser tests v2 + +namespace { +struct random_number_generator { + size_t z1 = 12341; + size_t z2 = 12342; + size_t z3 = 12343; + size_t z4 = 12344; + + size_t rand() { + unsigned int b; + b = ((z1 << 6) ^ z1) >> 13; + z1 = ((z1 & 4294967294U) << 18) ^ b; + b = ((z2 << 2) ^ z2) >> 27; + z2 = ((z2 & 4294967288U) << 2) ^ b; + b = ((z3 << 13) ^ z3) >> 21; + z3 = ((z3 & 4294967280U) << 7) ^ b; + b = ((z4 << 3) ^ z4) >> 12; + z4 = ((z4 & 4294967168U) << 13) ^ b; + return (z1 ^ z2 ^ z3 ^ z4); + } + + template + size_t rand_index(const T& s) { + REQUIRE(!s.empty()); + return rand() % s.size(); + } + + bool rand_bool() { + return (rand() % 100) > 50; + } + + template + void rand_insert(std::string& dst, const T& src) { + dst.insert(rand_index(dst), std::string{src}); + } + + template + void rand_insert_n(std::string& dst, const T& src, size_t n_max) { + size_t n = rand() % n_max; + for (size_t i = 0; i < n; ++i) { + rand_insert(dst, src); + } + } +} rng; + +struct field { + std::string value; + bool is_string = false; + bool has_spaces_left = false; + bool has_spaces_right = false; + bool has_new_line = false; + + field(const std::string& input) { + value = input; + is_string = true; + + has_spaces_left = !input.empty() && input.front() == ' '; + has_spaces_right = !input.empty() && input.back() == ' '; + has_new_line = input.find_first_of('\n') != std::string::npos; + } + + field(int input) { + value = std::to_string(input); + } + + field(double input) { + value = std::to_string(input); + } +}; + +struct column { + std::string header; + std::vector fields; +}; + +template +column make_column(const std::string& input_header, + const std::vector& input_fields) { + using setup = ss::setup; + std::vector filtered_fields; + + for (const auto& el : input_fields) { + if (!setup::multiline::enabled && el.has_new_line) { + continue; + } + + if (!setup::escape::enabled && !setup::quote::enabled) { + if (setup::trim_left::enabled && el.has_spaces_left) { + continue; + } + + if (setup::trim_right::enabled && el.has_spaces_right) { + continue; + } + } + + filtered_fields.push_back(el); + } + + column c; + c.header = input_header; + c.fields = filtered_fields; + return c; +} + +void replace_all2(std::string& s, const std::string& old_value, + const std::string& new_value) { + for (size_t i = 0; i < 999; ++i) { + size_t pos = s.find(old_value); + if (pos == std::string::npos) { + return; + } + s.replace(pos, old_value.size(), new_value); + } + FAIL("bad replace"); +} + +template +std::vector generate_csv_data(const std::vector& data, + const std::string& delim) { + (void)delim; + using setup = ss::setup; + constexpr static auto escape = '\\'; + constexpr static auto quote = '"'; + constexpr static auto space = ' '; + constexpr static auto new_line = '\n'; + constexpr static auto helper0 = '#'; + constexpr static auto helper1 = '$'; + // constexpr static auto helper3 = '&'; + + std::vector output; + + if (setup::escape::enabled && setup::quote::enabled) { + for (const auto& el : data) { + auto value = el.value; + + replace_all2(value, {escape, quote}, {helper1}); + + bool quote_newline = rng.rand_bool(); + bool quote_spacings = rng.rand_bool(); + bool has_spaces = el.has_spaces_right || el.has_spaces_left; + + // handle escape + replace_all2(value, {escape}, {helper0}); + rng.rand_insert_n(value, escape, 2); + if (!quote_newline) { + replace_all2(value, {new_line}, {helper1}); + replace_all2(value, {helper1}, {escape, new_line}); + } + replace_all2(value, {escape, escape}, {escape}); + replace_all2(value, {escape, helper0}, {helper0}); + replace_all2(value, {helper0, escape}, {helper0}); + replace_all2(value, {helper0}, {escape, escape}); + + replace_all2(value, {helper1}, {escape, quote}); + + replace_all2(value, {escape, quote}, {helper1}); + + if (rng.rand_bool() || quote_newline || + (quote_spacings && has_spaces)) { + replace_all2(value, {quote}, {helper0}); + if (rng.rand_bool()) { + replace_all2(value, {helper0}, {escape, quote}); + } else { + replace_all2(value, {helper0}, {quote, quote}); + } + value = std::string{quote} + value + std::string{quote}; + } + + replace_all2(value, {helper1}, {escape, quote}); + + if (!quote_spacings && has_spaces) { + replace_all2(value, {escape, space}, {helper0}); + replace_all2(value, {space}, {helper0}); + replace_all2(value, {helper0}, {escape, space}); + } + + output.push_back(value); + } + } else if (setup::escape::enabled) { + for (const auto& el : data) { + auto value = el.value; + + replace_all2(value, {escape}, {helper0}); + rng.rand_insert_n(value, escape, 3); + replace_all2(value, {new_line}, {helper1}); + replace_all2(value, {helper1}, {escape, new_line}); + + replace_all2(value, {escape, escape}, {escape}); + replace_all2(value, {escape, helper0}, {helper0}); + + replace_all2(value, {helper0, escape}, {helper0}); + replace_all2(value, {helper0}, {escape, escape}); + + if (setup::trim_right::enabled || setup::trim_left::enabled) { + // escape space + replace_all2(value, {escape, space}, {helper0}); + replace_all2(value, {space}, {helper0}); + replace_all2(value, {helper0}, {escape, space}); + } + + output.push_back(value); + } + } else if (setup::quote::enabled) { + for (const auto& el : data) { + auto value = el.value; + if (rng.rand_bool() || el.has_new_line || el.has_spaces_left || + el.has_spaces_right) { + replace_all2(value, {quote}, {helper0}); + replace_all2(value, {helper0}, {quote, quote}); + value = std::string{quote} + value + std::string{quote}; + } + output.push_back(value); + } + } else { + for (const auto& el : data) { + output.push_back(el.value); + } + } + + if (setup::trim_right::enabled) { + for (auto& el : output) { + size_t n = rng.rand(); + for (size_t i = 0; i < n % 3; ++i) { + el = el + " "; + } + } + } + + if (setup::trim_left::enabled) { + for (auto& el : output) { + size_t n = rng.rand(); + for (size_t i = 0; i < n % 3; ++i) { + el = " " + el; + } + } + } + + return output; +} + +void write_to_file(const std::vector& data, + const std::string& delim, const std::string& file_name) { + std::ofstream out{file_name, std::ios_base::app}; + std::string line; + for (size_t i = 0; i < data.size(); ++i) { + line += data[i]; + if (i != data.size() - 1) { + line += delim; + } + } + +#ifdef _WIN32 + line += "\n"; +#else + line += "\r\n"; +#endif + + out << line; +} + +template +void test_combinations(const std::vector& input_data, + const std::string& delim, bool include_header) { + // TODO test without string_error + using setup = ss::setup; + + unique_file_name f{"test_parser2"}; + std::vector> expected_data; + std::vector header; + std::vector field_header; + + for (const auto& el : input_data) { + header.push_back(el.header); + field_header.push_back(field{el.header}); + } + + if (include_header) { + auto header_data = generate_csv_data(field_header, delim); + write_to_file(header_data, delim, f.name); + } + + std::vector layout; + size_t n = 1 + rng.rand() % 10; + + for (size_t i = 0; i < input_data.size(); ++i) { + layout.push_back(i); + } + + for (size_t i = 0; i < n; ++i) { + std::vector raw_data; + for (const auto& el : input_data) { + const auto& fields = el.fields; + if (fields.empty()) { + continue; + } + + raw_data.push_back(fields[rng.rand_index(fields)]); + } + + expected_data.push_back(raw_data); + auto data = generate_csv_data(raw_data, delim); + write_to_file(data, delim, f.name); + + std::cout << "[."; + for (const auto& el : data) { + std::cout << el << '.'; + } + std::cout << "]" << std::endl; + } + + auto layout_combinations = vector_combinations(layout, layout.size()); + + auto remove_duplicates = [](const auto& vec) { + std::vector unique_vec; + std::unordered_set vec_set; + for (const auto& el : vec) { + if (vec_set.find(el) == vec_set.end()) { + vec_set.insert(el); + unique_vec.push_back(el); + } + } + + return unique_vec; + }; + + std::vector> unique_layout_combinations; + for (const auto& layout : layout_combinations) { + unique_layout_combinations.push_back(remove_duplicates(layout)); + } + + if (!include_header) { + unique_layout_combinations.clear(); + unique_layout_combinations.push_back(layout); + } + + for (const auto& layout : unique_layout_combinations) { + ss::parser p{f.name, delim}; + + if (include_header) { + std::vector fields; + for (const auto& index : layout) { + fields.push_back(header[index]); + } + + p.use_fields(fields); + + if (!p.valid()) { + std::cout << p.error_msg() << std::endl; + } + + REQUIRE(p.valid()); + } + + auto check_error = [&p] { + CHECK(p.valid()); + if (!p.valid()) { + std::cout << p.error_msg() << std::endl; + } + }; + + int num_columns = layout.size(); + for (size_t i = 0; i < n + 1; ++i) { + switch (num_columns) { + case 1: { + auto s0 = p.template get_next(); + if (i < n) { + check_error(); + // std::cout << s0 << std::endl; + CHECK(s0 == expected_data[i][layout[0]].value); + } else { + CHECK(p.eof()); + CHECK(!p.valid()); + } + break; + } + case 2: { + auto [s0, s1] = p.template get_next(); + if (i < n) { + check_error(); + // std::cout << s0 << ' ' << s1 << std::endl; + CHECK(s0 == expected_data[i][layout[0]].value); + CHECK(s1 == expected_data[i][layout[1]].value); + } else { + CHECK(p.eof()); + CHECK(!p.valid()); + } + break; + } + case 3: { + auto [s0, s1, s2] = + p.template get_next(); + if (i < n) { + check_error(); + // std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl; + CHECK(s0 == expected_data[i][layout[0]].value); + CHECK(s1 == expected_data[i][layout[1]].value); + CHECK(s2 == expected_data[i][layout[2]].value); + } else { + CHECK(p.eof()); + CHECK(!p.valid()); + } + break; + } + case 4: { + auto [s0, s1, s2, s3] = + p.template get_next(); + if (i < n) { + check_error(); + /* + std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 + << std::endl; + */ + CHECK(s0 == expected_data[i][layout[0]].value); + CHECK(s1 == expected_data[i][layout[1]].value); + CHECK(s2 == expected_data[i][layout[2]].value); + CHECK(s3 == expected_data[i][layout[3]].value); + } else { + CHECK(p.eof()); + CHECK(!p.valid()); + } + break; + } + case 5: { + auto [s0, s1, s2, s3, s4] = + p.template get_next(); + if (i < n) { + check_error(); + // std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 + // << ' ' << s4 << std::endl; + CHECK(s0 == expected_data[i][layout[0]].value); + CHECK(s1 == expected_data[i][layout[1]].value); + CHECK(s2 == expected_data[i][layout[2]].value); + CHECK(s3 == expected_data[i][layout[3]].value); + CHECK(s4 == expected_data[i][layout[4]].value); + } else { + CHECK(p.eof()); + CHECK(!p.valid()); + } + break; + } + default: + FAIL(("Invalid number of columns: " + + std::to_string(num_columns))); + break; + } + } + } +} + +// TODO rename +template +void test_combinations_impl() { + column ints0 = + make_column("ints0", {field{123}, field{45}, field{6}}); + column ints1 = + make_column("ints1", {field{123}, field{45}, field{6}}); + column ints2 = + make_column("ints2", {field{123}, field{45}, field{6}}); + + column floats0 = + make_column("floats0", {field{1.23}, field{456.7}, field{0.8}, + field{910}, field{123456789.987654321}}); + column floats1 = + make_column("floats1", {field{1.23}, field{456.7}, field{0.8}, + field{910}, field{123456789.987654321}}); + column floats2 = + make_column("floats2", {field{1.23}, field{456.7}, field{0.8}, + field{910}, field{123456789.987654321}}); + + column strings0 = + make_column("strings0", {field{"just"}, field{"some"}, + field{"random"}, field{"string"}}); + + column strings1 = + make_column("strings1", {field{"st\"rings"}, field{"w\"\"ith"}, + field{"qu\"otes\\"}, field{"\\a\\n\\d"}, + field{"escapes\""}}); + +#ifdef _WIN32 + column strings2 = + make_column("strings2", {field{" with "}, field{" spaces"}, + field{"and "}, field{"\r\nnew"}, + field{" \r\nlines"}, + field{" a\r\n\r\nn\r\n\r\nd "}, + field{" \r\nso\r\n "}, field{"on"}}); +#else + column strings2 = + make_column("strings2", + {field{" with "}, field{" spaces"}, + field{"and "}, field{"\nnew"}, field{" \nlines"}, + field{" a\n\nn\n\nd "}, field{" \nso\n "}, + field{"on"}}); +#endif + + auto columns0 = std::vector{ints0, strings0, floats0, strings1, strings2}; + auto columns1 = std::vector{strings2, strings1, floats0, strings0, ints0}; + auto columns2 = std::vector{floats0, strings1, ints0, strings2, strings0}; + auto columns3 = std::vector{ints0, ints1, ints2}; + auto columns4 = std::vector{floats0, floats1, floats2}; + auto columns5 = std::vector{strings1, strings2}; + auto columns6 = std::vector{strings1}; + auto columns7 = std::vector{strings2}; + + for (size_t i = 0; i < 3; ++i) { + for (const auto& delimiter : {",", "-", "--"}) { + for (const auto& columns : + {columns0, columns1, columns2, columns3, columns4, columns5, + columns6, columns7}) { + test_combinations(columns, delimiter, false); + test_combinations(columns, delimiter, true); + } + } + } +} +} /* namespace */ + +TEST_CASE("parser test various cases version 2") { + // TODO handle crlf + using quote = ss::quote<'"'>; + using escape = ss::escape<'\\'>; + using trim = ss::trim<' '>; + using triml = ss::trim_left<' '>; + using trimr = ss::trim_right<' '>; + using multiline = ss::multiline; + + test_combinations_impl<>(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + /* TODO uncomment + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + */ + + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); + test_combinations_impl(); +} diff --git a/test/test_splitter.cpp b/test/test_splitter.cpp index 7b69d65..bf3ba16 100644 --- a/test/test_splitter.cpp +++ b/test/test_splitter.cpp @@ -153,7 +153,8 @@ make_combinations(const std::vector& input, using matches_type = std::vector>; template -void test_combinations(matches_type& matches, std::vector delims) { +static inline void test_combinations(matches_type& matches, + std::vector delims) { ss::splitter s; ss::splitter st; @@ -520,7 +521,8 @@ TEST_CASE("splitter test error mode") { } template -auto expect_unterminated_quote(Splitter& s, const std::string& line) { +static inline auto expect_unterminated_quote(Splitter& s, + const std::string& line) { try { auto vec = s.split(buff(line.c_str())); CHECK(s.valid());