From ce03c371ae7e6b15568cb978aa326351919f2d8f Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 17 Feb 2024 21:05:30 +0100 Subject: [PATCH] Split parser tests into multiple files, add more tests for buffer mode --- test/meson.build | 5 +- test/test_parser.cpp | 1771 --------------------------------------- test/test_parser1.hpp | 151 ++++ test/test_parser1_1.cpp | 534 ++++++++++++ test/test_parser1_2.cpp | 309 +++++++ test/test_parser1_3.cpp | 515 ++++++++++++ test/test_parser1_4.cpp | 291 +++++++ 7 files changed, 1804 insertions(+), 1772 deletions(-) delete mode 100644 test/test_parser.cpp create mode 100644 test/test_parser1.hpp create mode 100644 test/test_parser1_1.cpp create mode 100644 test/test_parser1_2.cpp create mode 100644 test/test_parser1_3.cpp create mode 100644 test/test_parser1_4.cpp diff --git a/test/meson.build b/test/meson.build index 25bf963..454b16f 100644 --- a/test/meson.build +++ b/test/meson.build @@ -2,7 +2,10 @@ doctest_dep = dependency('doctest') add_project_arguments('-DDOCTEST_CONFIG_IMPLEMENT_WITH_MAIN', language: 'cpp') tests = [ - 'parser', + 'parser1_1', + 'parser1_2', + 'parser1_3', + 'parser1_4', 'splitter', 'converter', 'extractions', diff --git a/test/test_parser.cpp b/test/test_parser.cpp deleted file mode 100644 index d1fc036..0000000 --- a/test/test_parser.cpp +++ /dev/null @@ -1,1771 +0,0 @@ -#include "test_helpers.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace { -[[maybe_unused]] void replace_all(std::string& s, const std::string& from, - const std::string& to) { - if (from.empty()) return; - size_t start_pos = 0; - while ((start_pos = s.find(from, start_pos)) != std::string::npos) { - s.replace(start_pos, from.length(), to); - start_pos += to.length(); - } -} - -template -void expect_error_on_command(ss::parser& p, - const std::function command) { - if (ss::setup::throw_on_error) { - try { - command(); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } else { - command(); - CHECK(!p.valid()); - if constexpr (ss::setup::string_error) { - CHECK_FALSE(p.error_msg().empty()); - } - } -} - -[[maybe_unused]] void update_if_crlf(std::string& s) { -#ifdef _WIN32 - replace_all(s, "\r\n", "\n"); -#else - (void)(s); -#endif -} - -struct X { - constexpr static auto delim = ","; - constexpr static auto empty = "_EMPTY_"; - int i; - double d; - std::string s; - - std::string to_string() const { - if (s == empty) { - return ""; - } - - return std::to_string(i) - .append(delim) - .append(std::to_string(d)) - .append(delim) - .append(s); - } - auto tied() const { - return std::tie(i, d, s); - } -}; - -template -std::enable_if_t, bool> operator==(const T& lhs, - const T& rhs) { - return lhs.tied() == rhs.tied(); -} - -template -static void make_and_write(const std::string& file_name, - const std::vector& data, - const std::vector& header = {}) { - std::ofstream out{file_name}; - -#ifdef _WIN32 - std::vector new_lines = {"\n"}; -#else - std::vector new_lines = {"\n", "\r\n"}; -#endif - - for (const auto& i : header) { - if (&i != &header.front()) { - out << T::delim; - } - out << i; - } - - if (!header.empty()) { - out << new_lines.front(); - } - - for (size_t i = 0; i < data.size(); ++i) { - out << data[i].to_string() << new_lines[i % new_lines.size()]; - } -} - -std::string make_buffer(const std::string& file_name) { - std::ifstream in{file_name, std::ios::binary}; - std::string tmp; - std::string out; - out.reserve(sizeof(out) + 1); - while (in >> tmp) { - out += tmp; - if (in.peek() == '\n') { - out += "\n"; - } - if (in.peek() == '\r') { - out += "\r\n"; - } - } - return out; -} - -template -std::tuple, std::string> make_parser( - const std::string& file_name, const std::string& delim) { - if (buffer_mode) { - auto buffer = make_buffer(file_name); - return {ss::parser{buffer.data(), buffer.size(), delim}, - std::move(buffer)}; - } else { - return {ss::parser{file_name, delim}, std::string{}}; - } -} - -} /* namespace */ - -TEST_CASE("test file not found") { - unique_file_name f{"test_parser"}; - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.valid()); - } - - try { - ss::parser p{f.name, ","}; - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } -} - -template -void test_various_cases() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, - {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; - make_and_write(f.name, data); - auto csv_data_buffer = make_buffer(f.name); - { - auto [p, _] = make_parser(f.name, ","); - ss::parser p0{std::move(p)}; - p = std::move(p0); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - auto move_rotate = [&p = p, &p0 = p0] { - auto p1 = std::move(p); - p0 = std::move(p1); - p = std::move(p0); - }; - - while (!p.eof()) { - move_rotate(); - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (const auto& a : p2.template iterate()) { - i2.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - auto [p3, ___] = make_parser(f.name, ","); - std::vector i3; - - std::vector expected = {std::begin(data) + 1, std::end(data)}; - using tup = std::tuple; - - p.ignore_next(); - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - p2.ignore_next(); - for (const auto& a : p2.template iterate()) { - i2.emplace_back(ss::to_object(a)); - } - - p3.ignore_next(); - for (auto it = p3.template iterate().begin(); - it != p3.template iterate().end(); ++it) { - i3.emplace_back(ss::to_object(*it)); - } - - CHECK_EQ(i, expected); - CHECK_EQ(i2, expected); - CHECK_EQ(i3, expected); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - i.push_back(p.template get_object()); - } - - for (auto&& a : - p2.template iterate_object()) { - i2.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - for (auto&& a : - p.template iterate_object()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - using tup = std::tuple; - while (!p.eof()) { - i.push_back(p.template get_object()); - } - - for (auto it = p2.template iterate_object().begin(); - it != p2.template iterate_object().end(); it++) { - i2.push_back({it->i, it->d, it->s}); - } - - CHECK_EQ(i, data); - CHECK_EQ(i2, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - using tup = std::tuple; - for (auto&& a : p.template iterate_object()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - while (!p.eof()) { - i.push_back(p.template get_next()); - } - - CHECK_EQ(i, data); - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - for (auto&& a : p.template iterate()) { - i.push_back(std::move(a)); - } - - CHECK_EQ(i, data); - } - - { - constexpr int excluded = 3; - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - try { - auto a = p.template get_object, double, - std::string>(); - if (p.valid()) { - i.push_back(a); - } - } catch (...) { - // ignore - }; - } - - if (!ss::setup::throw_on_error) { - for (auto&& a : p2.template iterate_object, - double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); - } - } - } - - std::vector expected; - for (auto& x : data) { - if (x.i != excluded) { - expected.push_back(x); - } - } - - std::copy_if(data.begin(), data.end(), expected.begin(), - [&](const X& x) { return x.i != excluded; }); - CHECK_EQ(i, expected); - - if (!ss::setup::throw_on_error) { - CHECK_EQ(i2, expected); - } - } - - { - auto [p, _] = make_parser(f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(f.name, ","); - std::vector i2; - - while (!p.eof()) { - try { - auto a = p.template get_object, double, - std::string>(); - if (p.valid()) { - i.push_back(a); - } - } catch (...) { - // ignore - } - } - - if (!ss::setup::throw_on_error) { - for (auto&& a : p2.template iterate_object, - double, std::string>()) { - if (p2.valid()) { - i2.push_back(std::move(a)); - } - } - } - - std::vector expected = {{3, 4, "y"}}; - CHECK_EQ(i, expected); - if (!ss::setup::throw_on_error) { - CHECK_EQ(i2, expected); - } - } - - { - unique_file_name empty_f{"test_parser"}; - std::vector empty_data = {}; - - make_and_write(empty_f.name, empty_data); - - auto [p, _] = make_parser(empty_f.name, ","); - std::vector i; - - auto [p2, __] = make_parser(empty_f.name, ","); - std::vector i2; - - while (!p.eof()) { - i.push_back(p.template get_next()); - } - - for (auto&& a : p2.template iterate()) { - i2.push_back(std::move(a)); - } - - CHECK(i.empty()); - CHECK(i2.empty()); - } -} - -TEST_CASE("parser test various cases") { - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); - test_various_cases(); -} - -using test_tuple = std::tuple; -struct test_struct { - int i; - double d; - char c; - auto tied() { - return std::tie(i, d, c); - } -}; - -static inline void expect_test_struct(const test_struct&) { -} - -template -void test_composite_conversion() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - for (auto& i : - {"10,a,11.1", "10,20,11.1", "junk", "10,11.1", "1,11.1,a", "junk", - "10,junk", "11,junk", "10,11.1,c", "10,20", "10,22.2,f"}) { - out << i << std::endl; - } - } - - auto [p, _] = make_parser(f.name, ","); - auto fail = [] { FAIL(""); }; - auto expect_error = [](auto error) { CHECK(!error.empty()); }; - auto ignore_error = [] {}; - - REQUIRE(p.valid()); - REQUIRE_FALSE(p.eof()); - - { - constexpr static auto expectedData = std::tuple{10, 'a', 11.1}; - - auto [d1, d2, d3, d4] = - p.template try_next(fail) - .template or_else(fail) - .template or_else( - [&](auto&& data) { CHECK_EQ(data, expectedData); }) - .on_error(fail) - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(*d3, expectedData); - } - - { - REQUIRE(!p.eof()); - constexpr static auto expectedData = std::tuple{10, 20, 11.1}; - - auto [d1, d2, d3, d4] = - p.template try_next( - [&](auto& i1, auto i2, double d) { - CHECK_EQ(std::tie(i1, i2, d), expectedData); - }) - .on_error(fail) - .template or_object(fail) - .on_error(fail) - .template or_else(fail) - .on_error(fail) - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(*d1, expectedData); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2, d3, d4, d5] = - p.template try_object(fail) - .on_error(expect_error) - .template or_else(fail) - .template or_else(fail) - .template or_else(fail) - .template or_else(fail) - .values(); - - REQUIRE_FALSE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - REQUIRE_FALSE(d5); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next([](auto& i, auto& d) { - REQUIRE_EQ(std::tie(i, d), std::tuple{10, 11.1}); - }) - .template or_else([](auto&, auto&) { FAIL(""); }) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next([](auto&, auto&) { FAIL(""); }) - .template or_else(expect_test_struct) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE(d2); - CHECK_EQ(d2->tied(), std::tuple{1, 11.1, 'a'}); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2, d3, d4, d5] = - p.template try_next(fail) - .template or_object() - .template or_else(expect_test_struct) - .template or_else(fail) - .template or_else>(fail) - .on_error(ignore_error) - .on_error(expect_error) - .values(); - - REQUIRE_FALSE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE_FALSE(d3); - REQUIRE_FALSE(d4); - REQUIRE_FALSE(d5); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = - p.template try_next>() - .on_error(ignore_error) - .on_error(fail) - .template or_else>(fail) - .on_error(ignore_error) - .on_error(fail) - .on_error(ignore_error) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(*d1, std::tuple{10, std::nullopt}); - } - - { - REQUIRE_FALSE(p.eof()); - - auto [d1, d2] = - p.template try_next>() - .on_error(fail) - .template or_else>(fail) - .on_error(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(*d1, std::tuple{11, std::variant{"junk"}}); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2] = p.template try_object() - .template or_else(fail) - .values(); - REQUIRE(p.valid()); - REQUIRE(d1); - REQUIRE_FALSE(d2); - CHECK_EQ(d1->tied(), std::tuple{10, 11.1, 'c'}); - } - - { - REQUIRE_FALSE(p.eof()); - - auto [d1, d2, d3, d4] = - p.template try_next([] { return false; }) - .template or_else([](auto&) { return false; }) - .template or_else() - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(d3.value(), std::tuple{10, 20}); - } - - { - REQUIRE(!p.eof()); - - auto [d1, d2, d3, d4] = - p.template try_object( - [] { return false; }) - .template or_else([](auto&) { return false; }) - .template or_object() - .template or_else(fail) - .values(); - - REQUIRE(p.valid()); - REQUIRE_FALSE(d1); - REQUIRE_FALSE(d2); - REQUIRE(d3); - REQUIRE_FALSE(d4); - CHECK_EQ(d3->tied(), std::tuple{10, 22.2, 'f'}); - } - - CHECK(p.eof()); -} - -// various scenarios -TEST_CASE("parser test composite conversion") { - test_composite_conversion(); - test_composite_conversion(); -} - -struct my_string { - char* data{nullptr}; - - my_string() = default; - - ~my_string() { - delete[] data; - } - - // make sure no object is copied - my_string(const my_string&) = delete; - my_string& operator=(const my_string&) = delete; - - my_string(my_string&& other) : data{other.data} { - other.data = nullptr; - } - - my_string& operator=(my_string&& other) { - data = other.data; - return *this; - } -}; - -template <> -inline bool ss::extract(const char* begin, const char* end, my_string& s) { - size_t size = end - begin; - s.data = new char[size + 1]; - strncpy(s.data, begin, size); - s.data[size] = '\0'; - return true; -} - -struct xyz { - my_string x; - my_string y; - my_string z; - auto tied() { - return std::tie(x, y, z); - } -}; - -template -void test_moving_of_parsed_composite_values() { - // to compile is enough - return; - ss::parser p{"", ""}; - p.template try_next() - .template or_else( - [](auto&&) {}) - .template or_else([](auto&) {}) - .template or_else([](auto&&) {}) - .template or_object([](auto&&) {}) - .template or_else>( - [](auto&, auto&, auto&) {}); -} - -TEST_CASE("parser test the moving of parsed composite values") { - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); - test_moving_of_parsed_composite_values(); -} - -TEST_CASE("parser test error mode") { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "junk" << std::endl; - out << "junk" << std::endl; - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - p.get_next(); - CHECK_FALSE(p.valid()); - CHECK_FALSE(p.error_msg().empty()); - } -} - -TEST_CASE("parser throw on error mode") { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "junk" << std::endl; - out << "junk" << std::endl; - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } - - { - auto [p, _] = make_parser(f.name, ","); - - REQUIRE_FALSE(p.eof()); - try { - p.get_next(); - FAIL("Expected exception..."); - } catch (const std::exception& e) { - CHECK_FALSE(std::string{e.what()}.empty()); - } - } -} - -static inline std::string no_quote(const std::string& s) { - if (!s.empty() && s[0] == '"') { - return {std::next(begin(s)), std::prev(end(s))}; - } - return s; -} - -template -void test_quote_multiline() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "\"x\r\nx\nx\""}, - {3, 4, "\"y\ny\r\ny\""}, - {5, 6, "\"z\nz\""}, - {7, 8, "\"u\"\"\""}, - {9, 10, "v"}, - {11, 12, "\"w\n\""}}; - for (auto& [_, __, s] : data) { - update_if_crlf(s); - } - - make_and_write(f.name, data); - for (auto& [_, __, s] : data) { - s = no_quote(s); - if (s[0] == 'u') { - s = "u\""; - } - } - - auto [p, buff] = - make_parser, Ts...>(f.name, - ","); - - std::vector i; - - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); - - auto [p_no_multiline, __] = - make_parser, Ts...>(f.name, ","); - while (!p.eof()) { - auto command = [&p_no_multiline = p_no_multiline] { - p_no_multiline.template get_next(); - }; - expect_error_on_command(p_no_multiline, command); - } -} - -TEST_CASE("parser test csv on multiple lines with quotes") { - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); - test_quote_multiline(); -} - -static inline std::string no_escape(std::string& s) { - s.erase(std::remove(begin(s), end(s), '\\'), end(s)); - return s; -} - -template -void test_escape_multiline() { - unique_file_name f{"test_parser"}; - std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, - {5, 6, "z\\\nz\\\nz"}, - {7, 8, "u"}, - {3, 4, "y\\\ny\\\ny"}, - {9, 10, "v\\\\"}, - {11, 12, "w\\\n"}}; - for (auto& [_, __, s] : data) { - update_if_crlf(s); - } - - make_and_write(f.name, data); - for (auto& [_, __, s] : data) { - s = no_escape(s); - if (s == "v") { - s = "v\\"; - } - } - - ss::parser, Ts...> p{f.name, ","}; - std::vector i; - - while (!p.eof()) { - auto a = p.template get_next(); - i.emplace_back(ss::to_object(a)); - } - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); - - ss::parser, Ts...> p_no_multiline{f.name, ","}; - while (!p.eof()) { - auto command = [&] { - auto a = - p_no_multiline.template get_next(); - }; - expect_error_on_command(p_no_multiline, command); - } -} - -TEST_CASE("parser test csv on multiple lines with escapes") { - test_escape_multiline(); - test_escape_multiline(); - test_escape_multiline(); -} - -template -void test_quote_escape_multiline() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "1,2,\"just\\\n\nstrings\"" << std::endl; -#ifndef _WIN32 - out << "3,4,\"just\r\nsome\\\r\n\n\\\nstrings\"" << std::endl; - out << "5,6,\"just\\\n\\\r\n\r\n\nstrings" << std::endl; -#else - out << "3,4,\"just\nsome\\\n\n\\\nstrings\"" << std::endl; - out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl; -#endif - out << "7,8,\"just strings\"" << std::endl; - out << "9,10,just strings" << std::endl; - } - size_t bad_lines = 1; - auto num_errors = 0; - - ss::parser, ss::quote<'"'>, Ts...> p{ - f.name}; - std::vector i; - - while (!p.eof()) { - try { - auto a = p.template get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); - } else { - ++num_errors; - } - } catch (const std::exception& e) { - ++num_errors; - } - } - - CHECK(bad_lines == num_errors); - - std::vector data = {{1, 2, "just\n\nstrings"}, -#ifndef _WIN32 - {3, 4, "just\r\nsome\r\n\n\nstrings"}, -#else - {3, 4, "just\nsome\n\n\nstrings"}, -#endif - {9, 10, "just strings"}}; - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - CHECK_EQ(i, data); -} - -TEST_CASE("parser test csv on multiple lines with quotes and escapes") { - test_quote_escape_multiline(); - test_quote_escape_multiline(); - test_quote_escape_multiline(); -} - -template -void test_multiline_restricted() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "1,2,\"just\n\nstrings\"" << std::endl; -#ifndef _WIN32 - out << "3,4,\"ju\n\r\n\nnk\"" << std::endl; - out << "5,6,just\\\n\\\r\nstrings" << std::endl; -#else - out << "3,4,\"ju\n\n\nnk\"" << std::endl; - out << "5,6,just\\\n\\\nstrings" << std::endl; -#endif - out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; - out << "9,10,\"just\\\n\nstrings\"" << std::endl; - out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; - out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl; - out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; - out << "19,20,just strings" << std::endl; - } - auto bad_lines = 15; - auto num_errors = 0; - - ss::parser, ss::quote<'"'>, ss::escape<'\\'>, - Ts...> - p{f.name, ","}; - std::vector i; - - while (!p.eof()) { - try { - auto a = p.template get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); - } else { - ++num_errors; - } - } catch (const std::exception& e) { - ++num_errors; - } - } - - CHECK(bad_lines == num_errors); - - std::vector data = {{1, 2, "just\n\nstrings"}, -#ifndef _WIN32 - {5, 6, "just\n\r\nstrings"}, -#else - {5, 6, "just\n\nstrings"}, -#endif - {9, 10, "just\n\nstrings"}, - {19, 20, "just strings"}}; - - for (auto& [_, __, s] : i) { - update_if_crlf(s); - } - - if (i.size() != data.size()) { - CHECK_EQ(i.size(), data.size()); - } - - CHECK_EQ(i, data); -} - -TEST_CASE("parser test multiline restricted") { - test_multiline_restricted(); - test_multiline_restricted(); - test_multiline_restricted(); -} - -template -void test_unterminated_line_impl(const std::vector& lines, - size_t bad_line) { - unique_file_name f{"test_parser"}; - std::ofstream out{f.name}; - for (const auto& line : lines) { - out << line << std::endl; - } - out.close(); - - ss::parser p{f.name}; - size_t line = 0; - while (!p.eof()) { - auto command = [&] { p.template get_next(); }; - - if (line == bad_line) { - expect_error_on_command(p, command); - break; - } else { - command(); - CHECK(p.valid()); - ++line; - } - } -} - -template -void test_unterminated_line(const std::vector& lines, - size_t bad_line) { - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); - test_unterminated_line_impl(lines, bad_line); -} - -TEST_CASE("parser test csv on multiline with errors") { - using multiline = ss::multiline_restricted<3>; - using escape = ss::escape<'\\'>; - using quote = ss::quote<'"'>; - - // unterminated escape - { - const std::vector lines{"1,2,just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,just\\", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,just\\"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,just\\", - "3,4,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", - "1,2,just\\\nstrings\\", - "3,4,th\\\nird"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "3,4,second", - "1,2,just\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - { - const std::vector lines{"9,8,\\first", "3,4,second", - "1,2,jus\\t\\"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - // unterminated quote - { - const std::vector lines{"1,2,\"just"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just", "9,8,second"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just", - "3,4,th\\,ird"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "3,4,second", - "1,2,\"just"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - { - const std::vector lines{"9,8,\"first\"", - "\"3\",4,\"sec,ond\"", - "1,2,\"ju\"\"st"}; - test_unterminated_line(lines, 2); - test_unterminated_line(lines, 2); - } - - // unterminated quote and escape - { - const std::vector lines{"1,2,\"just\\"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just\\\n\\"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"1,2,\"just\n\\"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just\n\\"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", "1,2,\"just\n\\", - "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,f\\\nirst", "1,2,\"just\n\\", - "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"f\ni\nrst\"", - "1,2,\"just\n\\", "4,3,thrid"}; - test_unterminated_line(lines, 1); - } - - // multiline limmit reached escape - { - const std::vector lines{"1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,fi\\\nrs\\\nt", - "1,2,\\\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,first", - "1,2,\\\n\\\n\\\n\\\njust", - "4,3,third"}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - // multiline limmit reached quote - { - const std::vector lines{"1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 0); - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fir\nst\"", - "1,2,\"\n\n\n\n\njust\""}; - test_unterminated_line(lines, 1); - test_unterminated_line(lines, 1); - } - - // multiline limmit reached quote and escape - { - const std::vector lines{"1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 0); - } - - { - const std::vector lines{"9,8,first", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,fi\\\nrst", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fi\nrst\"", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } - - { - const std::vector lines{"9,8,\"fi\nr\\\nst\"", - "1,2,\"\\\n\n\\\n\\\n\\\njust"}; - test_unterminated_line(lines, 1); - } -} - -template -struct has_type; - -template -struct has_type> - : std::disjunction...> {}; - -static inline void check_size(size_t size1, size_t size2) { - CHECK_EQ(size1, size2); -} - -template -static void test_fields_impl(const std::string file_name, - const std::vector& data, - const std::vector& fields) { - using CaseType = std::tuple; - - ss::parser p{file_name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - p.use_fields(fields); - std::vector i; - - for (const auto& a : p.template iterate()) { - i.push_back(a); - } - - check_size(i.size(), data.size()); - for (size_t j = 0; j < i.size(); ++j) { - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].i); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].d); - } - if constexpr (has_type::value) { - CHECK_EQ(std::get(i[j]), data[j].s); - } - } -} - -template -static void test_fields(const std::string file_name, const std::vector& data, - const std::vector& fields) { - test_fields_impl, Ts...>(file_name, data, fields); - test_fields_impl, Ts...>(file_name, data, - fields); - test_fields_impl, Ts...>(file_name, data, - fields); -} - -TEST_CASE("parser test various cases with header") { - unique_file_name f{"test_parser"}; - constexpr static auto Int = "Int"; - constexpr static auto Dbl = "Double"; - constexpr static auto Str = "String"; - using str = std::string; - - std::vector header{Int, Dbl, Str}; - - std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, - {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; - - make_and_write(f.name, data, header); - const auto& o = f.name; - const auto& d = data; - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_NE(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - p.ignore_next(); - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - - for (const auto& a : p.iterate()) { - i.emplace_back(ss::to_object(a)); - } - - CHECK_EQ(i, data); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl, Str); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - CHECK_FALSE(p.field_exists("Unknown")); - - p.use_fields(Int, "Unknown"); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Int); - CHECK_FALSE(p.valid()); - } - - { - ss::parser p{f.name, ","}; - p.use_fields(Int, Dbl); - - { - auto [int_, double_] = p.get_next(); - CHECK_EQ(int_, data[0].i); - CHECK_EQ(double_, data[0].d); - } - - p.use_fields(Dbl, Int); - - { - auto [double_, int_] = p.get_next(); - CHECK_EQ(int_, data[1].i); - CHECK_EQ(double_, data[1].d); - } - - p.use_fields(Str); - - { - auto string_ = p.get_next(); - CHECK_EQ(string_, data[2].s); - } - - p.use_fields(Str, Int, Dbl); - - { - auto [string_, int_, double_] = - p.get_next(); - CHECK_EQ(double_, data[3].d); - CHECK_EQ(int_, data[3].i); - CHECK_EQ(string_, data[3].s); - } - } - - /* python used to generate permutations - import itertools - - header = {'str': 'Str', - 'double': 'Dbl', - 'int': 'Int'} - - keys = ['str', 'int', 'double'] - - for r in range (1, 3): - combinations = list(itertools.permutations(keys, r = r)) - - for combination in combinations: - template_params = [] - arg_params = [] - for type in combination: - template_params.append(type) - arg_params.append(header[type]) - call = 'testFields<' + ', '.join(template_params) + \ - '>(o, d, {' + ', '.join(arg_params) + '});' - print(call) - */ - - test_fields(o, d, {Str}); - test_fields(o, d, {Int}); - test_fields(o, d, {Dbl}); - test_fields(o, d, {Str, Int}); - test_fields(o, d, {Str, Dbl}); - test_fields(o, d, {Int, Str}); - test_fields(o, d, {Int, Dbl}); - test_fields(o, d, {Dbl, Str}); - test_fields(o, d, {Dbl, Int}); - test_fields(o, d, {Str, Int, Dbl}); - test_fields(o, d, {Str, Dbl, Int}); - test_fields(o, d, {Int, Str, Dbl}); - test_fields(o, d, {Int, Dbl, Str}); - test_fields(o, d, {Dbl, Str, Int}); - test_fields(o, d, {Dbl, Int, Str}); -} - -template -void test_invalid_fields_impl(const std::vector& lines, - const std::vector& fields) { - unique_file_name f{"test_parser"}; - std::ofstream out{f.name}; - for (const auto& line : lines) { - out << line << std::endl; - } - out.close(); - - { - // No fields specified - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(); }; - expect_error_on_command(p, command); - } - - { - // Unknown field - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields("Unknown"); }; - expect_error_on_command(p, command); - } - - { - // Field used multiple times - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields.at(0), fields.at(0)); }; - if (!fields.empty()) { - expect_error_on_command(p, command); - } - } - - { - // Mapping out of range - ss::parser p{f.name, ","}; - auto command = [&] { - p.use_fields(fields.at(0)); - p.template get_next(); - }; - if (!fields.empty()) { - expect_error_on_command(p, command); - } - } - - { - // Invalid header - ss::parser p{f.name, ","}; - auto command = [&] { p.use_fields(fields); }; - - if (!fields.empty()) { - // Pass if there are no duplicates, fail otherwise - if (std::unordered_set{fields.begin(), fields.end()} - .size() != fields.size()) { - expect_error_on_command(p, command); - } else { - command(); - CHECK(p.valid()); - if (!p.valid()) { - if constexpr (ss::setup::string_error) { - std::cout << p.error_msg() << std::endl; - } - } - } - } - } -} - -template -void test_invalid_fields(const std::vector& lines, - const std::vector& fields) { - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); - test_invalid_fields_impl(lines, fields); -} - -TEST_CASE("parser test invalid header fields usage") { - test_invalid_fields({}, {}); - - test_invalid_fields({"Int"}, {"Int"}); - test_invalid_fields({"Int", "1"}, {"Int"}); - test_invalid_fields({"Int", "1", "2"}, {"Int"}); - - test_invalid_fields({"Int,String"}, {"Int", "String"}); - test_invalid_fields({"Int,String", "1,hi"}, {"Int", "String"}); - test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); - - test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, - {"Int", "String", "Double"}); - test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, - {"Int", "String", "Double"}); - - test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); - test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); - - test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); - test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); -} - -template -void test_invalid_rows_with_header() { - unique_file_name f{"test_parser"}; - { - std::ofstream out{f.name}; - out << "Int,String,Double" << std::endl; - out << "1,line1,2.34" << std::endl; - out << "2,line2" << std::endl; - out << "3,line3,67.8" << std::endl; - out << "4,line4,67.8,9" << std::endl; - out << "5,line5,9.10" << std::endl; - out << "six,line6,10.11" << std::endl; - } - - { - ss::parser p{f.name}; - - p.use_fields("Int", "String", "Double"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{1, "line1", 2.34}, - {3, "line3", 67.8}, - {5, "line5", 9.10}}; - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name}; - - p.use_fields("Double", "Int"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name}; - - p.use_fields("String", "Double"); - using data = std::tuple; - std::vector i; - - CHECK(p.valid()); - - while (!p.eof()) { - try { - const auto& t = p.template get_next(); - if (p.valid()) { - i.push_back(t); - } - } catch (const ss::exception&) { - continue; - } - } - - std::vector expected = {{"line1", 2.34}, - {"line3", 67.8}, - {"line5", 9.10}, - {"line6", 10.11}}; - CHECK_EQ(i, expected); - } -} - -TEST_CASE("parser test invalid rows with header") { - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); - test_invalid_rows_with_header(); -} - -template -void test_ignore_empty_impl(const std::vector& data) { - unique_file_name f{"test_parser"}; - make_and_write(f.name, data); - - std::vector expected; - for (const auto& d : data) { - if (d.s != X::empty) { - expected.push_back(d); - } - } - - { - ss::parser p{f.name, ","}; - - std::vector i; - for (const auto& a : p.template iterate()) { - i.push_back(a); - } - - CHECK_EQ(i, expected); - } - - { - ss::parser p{f.name, ","}; - std::vector i; - size_t n = 0; - while (!p.eof()) { - try { - ++n; - const auto& a = p.template get_next(); - if (data.at(n - 1).s == X::empty) { - CHECK_FALSE(p.valid()); - continue; - } - i.push_back(a); - } catch (...) { - CHECK_EQ(data.at(n - 1).s, X::empty); - } - } - - CHECK_EQ(i, expected); - } -} - -template -void test_ignore_empty(const std::vector& data) { - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); - test_ignore_empty_impl(data); -} - -TEST_CASE("parser test various cases with empty lines") { - test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty({{1, 2, X::empty}, - {3, 4, X::empty}, - {9, 10, X::empty}, - {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, "x"}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, X::empty}}); - - test_ignore_empty( - {{1, 2, X::empty}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, "w"}}); - - test_ignore_empty({{11, 12, X::empty}}); - - test_ignore_empty({}); -} diff --git a/test/test_parser1.hpp b/test/test_parser1.hpp new file mode 100644 index 0000000..a68a939 --- /dev/null +++ b/test/test_parser1.hpp @@ -0,0 +1,151 @@ +#pragma once + +#include "test_helpers.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { +[[maybe_unused]] void replace_all(std::string& s, const std::string& from, + const std::string& to) { + if (from.empty()) return; + size_t start_pos = 0; + while ((start_pos = s.find(from, start_pos)) != std::string::npos) { + s.replace(start_pos, from.length(), to); + start_pos += to.length(); + } +} + +template +void expect_error_on_command(ss::parser& p, + const std::function command) { + if (ss::setup::throw_on_error) { + try { + command(); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } else { + command(); + CHECK(!p.valid()); + if constexpr (ss::setup::string_error) { + CHECK_FALSE(p.error_msg().empty()); + } + } +} + +[[maybe_unused]] void update_if_crlf(std::string& s) { +#ifdef _WIN32 + replace_all(s, "\r\n", "\n"); +#else + (void)(s); +#endif +} + +struct X { + constexpr static auto delim = ","; + constexpr static auto empty = "_EMPTY_"; + int i; + double d; + std::string s; + + std::string to_string() const { + if (s == empty) { + return ""; + } + + return std::to_string(i) + .append(delim) + .append(std::to_string(d)) + .append(delim) + .append(s); + } + auto tied() const { + return std::tie(i, d, s); + } +}; + +template +std::enable_if_t, bool> operator==(const T& lhs, + const T& rhs) { + return lhs.tied() == rhs.tied(); +} + +template +static void make_and_write(const std::string& file_name, + const std::vector& data, + const std::vector& header = {}) { + std::ofstream out{file_name}; + +#ifdef _WIN32 + std::vector new_lines = {"\n"}; +#else + std::vector new_lines = {"\n", "\r\n"}; +#endif + + for (const auto& i : header) { + if (&i != &header.front()) { + out << T::delim; + } + out << i; + } + + if (!header.empty()) { + out << new_lines.front(); + } + + for (size_t i = 0; i < data.size(); ++i) { + out << data[i].to_string() << new_lines[i % new_lines.size()]; + } +} + +std::string make_buffer(const std::string& file_name) { + std::ifstream in{file_name, std::ios::binary}; + std::string tmp; + std::string out; + out.reserve(sizeof(out) + 1); + while (in >> tmp) { + out += tmp; + std::string matches = "\n\r\t "; + while (std::any_of(matches.begin(), matches.end(), + [&](auto c) { return in.peek() == c; })) { + if (in.peek() == '\r') { + out += "\r\n"; + in.ignore(2); + } else { + out += std::string{static_cast(in.peek())}; + in.ignore(1); + } + } + } + return out; +} + +template +std::tuple, std::string> make_parser( + const std::string& file_name, const std::string& delim = "") { + if (buffer_mode) { + auto buffer = make_buffer(file_name); + if (delim.empty()) { + return {ss::parser{buffer.data(), buffer.size()}, + std::move(buffer)}; + } else { + return {ss::parser{buffer.data(), buffer.size(), delim}, + std::move(buffer)}; + } + } else { + if (delim.empty()) { + return {ss::parser{file_name}, std::string{}}; + } else { + return {ss::parser{file_name, delim}, std::string{}}; + } + } +} + +} /* namespace */ diff --git a/test/test_parser1_1.cpp b/test/test_parser1_1.cpp new file mode 100644 index 0000000..60787f2 --- /dev/null +++ b/test/test_parser1_1.cpp @@ -0,0 +1,534 @@ +#include "test_parser1.hpp" + +TEST_CASE("test file not found") { + unique_file_name f{"test_parser"}; + + { + ss::parser p{f.name, ","}; + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + CHECK_FALSE(p.valid()); + } + + try { + ss::parser p{f.name, ","}; + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } +} + +template +void test_various_cases() { + unique_file_name f{"test_parser"}; + std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, + {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; + make_and_write(f.name, data); + auto csv_data_buffer = make_buffer(f.name); + { + auto [p, _] = make_parser(f.name, ","); + ss::parser p0{std::move(p)}; + p = std::move(p0); + std::vector i; + + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + auto move_rotate = [&p = p, &p0 = p0] { + auto p1 = std::move(p); + p0 = std::move(p1); + p = std::move(p0); + }; + + while (!p.eof()) { + move_rotate(); + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + for (const auto& a : p2.template iterate()) { + i2.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + CHECK_EQ(i2, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + auto [p3, ___] = make_parser(f.name, ","); + std::vector i3; + + std::vector expected = {std::begin(data) + 1, std::end(data)}; + using tup = std::tuple; + + p.ignore_next(); + while (!p.eof()) { + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + p2.ignore_next(); + for (const auto& a : p2.template iterate()) { + i2.emplace_back(ss::to_object(a)); + } + + p3.ignore_next(); + for (auto it = p3.template iterate().begin(); + it != p3.template iterate().end(); ++it) { + i3.emplace_back(ss::to_object(*it)); + } + + CHECK_EQ(i, expected); + CHECK_EQ(i2, expected); + CHECK_EQ(i3, expected); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + while (!p.eof()) { + i.push_back(p.template get_object()); + } + + for (auto&& a : + p2.template iterate_object()) { + i2.push_back(std::move(a)); + } + + CHECK_EQ(i, data); + CHECK_EQ(i2, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + for (auto&& a : + p.template iterate_object()) { + i.push_back(std::move(a)); + } + + CHECK_EQ(i, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + using tup = std::tuple; + while (!p.eof()) { + i.push_back(p.template get_object()); + } + + for (auto it = p2.template iterate_object().begin(); + it != p2.template iterate_object().end(); it++) { + i2.push_back({it->i, it->d, it->s}); + } + + CHECK_EQ(i, data); + CHECK_EQ(i2, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + using tup = std::tuple; + for (auto&& a : p.template iterate_object()) { + i.push_back(std::move(a)); + } + + CHECK_EQ(i, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + while (!p.eof()) { + i.push_back(p.template get_next()); + } + + CHECK_EQ(i, data); + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + for (auto&& a : p.template iterate()) { + i.push_back(std::move(a)); + } + + CHECK_EQ(i, data); + } + + { + constexpr int excluded = 3; + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + while (!p.eof()) { + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) { + // ignore + }; + } + + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } + } + } + + std::vector expected; + for (auto& x : data) { + if (x.i != excluded) { + expected.push_back(x); + } + } + + std::copy_if(data.begin(), data.end(), expected.begin(), + [&](const X& x) { return x.i != excluded; }); + CHECK_EQ(i, expected); + + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } + } + + { + auto [p, _] = make_parser(f.name, ","); + std::vector i; + + auto [p2, __] = make_parser(f.name, ","); + std::vector i2; + + while (!p.eof()) { + try { + auto a = p.template get_object, double, + std::string>(); + if (p.valid()) { + i.push_back(a); + } + } catch (...) { + // ignore + } + } + + if (!ss::setup::throw_on_error) { + for (auto&& a : p2.template iterate_object, + double, std::string>()) { + if (p2.valid()) { + i2.push_back(std::move(a)); + } + } + } + + std::vector expected = {{3, 4, "y"}}; + CHECK_EQ(i, expected); + if (!ss::setup::throw_on_error) { + CHECK_EQ(i2, expected); + } + } + + { + unique_file_name empty_f{"test_parser"}; + std::vector empty_data = {}; + + make_and_write(empty_f.name, empty_data); + + auto [p, _] = make_parser(empty_f.name, ","); + std::vector i; + + auto [p2, __] = make_parser(empty_f.name, ","); + std::vector i2; + + while (!p.eof()) { + i.push_back(p.template get_next()); + } + + for (auto&& a : p2.template iterate()) { + i2.push_back(std::move(a)); + } + + CHECK(i.empty()); + CHECK(i2.empty()); + } +} + +TEST_CASE("parser test various cases") { + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); + test_various_cases(); +} + +using test_tuple = std::tuple; +struct test_struct { + int i; + double d; + char c; + auto tied() { + return std::tie(i, d, c); + } +}; + +static inline void expect_test_struct(const test_struct&) { +} + +template +void test_composite_conversion() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + for (auto& i : + {"10,a,11.1", "10,20,11.1", "junk", "10,11.1", "1,11.1,a", "junk", + "10,junk", "11,junk", "10,11.1,c", "10,20", "10,22.2,f"}) { + out << i << std::endl; + } + } + + auto [p, _] = make_parser(f.name, ","); + auto fail = [] { FAIL(""); }; + auto expect_error = [](auto error) { CHECK(!error.empty()); }; + auto ignore_error = [] {}; + + REQUIRE(p.valid()); + REQUIRE_FALSE(p.eof()); + + { + constexpr static auto expectedData = std::tuple{10, 'a', 11.1}; + + auto [d1, d2, d3, d4] = + p.template try_next(fail) + .template or_else(fail) + .template or_else( + [&](auto&& data) { CHECK_EQ(data, expectedData); }) + .on_error(fail) + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(*d3, expectedData); + } + + { + REQUIRE(!p.eof()); + constexpr static auto expectedData = std::tuple{10, 20, 11.1}; + + auto [d1, d2, d3, d4] = + p.template try_next( + [&](auto& i1, auto i2, double d) { + CHECK_EQ(std::tie(i1, i2, d), expectedData); + }) + .on_error(fail) + .template or_object(fail) + .on_error(fail) + .template or_else(fail) + .on_error(fail) + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(*d1, expectedData); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4, d5] = + p.template try_object(fail) + .on_error(expect_error) + .template or_else(fail) + .template or_else(fail) + .template or_else(fail) + .template or_else(fail) + .values(); + + REQUIRE_FALSE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + REQUIRE_FALSE(d5); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next([](auto& i, auto& d) { + REQUIRE_EQ(std::tie(i, d), std::tuple{10, 11.1}); + }) + .template or_else([](auto&, auto&) { FAIL(""); }) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next([](auto&, auto&) { FAIL(""); }) + .template or_else(expect_test_struct) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE(d2); + CHECK_EQ(d2->tied(), std::tuple{1, 11.1, 'a'}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4, d5] = + p.template try_next(fail) + .template or_object() + .template or_else(expect_test_struct) + .template or_else(fail) + .template or_else>(fail) + .on_error(ignore_error) + .on_error(expect_error) + .values(); + + REQUIRE_FALSE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE_FALSE(d3); + REQUIRE_FALSE(d4); + REQUIRE_FALSE(d5); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = + p.template try_next>() + .on_error(ignore_error) + .on_error(fail) + .template or_else>(fail) + .on_error(ignore_error) + .on_error(fail) + .on_error(ignore_error) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(*d1, std::tuple{10, std::nullopt}); + } + + { + REQUIRE_FALSE(p.eof()); + + auto [d1, d2] = + p.template try_next>() + .on_error(fail) + .template or_else>(fail) + .on_error(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(*d1, std::tuple{11, std::variant{"junk"}}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2] = p.template try_object() + .template or_else(fail) + .values(); + REQUIRE(p.valid()); + REQUIRE(d1); + REQUIRE_FALSE(d2); + CHECK_EQ(d1->tied(), std::tuple{10, 11.1, 'c'}); + } + + { + REQUIRE_FALSE(p.eof()); + + auto [d1, d2, d3, d4] = + p.template try_next([] { return false; }) + .template or_else([](auto&) { return false; }) + .template or_else() + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(d3.value(), std::tuple{10, 20}); + } + + { + REQUIRE(!p.eof()); + + auto [d1, d2, d3, d4] = + p.template try_object( + [] { return false; }) + .template or_else([](auto&) { return false; }) + .template or_object() + .template or_else(fail) + .values(); + + REQUIRE(p.valid()); + REQUIRE_FALSE(d1); + REQUIRE_FALSE(d2); + REQUIRE(d3); + REQUIRE_FALSE(d4); + CHECK_EQ(d3->tied(), std::tuple{10, 22.2, 'f'}); + } + + CHECK(p.eof()); +} + +// various scenarios +TEST_CASE("parser test composite conversion") { + test_composite_conversion(); + test_composite_conversion(); +} diff --git a/test/test_parser1_2.cpp b/test/test_parser1_2.cpp new file mode 100644 index 0000000..6ca7a0b --- /dev/null +++ b/test/test_parser1_2.cpp @@ -0,0 +1,309 @@ +#include "test_parser1.hpp" + +struct my_string { + char* data{nullptr}; + + my_string() = default; + + ~my_string() { + delete[] data; + } + + // make sure no object is copied + my_string(const my_string&) = delete; + my_string& operator=(const my_string&) = delete; + + my_string(my_string&& other) : data{other.data} { + other.data = nullptr; + } + + my_string& operator=(my_string&& other) { + data = other.data; + return *this; + } +}; + +template <> +inline bool ss::extract(const char* begin, const char* end, my_string& s) { + size_t size = end - begin; + s.data = new char[size + 1]; + strncpy(s.data, begin, size); + s.data[size] = '\0'; + return true; +} + +struct xyz { + my_string x; + my_string y; + my_string z; + auto tied() { + return std::tie(x, y, z); + } +}; + +template +void test_moving_of_parsed_composite_values() { + // to compile is enough + return; + auto [p, _] = make_parser("", ""); + p.template try_next() + .template or_else( + [](auto&&) {}) + .template or_else([](auto&) {}) + .template or_else([](auto&&) {}) + .template or_object([](auto&&) {}) + .template or_else>( + [](auto&, auto&, auto&) {}); +} + +TEST_CASE("parser test the moving of parsed composite values") { + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); + test_moving_of_parsed_composite_values(); +} + +TEST_CASE("parser test error mode") { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "junk" << std::endl; + out << "junk" << std::endl; + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + p.get_next(); + CHECK_FALSE(p.valid()); + CHECK_FALSE(p.error_msg().empty()); + } +} + +TEST_CASE("parser throw on error mode") { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "junk" << std::endl; + out << "junk" << std::endl; + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } + + { + auto [p, _] = make_parser(f.name, ","); + + REQUIRE_FALSE(p.eof()); + try { + p.get_next(); + FAIL("Expected exception..."); + } catch (const std::exception& e) { + CHECK_FALSE(std::string{e.what()}.empty()); + } + } +} + +static inline std::string no_quote(const std::string& s) { + if (!s.empty() && s[0] == '"') { + return {std::next(begin(s)), std::prev(end(s))}; + } + return s; +} + +template +void test_quote_multiline() { + unique_file_name f{"test_parser"}; + std::vector data = {{1, 2, "\"x\r\nx\nx\""}, + {3, 4, "\"y\ny\r\ny\""}, + {5, 6, "\"z\nz\""}, + {7, 8, "\"u\"\"\""}, + {9, 10, "v"}, + {11, 12, "\"w\n\""}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } + + make_and_write(f.name, data); + for (auto& [_, __, s] : data) { + s = no_quote(s); + if (s[0] == 'u') { + s = "u\""; + } + } + + auto [p, _] = + make_parser, Ts...>(f.name, + ","); + + std::vector i; + + while (!p.eof()) { + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); + + auto [p_no_multiline, __] = + make_parser, Ts...>(f.name, ","); + while (!p.eof()) { + auto command = [&p_no_multiline = p_no_multiline] { + p_no_multiline.template get_next(); + }; + expect_error_on_command(p_no_multiline, command); + } +} + +TEST_CASE("parser test csv on multiple lines with quotes") { + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); + test_quote_multiline(); +} + +static inline std::string no_escape(std::string& s) { + s.erase(std::remove(begin(s), end(s), '\\'), end(s)); + return s; +} + +template +void test_escape_multiline() { + unique_file_name f{"test_parser"}; + std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, + {5, 6, "z\\\nz\\\nz"}, + {7, 8, "u"}, + {3, 4, "y\\\ny\\\ny"}, + {9, 10, "v\\\\"}, + {11, 12, "w\\\n"}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } + + make_and_write(f.name, data); + for (auto& [_, __, s] : data) { + s = no_escape(s); + if (s == "v") { + s = "v\\"; + } + } + + auto [p, _] = + make_parser, Ts...>(f.name, + ","); + std::vector i; + + while (!p.eof()) { + auto a = p.template get_next(); + i.emplace_back(ss::to_object(a)); + } + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); + + auto [p_no_multiline, __] = + make_parser, Ts...>(f.name, ","); + while (!p.eof()) { + auto command = [&p_no_multiline = p_no_multiline] { + auto a = + p_no_multiline.template get_next(); + }; + expect_error_on_command(p_no_multiline, command); + } +} + +TEST_CASE("parser test csv on multiple lines with escapes") { + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); +} + +template +void test_quote_escape_multiline() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "1,2,\"just\\\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"just\r\nsome\\\r\n\n\\\nstrings\"" << std::endl; + out << "5,6,\"just\\\n\\\r\n\r\n\nstrings" << std::endl; +#else + out << "3,4,\"just\nsome\\\n\n\\\nstrings\"" << std::endl; + out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl; +#endif + out << "7,8,\"just strings\"" << std::endl; + out << "9,10,just strings" << std::endl; + } + size_t bad_lines = 1; + auto num_errors = 0; + + auto [p, _] = make_parser, + ss::quote<'"'>, Ts...>(f.name); + std::vector i; + + while (!p.eof()) { + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; + } + } + + CHECK(bad_lines == num_errors); + + std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 + {3, 4, "just\r\nsome\r\n\n\nstrings"}, +#else + {3, 4, "just\nsome\n\n\nstrings"}, +#endif + {9, 10, "just strings"}}; + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + CHECK_EQ(i, data); +} + +TEST_CASE("parser test csv on multiple lines with quotes and escapes") { + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); +} diff --git a/test/test_parser1_3.cpp b/test/test_parser1_3.cpp new file mode 100644 index 0000000..6885eb1 --- /dev/null +++ b/test/test_parser1_3.cpp @@ -0,0 +1,515 @@ +#include "test_parser1.hpp" + +template +void test_multiline_restricted() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "1,2,\"just\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"ju\n\r\n\nnk\"" << std::endl; + out << "5,6,just\\\n\\\r\nstrings" << std::endl; +#else + out << "3,4,\"ju\n\n\nnk\"" << std::endl; + out << "5,6,just\\\n\\\nstrings" << std::endl; +#endif + out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; + out << "9,10,\"just\\\n\nstrings\"" << std::endl; + out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; + out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl; + out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; + out << "19,20,just strings" << std::endl; + } + auto bad_lines = 15; + auto num_errors = 0; + + ss::parser, ss::quote<'"'>, ss::escape<'\\'>, + Ts...> + p{f.name, ","}; + std::vector i; + + while (!p.eof()) { + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; + } + } + + CHECK(bad_lines == num_errors); + + std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 + {5, 6, "just\n\r\nstrings"}, +#else + {5, 6, "just\n\nstrings"}, +#endif + {9, 10, "just\n\nstrings"}, + {19, 20, "just strings"}}; + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } + + if (i.size() != data.size()) { + CHECK_EQ(i.size(), data.size()); + } + + CHECK_EQ(i, data); +} + +TEST_CASE("parser test multiline restricted") { + test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); +} + +template +void test_unterminated_line_impl(const std::vector& lines, + size_t bad_line) { + unique_file_name f{"test_parser"}; + std::ofstream out{f.name}; + for (const auto& line : lines) { + out << line << std::endl; + } + out.close(); + + ss::parser p{f.name}; + size_t line = 0; + while (!p.eof()) { + auto command = [&] { p.template get_next(); }; + + if (line == bad_line) { + expect_error_on_command(p, command); + break; + } else { + command(); + CHECK(p.valid()); + ++line; + } + } +} + +template +void test_unterminated_line(const std::vector& lines, + size_t bad_line) { + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); + test_unterminated_line_impl(lines, bad_line); +} + +TEST_CASE("parser test csv on multiline with errors") { + using multiline = ss::multiline_restricted<3>; + using escape = ss::escape<'\\'>; + using quote = ss::quote<'"'>; + + // unterminated escape + { + const std::vector lines{"1,2,just\\"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,just\\", "9,8,second"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,just\\"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,just\\", + "3,4,third"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", + "1,2,just\\\nstrings\\", + "3,4,th\\\nird"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "3,4,second", + "1,2,just\\"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + { + const std::vector lines{"9,8,\\first", "3,4,second", + "1,2,jus\\t\\"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + // unterminated quote + { + const std::vector lines{"1,2,\"just"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just", "9,8,second"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just", + "3,4,th\\,ird"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "3,4,second", + "1,2,\"just"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + { + const std::vector lines{"9,8,\"first\"", + "\"3\",4,\"sec,ond\"", + "1,2,\"ju\"\"st"}; + test_unterminated_line(lines, 2); + test_unterminated_line(lines, 2); + } + + // unterminated quote and escape + { + const std::vector lines{"1,2,\"just\\"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just\\\n\\"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"1,2,\"just\n\\"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just\n\\"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", "1,2,\"just\n\\", + "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,f\\\nirst", "1,2,\"just\n\\", + "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"f\ni\nrst\"", + "1,2,\"just\n\\", "4,3,thrid"}; + test_unterminated_line(lines, 1); + } + + // multiline limmit reached escape + { + const std::vector lines{"1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,fi\\\nrs\\\nt", + "1,2,\\\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,first", + "1,2,\\\n\\\n\\\n\\\njust", + "4,3,third"}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + // multiline limmit reached quote + { + const std::vector lines{"1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 0); + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fir\nst\"", + "1,2,\"\n\n\n\n\njust\""}; + test_unterminated_line(lines, 1); + test_unterminated_line(lines, 1); + } + + // multiline limmit reached quote and escape + { + const std::vector lines{"1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 0); + } + + { + const std::vector lines{"9,8,first", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,fi\\\nrst", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fi\nrst\"", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } + + { + const std::vector lines{"9,8,\"fi\nr\\\nst\"", + "1,2,\"\\\n\n\\\n\\\n\\\njust"}; + test_unterminated_line(lines, 1); + } +} + +template +struct has_type; + +template +struct has_type> + : std::disjunction...> {}; + +static inline void check_size(size_t size1, size_t size2) { + CHECK_EQ(size1, size2); +} + +template +static void test_fields_impl(const std::string file_name, + const std::vector& data, + const std::vector& fields) { + using CaseType = std::tuple; + + ss::parser p{file_name, ","}; + CHECK_FALSE(p.field_exists("Unknown")); + p.use_fields(fields); + std::vector i; + + for (const auto& a : p.template iterate()) { + i.push_back(a); + } + + check_size(i.size(), data.size()); + for (size_t j = 0; j < i.size(); ++j) { + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].i); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].d); + } + if constexpr (has_type::value) { + CHECK_EQ(std::get(i[j]), data[j].s); + } + } +} + +template +static void test_fields(const std::string file_name, const std::vector& data, + const std::vector& fields) { + test_fields_impl, Ts...>(file_name, data, fields); + test_fields_impl, Ts...>(file_name, data, + fields); + test_fields_impl, Ts...>(file_name, data, + fields); +} + +TEST_CASE("parser test various cases with header") { + unique_file_name f{"test_parser"}; + constexpr static auto Int = "Int"; + constexpr static auto Dbl = "Double"; + constexpr static auto Str = "String"; + using str = std::string; + + std::vector header{Int, Dbl, Str}; + + std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, + {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; + + make_and_write(f.name, data, header); + const auto& o = f.name; + const auto& d = data; + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_NE(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + p.ignore_next(); + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + + for (const auto& a : p.iterate()) { + i.emplace_back(ss::to_object(a)); + } + + CHECK_EQ(i, data); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl, Str); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + CHECK_FALSE(p.field_exists("Unknown")); + + p.use_fields(Int, "Unknown"); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Int); + CHECK_FALSE(p.valid()); + } + + { + ss::parser p{f.name, ","}; + p.use_fields(Int, Dbl); + + { + auto [int_, double_] = p.get_next(); + CHECK_EQ(int_, data[0].i); + CHECK_EQ(double_, data[0].d); + } + + p.use_fields(Dbl, Int); + + { + auto [double_, int_] = p.get_next(); + CHECK_EQ(int_, data[1].i); + CHECK_EQ(double_, data[1].d); + } + + p.use_fields(Str); + + { + auto string_ = p.get_next(); + CHECK_EQ(string_, data[2].s); + } + + p.use_fields(Str, Int, Dbl); + + { + auto [string_, int_, double_] = + p.get_next(); + CHECK_EQ(double_, data[3].d); + CHECK_EQ(int_, data[3].i); + CHECK_EQ(string_, data[3].s); + } + } + + /* python used to generate permutations + import itertools + + header = {'str': 'Str', + 'double': 'Dbl', + 'int': 'Int'} + + keys = ['str', 'int', 'double'] + + for r in range (1, 3): + combinations = list(itertools.permutations(keys, r = r)) + + for combination in combinations: + template_params = [] + arg_params = [] + for type in combination: + template_params.append(type) + arg_params.append(header[type]) + call = 'testFields<' + ', '.join(template_params) + \ + '>(o, d, {' + ', '.join(arg_params) + '});' + print(call) + */ + + test_fields(o, d, {Str}); + test_fields(o, d, {Int}); + test_fields(o, d, {Dbl}); + test_fields(o, d, {Str, Int}); + test_fields(o, d, {Str, Dbl}); + test_fields(o, d, {Int, Str}); + test_fields(o, d, {Int, Dbl}); + test_fields(o, d, {Dbl, Str}); + test_fields(o, d, {Dbl, Int}); + test_fields(o, d, {Str, Int, Dbl}); + test_fields(o, d, {Str, Dbl, Int}); + test_fields(o, d, {Int, Str, Dbl}); + test_fields(o, d, {Int, Dbl, Str}); + test_fields(o, d, {Dbl, Str, Int}); + test_fields(o, d, {Dbl, Int, Str}); +} diff --git a/test/test_parser1_4.cpp b/test/test_parser1_4.cpp new file mode 100644 index 0000000..42a385b --- /dev/null +++ b/test/test_parser1_4.cpp @@ -0,0 +1,291 @@ +#include "test_parser1.hpp" + +template +void test_invalid_fields_impl(const std::vector& lines, + const std::vector& fields) { + unique_file_name f{"test_parser"}; + std::ofstream out{f.name}; + for (const auto& line : lines) { + out << line << std::endl; + } + out.close(); + + { + // No fields specified + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(); }; + expect_error_on_command(p, command); + } + + { + // Unknown field + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields("Unknown"); }; + expect_error_on_command(p, command); + } + + { + // Field used multiple times + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(fields.at(0), fields.at(0)); }; + if (!fields.empty()) { + expect_error_on_command(p, command); + } + } + + { + // Mapping out of range + ss::parser p{f.name, ","}; + auto command = [&] { + p.use_fields(fields.at(0)); + p.template get_next(); + }; + if (!fields.empty()) { + expect_error_on_command(p, command); + } + } + + { + // Invalid header + ss::parser p{f.name, ","}; + auto command = [&] { p.use_fields(fields); }; + + if (!fields.empty()) { + // Pass if there are no duplicates, fail otherwise + if (std::unordered_set{fields.begin(), fields.end()} + .size() != fields.size()) { + expect_error_on_command(p, command); + } else { + command(); + CHECK(p.valid()); + if (!p.valid()) { + if constexpr (ss::setup::string_error) { + std::cout << p.error_msg() << std::endl; + } + } + } + } + } +} + +template +void test_invalid_fields(const std::vector& lines, + const std::vector& fields) { + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); + test_invalid_fields_impl(lines, fields); +} + +TEST_CASE("parser test invalid header fields usage") { + test_invalid_fields({}, {}); + + test_invalid_fields({"Int"}, {"Int"}); + test_invalid_fields({"Int", "1"}, {"Int"}); + test_invalid_fields({"Int", "1", "2"}, {"Int"}); + + test_invalid_fields({"Int,String"}, {"Int", "String"}); + test_invalid_fields({"Int,String", "1,hi"}, {"Int", "String"}); + test_invalid_fields({"Int,String", "2,hello"}, {"Int", "String"}); + + test_invalid_fields({"Int,String,Double"}, {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34"}, + {"Int", "String", "Double"}); + test_invalid_fields({"Int,String,Double", "1,hi,2.34", "2,hello,3.45"}, + {"Int", "String", "Double"}); + + test_invalid_fields({"Int,Int,Int"}, {"Int", "Int", "Int"}); + test_invalid_fields({"Int,Int,Int", "1,2,3"}, {"Int", "Int", "Int"}); + + test_invalid_fields({"Int,String,Int"}, {"Int", "String", "Int"}); + test_invalid_fields({"Int,String,Int", "1,hi,3"}, {"Int", "String", "Int"}); +} + +template +void test_invalid_rows_with_header() { + unique_file_name f{"test_parser"}; + { + std::ofstream out{f.name}; + out << "Int,String,Double" << std::endl; + out << "1,line1,2.34" << std::endl; + out << "2,line2" << std::endl; + out << "3,line3,67.8" << std::endl; + out << "4,line4,67.8,9" << std::endl; + out << "5,line5,9.10" << std::endl; + out << "six,line6,10.11" << std::endl; + } + + { + ss::parser p{f.name}; + + p.use_fields("Int", "String", "Double"); + using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{1, "line1", 2.34}, + {3, "line3", 67.8}, + {5, "line5", 9.10}}; + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name}; + + p.use_fields("Double", "Int"); + using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name}; + + p.use_fields("String", "Double"); + using data = std::tuple; + std::vector i; + + CHECK(p.valid()); + + while (!p.eof()) { + try { + const auto& t = p.template get_next(); + if (p.valid()) { + i.push_back(t); + } + } catch (const ss::exception&) { + continue; + } + } + + std::vector expected = {{"line1", 2.34}, + {"line3", 67.8}, + {"line5", 9.10}, + {"line6", 10.11}}; + CHECK_EQ(i, expected); + } +} + +TEST_CASE("parser test invalid rows with header") { + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); + test_invalid_rows_with_header(); +} + +template +void test_ignore_empty_impl(const std::vector& data) { + unique_file_name f{"test_parser"}; + make_and_write(f.name, data); + + std::vector expected; + for (const auto& d : data) { + if (d.s != X::empty) { + expected.push_back(d); + } + } + + { + ss::parser p{f.name, ","}; + + std::vector i; + for (const auto& a : p.template iterate()) { + i.push_back(a); + } + + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + size_t n = 0; + while (!p.eof()) { + try { + ++n; + const auto& a = p.template get_next(); + if (data.at(n - 1).s == X::empty) { + CHECK_FALSE(p.valid()); + continue; + } + i.push_back(a); + } catch (...) { + CHECK_EQ(data.at(n - 1).s, X::empty); + } + } + + CHECK_EQ(i, expected); + } +} + +template +void test_ignore_empty(const std::vector& data) { + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); + test_ignore_empty_impl(data); +} + +TEST_CASE("parser test various cases with empty lines") { + test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {5, 6, X::empty}, {9, 10, "v"}, {11, 12, "w"}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, "y"}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty({{1, 2, X::empty}, + {3, 4, X::empty}, + {9, 10, X::empty}, + {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, "x"}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, X::empty}}); + + test_ignore_empty( + {{1, 2, X::empty}, {3, 4, X::empty}, {9, 10, X::empty}, {11, 12, "w"}}); + + test_ignore_empty({{11, 12, X::empty}}); + + test_ignore_empty({}); +}