diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 4a1db0f..2a95301 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -88,14 +88,26 @@ public: return to_object(get_next()); } + // TODO make the method work with if valid() returns false size_t line() const { return valid() ? reader_.line_number_ - 1 : 0; } template no_void_validator_tup_t get_next() { + std::optional error; + if (!eof_) { - reader_.parse(); + if constexpr (throw_on_error) { + try { + reader_.parse(); + } catch (...) { + read_line(); + throw; + } + } else { + reader_.parse(); + } } reader_.update(); @@ -112,6 +124,17 @@ public: return {}; } + if constexpr (throw_on_error) { + try { + auto value = reader_.converter_.template convert(); + read_line(); + return value; + } catch (...) { + read_line(); + throw; + } + } + auto value = reader_.converter_.template convert(); if (!reader_.converter_.valid()) { diff --git a/include/ss/splitter.hpp b/include/ss/splitter.hpp index dfb44c1..dd8f365 100644 --- a/include/ss/splitter.hpp +++ b/include/ss/splitter.hpp @@ -162,7 +162,6 @@ private: } } - // TODO handle this efficiently (if multiline is enabled) void handle_error_unterminated_quote() { constexpr static auto error_msg = "unterminated quote"; diff --git a/test/test_parser.cpp b/test/test_parser.cpp index bc0cc43..de1be8c 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -772,7 +772,8 @@ static inline std::string no_escape(std::string& s) { return s; } -TEST_CASE("parser test csv on multiple lines with escapes") { +template +void test_escape_multiline() { unique_file_name f{"test_parser"}; std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, {5, 6, "z\\\nz\\\nz"}, @@ -792,11 +793,11 @@ TEST_CASE("parser test csv on multiple lines with escapes") { } } - ss::parser> p{f.name, ","}; + ss::parser, Ts...> p{f.name, ","}; std::vector i; while (!p.eof()) { - auto a = p.get_next(); + auto a = p.template get_next(); i.emplace_back(ss::to_object(a)); } @@ -805,14 +806,24 @@ TEST_CASE("parser test csv on multiple lines with escapes") { } CHECK_EQ(i, data); - ss::parser> p_no_multiline{f.name, ","}; + ss::parser, Ts...> p_no_multiline{f.name, ","}; while (!p.eof()) { - auto a = p_no_multiline.get_next(); - CHECK_FALSE(p.valid()); + auto command = [&] { + auto a = + p_no_multiline.template get_next(); + }; + expect_error_on_command(p_no_multiline, command); } } -TEST_CASE("parser test csv on multiple lines with quotes and escapes") { +TEST_CASE("parser test csv on multiple lines with escapes") { + test_escape_multiline(); + test_escape_multiline(); + test_escape_multiline(); +} + +template +void test_quote_escape_multiline() { unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; @@ -827,17 +838,28 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") { out << "7,8,\"just strings\"" << std::endl; out << "9,10,just strings" << std::endl; } + size_t bad_lines = 1; + auto num_errors = 0; - ss::parser, ss::quote<'"'>> p{f.name}; + ss::parser, ss::quote<'"'>, Ts...> p{ + f.name}; std::vector i; while (!p.eof()) { - auto a = p.get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; } } + CHECK(bad_lines == num_errors); + std::vector data = {{1, 2, "just\n\nstrings"}, #ifndef _WIN32 {3, 4, "just\r\nsome\r\n\n\nstrings"}, @@ -852,7 +874,14 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") { CHECK_EQ(i, data); } -TEST_CASE("parser test multiline restricted") { +TEST_CASE("parser test csv on multiple lines with quotes and escapes") { + test_quote_escape_multiline(); + test_quote_escape_multiline(); + test_quote_escape_multiline(); +} + +template +void test_multiline_restricted() { unique_file_name f{"test_parser"}; { std::ofstream out{f.name}; @@ -871,18 +900,29 @@ TEST_CASE("parser test multiline restricted") { out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; out << "19,20,just strings" << std::endl; } + auto bad_lines = 15; + auto num_errors = 0; - ss::parser, ss::quote<'"'>, ss::escape<'\\'>> + ss::parser, ss::quote<'"'>, ss::escape<'\\'>, + Ts...> p{f.name, ","}; std::vector i; while (!p.eof()) { - auto a = p.get_next(); - if (p.valid()) { - i.emplace_back(ss::to_object(a)); + try { + auto a = p.template get_next(); + if (p.valid()) { + i.emplace_back(ss::to_object(a)); + } else { + ++num_errors; + } + } catch (const std::exception& e) { + ++num_errors; } } + CHECK(bad_lines == num_errors); + std::vector data = {{1, 2, "just\n\nstrings"}, #ifndef _WIN32 {5, 6, "just\n\r\nstrings"}, @@ -896,9 +936,19 @@ TEST_CASE("parser test multiline restricted") { update_if_crlf(s); } + if (i.size() != data.size()) { + CHECK_EQ(i.size(), data.size()); + } + CHECK_EQ(i, data); } +TEST_CASE("parser test multiline restricted") { + test_multiline_restricted(); + test_multiline_restricted(); + test_multiline_restricted(); +} + template void test_unterminated_line_impl(const std::vector& lines, size_t bad_line) {