Fix handling of invalid conversion/split within parser, update parser tests

This commit is contained in:
ado 2023-08-05 18:11:13 +02:00
parent 7c9ba953ad
commit 2e1c4c97ec
3 changed files with 90 additions and 18 deletions

View File

@ -88,14 +88,26 @@ public:
return to_object<T>(get_next<Ts...>()); return to_object<T>(get_next<Ts...>());
} }
// TODO make the method work when valid() returns false
size_t line() const { size_t line() const {
return valid() ? reader_.line_number_ - 1 : 0; return valid() ? reader_.line_number_ - 1 : 0;
} }
template <typename T, typename... Ts> template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> get_next() { no_void_validator_tup_t<T, Ts...> get_next() {
std::optional<std::string> error;
if (!eof_) { if (!eof_) {
if constexpr (throw_on_error) {
try {
reader_.parse(); reader_.parse();
} catch (...) {
read_line();
throw;
}
} else {
reader_.parse();
}
} }
reader_.update(); reader_.update();
@ -112,6 +124,17 @@ public:
return {}; return {};
} }
if constexpr (throw_on_error) {
try {
auto value = reader_.converter_.template convert<T, Ts...>();
read_line();
return value;
} catch (...) {
read_line();
throw;
}
}
auto value = reader_.converter_.template convert<T, Ts...>(); auto value = reader_.converter_.template convert<T, Ts...>();
if (!reader_.converter_.valid()) { if (!reader_.converter_.valid()) {

View File

@ -162,7 +162,6 @@ private:
} }
} }
// TODO handle this efficiently (if multiline is enabled)
void handle_error_unterminated_quote() { void handle_error_unterminated_quote() {
constexpr static auto error_msg = "unterminated quote"; constexpr static auto error_msg = "unterminated quote";

View File

@ -772,7 +772,8 @@ static inline std::string no_escape(std::string& s) {
return s; return s;
} }
TEST_CASE("parser test csv on multiple lines with escapes") { template <typename... Ts>
void test_escape_multiline() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
std::vector<X> data = {{1, 2, "x\\\nx\\\r\nx"}, std::vector<X> data = {{1, 2, "x\\\nx\\\r\nx"},
{5, 6, "z\\\nz\\\nz"}, {5, 6, "z\\\nz\\\nz"},
@ -792,11 +793,11 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
} }
} }
ss::parser<ss::multiline, ss::escape<'\\'>> p{f.name, ","}; ss::parser<ss::multiline, ss::escape<'\\'>, Ts...> p{f.name, ","};
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_next<int, double, std::string>(); auto a = p.template get_next<int, double, std::string>();
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
@ -805,14 +806,24 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
} }
CHECK_EQ(i, data); CHECK_EQ(i, data);
ss::parser<ss::escape<'\\'>> p_no_multiline{f.name, ","}; ss::parser<ss::escape<'\\'>, Ts...> p_no_multiline{f.name, ","};
while (!p.eof()) { while (!p.eof()) {
auto a = p_no_multiline.get_next<int, double, std::string>(); auto command = [&] {
CHECK_FALSE(p.valid()); auto a =
p_no_multiline.template get_next<int, double, std::string>();
};
expect_error_on_command(p_no_multiline, command);
} }
} }
TEST_CASE("parser test csv on multiple lines with quotes and escapes") { TEST_CASE("parser test csv on multiple lines with escapes") {
test_escape_multiline();
test_escape_multiline<ss::string_error>();
test_escape_multiline<ss::throw_on_error>();
}
template <typename... Ts>
void test_quote_escape_multiline() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
@ -827,17 +838,28 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") {
out << "7,8,\"just strings\"" << std::endl; out << "7,8,\"just strings\"" << std::endl;
out << "9,10,just strings" << std::endl; out << "9,10,just strings" << std::endl;
} }
size_t bad_lines = 1;
auto num_errors = 0;
ss::parser<ss::multiline, ss::escape<'\\'>, ss::quote<'"'>> p{f.name}; ss::parser<ss::multiline, ss::escape<'\\'>, ss::quote<'"'>, Ts...> p{
f.name};
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_next<int, double, std::string>(); try {
auto a = p.template get_next<int, double, std::string>();
if (p.valid()) { if (p.valid()) {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} else {
++num_errors;
}
} catch (const std::exception& e) {
++num_errors;
} }
} }
CHECK(bad_lines == num_errors);
std::vector<X> data = {{1, 2, "just\n\nstrings"}, std::vector<X> data = {{1, 2, "just\n\nstrings"},
#ifndef _WIN32 #ifndef _WIN32
{3, 4, "just\r\nsome\r\n\n\nstrings"}, {3, 4, "just\r\nsome\r\n\n\nstrings"},
@ -852,7 +874,14 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") {
CHECK_EQ(i, data); CHECK_EQ(i, data);
} }
TEST_CASE("parser test multiline restricted") { TEST_CASE("parser test csv on multiple lines with quotes and escapes") {
test_quote_escape_multiline();
test_quote_escape_multiline<ss::string_error>();
test_quote_escape_multiline<ss::throw_on_error>();
}
template <typename... Ts>
void test_multiline_restricted() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
@ -871,18 +900,29 @@ TEST_CASE("parser test multiline restricted") {
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
out << "19,20,just strings" << std::endl; out << "19,20,just strings" << std::endl;
} }
auto bad_lines = 15;
auto num_errors = 0;
ss::parser<ss::multiline_restricted<2>, ss::quote<'"'>, ss::escape<'\\'>> ss::parser<ss::multiline_restricted<2>, ss::quote<'"'>, ss::escape<'\\'>,
Ts...>
p{f.name, ","}; p{f.name, ","};
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_next<int, double, std::string>(); try {
auto a = p.template get_next<int, double, std::string>();
if (p.valid()) { if (p.valid()) {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} else {
++num_errors;
}
} catch (const std::exception& e) {
++num_errors;
} }
} }
CHECK(bad_lines == num_errors);
std::vector<X> data = {{1, 2, "just\n\nstrings"}, std::vector<X> data = {{1, 2, "just\n\nstrings"},
#ifndef _WIN32 #ifndef _WIN32
{5, 6, "just\n\r\nstrings"}, {5, 6, "just\n\r\nstrings"},
@ -896,9 +936,19 @@ TEST_CASE("parser test multiline restricted") {
update_if_crlf(s); update_if_crlf(s);
} }
if (i.size() != data.size()) {
CHECK_EQ(i.size(), data.size());
}
CHECK_EQ(i, data); CHECK_EQ(i, data);
} }
TEST_CASE("parser test multiline restricted") {
test_multiline_restricted();
test_multiline_restricted<ss::string_error>();
test_multiline_restricted<ss::throw_on_error>();
}
template <typename... Ts> template <typename... Ts>
void test_unterminated_line_impl(const std::vector<std::string>& lines, void test_unterminated_line_impl(const std::vector<std::string>& lines,
size_t bad_line) { size_t bad_line) {