WIP, added multiline restriction, fixed a few bugs for multiline, updated unit tests

This commit is contained in:
ado 2021-02-20 15:53:18 +01:00
parent 2985027505
commit 2dbc21780f
8 changed files with 288 additions and 98 deletions

View File

@ -230,6 +230,15 @@ private:
} }
} }
// Records the "multiline limit reached" error on this object.
// When string_error is enabled the message replaces whatever error_ held
// before; otherwise error_ is a plain flag and is simply raised.
void set_error_multiline_limit_reached() {
    if constexpr (!string_error) {
        error_ = true;
    } else {
        error_.clear();
        error_.append("multiline limit reached.");
    }
}
void set_error_invalid_conversion(const string_range msg, size_t pos) { void set_error_invalid_conversion(const string_range msg, size_t pos) {
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();

View File

@ -9,13 +9,16 @@
#include <string> #include <string>
#include <vector> #include <vector>
// TODO remove
#include <iostream>
namespace ss { namespace ss {
template <typename... Matchers> template <typename... Matchers>
class parser { class parser {
constexpr static auto string_error = setup<Matchers...>::string_error; constexpr static auto string_error = setup<Matchers...>::string_error;
constexpr static auto multiline = setup<Matchers...>::multiline;
using multiline = typename setup<Matchers...>::multiline;
using error_type = ss::ternary_t<string_error, std::string, bool>; using error_type = ss::ternary_t<string_error, std::string, bool>;
public: public:
@ -324,7 +327,7 @@ private:
helper_buffer_{other.helper_buffer_}, converter_{std::move( helper_buffer_{other.helper_buffer_}, converter_{std::move(
other.converter_)}, other.converter_)},
next_line_converter_{std::move(other.next_line_converter_)}, next_line_converter_{std::move(other.next_line_converter_)},
size_{other.size_}, size_{other.size_}, next_line_size_{other.size_},
helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, helper_size_{other.helper_size_}, delim_{std::move(other.delim_)},
file_{other.file_}, crlf_{other.crlf_} { file_{other.file_}, crlf_{other.crlf_} {
other.buffer_ = nullptr; other.buffer_ = nullptr;
@ -341,6 +344,7 @@ private:
converter_ = std::move(other.converter_); converter_ = std::move(other.converter_);
next_line_converter_ = std::move(other.next_line_converter_); next_line_converter_ = std::move(other.next_line_converter_);
size_ = other.size_; size_ = other.size_;
next_line_size_ = other.next_line_size_;
helper_size_ = other.helper_size_; helper_size_ = other.helper_size_;
delim_ = std::move(other.delim_); delim_ = std::move(other.delim_);
file_ = other.file_; file_ = other.file_;
@ -370,16 +374,23 @@ private:
reader& operator=(const reader& other) = delete; reader& operator=(const reader& other) = delete;
bool read_next() { bool read_next() {
ssize_t ssize = getline(&next_line_buffer_, &size_, file_); memset(next_line_buffer_, '\0', next_line_size_);
ssize_t ssize =
getline(&next_line_buffer_, &next_line_size_, file_);
if (ssize == -1) { if (ssize == -1) {
return false; return false;
} }
size_t size = remove_eol(next_line_buffer_, ssize); size_t size = remove_eol(next_line_buffer_, ssize);
size_t limit = 0;
if constexpr (multiline && setup<Matchers...>::escape::enabled) { if constexpr (multiline::enabled &&
setup<Matchers...>::escape::enabled) {
while (escaped_eol(size)) { while (escaped_eol(size)) {
if (multiline_limit_reached(limit)) {
return true;
}
if (!append_line(next_line_buffer_, size)) { if (!append_line(next_line_buffer_, size)) {
return false; return false;
} }
@ -388,8 +399,12 @@ private:
next_line_converter_.split(next_line_buffer_, delim_); next_line_converter_.split(next_line_buffer_, delim_);
if constexpr (multiline && setup<Matchers...>::quote::enabled) { if constexpr (multiline::enabled &&
setup<Matchers...>::quote::enabled) {
while (unterminated_quote()) { while (unterminated_quote()) {
if (multiline_limit_reached(limit)) {
return true;
}
if (!append_line(next_line_buffer_, size)) { if (!append_line(next_line_buffer_, size)) {
return false; return false;
} }
@ -402,9 +417,20 @@ private:
void update() { void update() {
std::swap(buffer_, next_line_buffer_); std::swap(buffer_, next_line_buffer_);
std::swap(size_, next_line_size_);
std::swap(converter_, next_line_converter_); std::swap(converter_, next_line_converter_);
} }
// Checks the caller's running line counter against multiline::size.
// When multiline::size > 0 the counter is incremented on every call; if it
// had already reached multiline::size, the error is flagged on
// next_line_converter_ and true is returned. When multiline::size is 0 no
// restriction applies: the counter is left untouched and false is returned.
bool multiline_limit_reached(size_t& limit) {
    if constexpr (multiline::size > 0) {
        const bool exceeded = limit >= multiline::size;
        ++limit;
        if (exceeded) {
            next_line_converter_.set_error_multiline_limit_reached();
        }
        return exceeded;
    } else {
        return false;
    }
}
bool escaped_eol(size_t size) { bool escaped_eol(size_t size) {
const char* curr; const char* curr;
for (curr = next_line_buffer_ + size - 1; for (curr = next_line_buffer_ + size - 1;
@ -422,12 +448,15 @@ private:
return false; return false;
} }
void undo_remove_eol(size_t& string_end) { void undo_remove_eol(char* buffer, size_t& string_end) {
if (next_line_converter_.unterminated_quote()) {
string_end -= next_line_converter_.splitter_.escaped_;
}
if (crlf_) { if (crlf_) {
std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end); std::copy_n("\r\n\0", 3, buffer + string_end);
string_end += 2; string_end += 2;
} else { } else {
std::copy_n("\n\0", 2, next_line_buffer_ + string_end); std::copy_n("\n\0", 2, buffer + string_end);
string_end += 1; string_end += 1;
} }
} }
@ -447,15 +476,15 @@ private:
void realloc_concat(char*& first, size_t& first_size, void realloc_concat(char*& first, size_t& first_size,
const char* const second, size_t second_size) { const char* const second, size_t second_size) {
first = static_cast<char*>(realloc(static_cast<void*>(first), next_line_size_ = first_size + second_size + 2;
first_size + second_size + 2)); first = static_cast<char*>(
realloc(static_cast<void*>(first), next_line_size_));
std::copy_n(second, second_size + 1, first + first_size); std::copy_n(second, second_size + 1, first + first_size);
first_size += second_size; first_size += second_size;
} }
bool append_line(char*& dst_buffer, size_t& dst_size) { bool append_line(char*& dst_buffer, size_t& dst_size) {
undo_remove_eol(dst_size); undo_remove_eol(dst_buffer, dst_size);
ssize_t ssize = getline(&helper_buffer_, &helper_size_, file_); ssize_t ssize = getline(&helper_buffer_, &helper_size_, file_);
if (ssize == -1) { if (ssize == -1) {
@ -478,6 +507,7 @@ private:
converter<Matchers...> next_line_converter_; converter<Matchers...> next_line_converter_;
size_t size_{0}; size_t size_{0};
size_t next_line_size_{0};
size_t helper_size_{0}; size_t helper_size_{0};
std::string delim_; std::string delim_;

View File

@ -67,7 +67,11 @@ public:
}; };
//////////////// ////////////////
// setup parameters // setup
////////////////
////////////////
// matcher
//////////////// ////////////////
template <char C> template <char C>
@ -85,15 +89,6 @@ struct trim_right : matcher<Cs...> {};
template <char... Cs> template <char... Cs>
struct escape : matcher<Cs...> {}; struct escape : matcher<Cs...> {};
// TODO add limit
class multiline;
class string_error;
////////////////
// setup implementation
////////////////
template <typename T, template <char...> class Template> template <typename T, template <char...> class Template>
struct is_instance_of_matcher : std::false_type {}; struct is_instance_of_matcher : std::false_type {};
@ -128,6 +123,54 @@ struct get_matcher<Matcher> {
template <template <char...> class Matcher, typename... Ts> template <template <char...> class Matcher, typename... Ts>
using get_matcher_t = typename get_matcher<Matcher, Ts...>::type; using get_matcher_t = typename get_matcher<Matcher, Ts...>::type;
////////////////
// multiline
////////////////
// Setup option describing multiline support.
//   size    - the limit value compared against the parser's line counter;
//             0 means the limit check is skipped.
//   enabled - whether multiline handling is switched on (defaults to true).
template <size_t Limit, bool Enabled = true>
struct multiline_restricted {
    static constexpr size_t size = Limit;
    static constexpr bool enabled = Enabled;
};

// Plain multiline option: enabled, with size 0 (no restriction).
using multiline = multiline_restricted<0>;
// Type trait: true only for instantiations of multiline_restricted,
// false for every other type.
template <typename Candidate>
struct is_instance_of_multiline : std::bool_constant<false> {};

template <size_t Limit, bool Enabled>
struct is_instance_of_multiline<multiline_restricted<Limit, Enabled>>
    : std::bool_constant<true> {};

// The trait's result as a type (std::true_type or std::false_type).
template <typename Candidate>
using is_instance_of_multiline_t =
    typename is_instance_of_multiline<Candidate>::type;
// Picks the multiline option out of a list of setup types.
template <typename... Options>
struct get_multiline;

// Empty list: fall back to a disabled multiline option.
template <>
struct get_multiline<> {
    using type = multiline_restricted<0, false>;
};

// Non-empty list: the head is chosen when it is a multiline_restricted
// instantiation, otherwise the result for the remaining types is used.
template <typename Head, typename... Tail>
struct get_multiline<Head, Tail...> {
    using type = ternary_t<is_instance_of_multiline<Head>::value, Head,
                           typename get_multiline<Tail...>::type>;
};

// Convenience alias for get_multiline<...>::type.
template <typename... Options>
using get_multiline_t = typename get_multiline<Options...>::type;
////////////////
// string_error
////////////////
class string_error;
////////////////
// setup implementation
////////////////
template <typename... Ts> template <typename... Ts>
struct setup { struct setup {
private: private:
@ -139,14 +182,12 @@ private:
is_instance_of_matcher_t<T, trim_left>, is_instance_of_matcher_t<T, trim_left>,
is_instance_of_matcher_t<T, trim_right>> {}; is_instance_of_matcher_t<T, trim_right>> {};
template <typename T>
struct is_multiline : std::is_same<T, multiline> {};
template <typename T> template <typename T>
struct is_string_error : std::is_same<T, string_error> {}; struct is_string_error : std::is_same<T, string_error> {};
constexpr static auto count_matcher = count_v<is_matcher, Ts...>; constexpr static auto count_matcher = count_v<is_matcher, Ts...>;
constexpr static auto count_multiline = count_v<is_multiline, Ts...>; constexpr static auto count_multiline =
count_v<is_instance_of_multiline, Ts...>;
constexpr static auto count_string_error = count_v<is_string_error, Ts...>; constexpr static auto count_string_error = count_v<is_string_error, Ts...>;
constexpr static auto number_of_valid_setup_types = constexpr static auto number_of_valid_setup_types =
@ -163,7 +204,7 @@ public:
using trim_left = ternary_t<trim_all::enabled, trim_all, trim_left_only>; using trim_left = ternary_t<trim_all::enabled, trim_all, trim_left_only>;
using trim_right = ternary_t<trim_all::enabled, trim_all, trim_right_only>; using trim_right = ternary_t<trim_all::enabled, trim_all, trim_right_only>;
constexpr static bool multiline = (count_multiline == 1); using multiline = get_multiline_t<Ts...>;
constexpr static bool string_error = (count_string_error == 1); constexpr static bool string_error = (count_string_error == 1);
private: private:
@ -181,7 +222,8 @@ private:
#undef ASSERT_MSG #undef ASSERT_MSG
static_assert( static_assert(
!multiline || (multiline && (quote::enabled || escape::enabled)), !multiline::enabled ||
(multiline::enabled && (quote::enabled || escape::enabled)),
"to enable multiline either quote or escape need to be enabled"); "to enable multiline either quote or escape need to be enabled");
static_assert(!(trim_all::enabled && trim_left_only::enabled) && static_assert(!(trim_all::enabled && trim_left_only::enabled) &&

View File

@ -9,6 +9,9 @@
#include <string> #include <string>
#include <vector> #include <vector>
// TODO remove
#include <iostream>
namespace ss { namespace ss {
template <typename... Ts> template <typename... Ts>
@ -82,7 +85,13 @@ private:
return split_data_; return split_data_;
} }
std::cout << "======================" << std::endl;
std::cout << "resplitting" << std::endl;
resplitting_ = true;
begin_ = line_ + begin; begin_ = line_ + begin;
size_t end = end_ - old_line - escaped_;
end_ = line_ + end;
curr_ = end_;
} }
} }
@ -284,6 +293,12 @@ private:
void read(const Delim& delim) { void read(const Delim& delim) {
escaped_ = 0; escaped_ = 0;
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
if (resplitting_) {
resplitting_ = false;
++begin_;
read_quoted(delim);
return;
}
if (quote::match(*begin_)) { if (quote::match(*begin_)) {
curr_ = end_ = ++begin_; curr_ = end_ = ++begin_;
read_quoted(delim); read_quoted(delim);
@ -321,7 +336,9 @@ private:
template <typename Delim> template <typename Delim>
void read_quoted(const Delim& delim) { void read_quoted(const Delim& delim) {
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
std::cout << "start loop: " << std::endl;
while (true) { while (true) {
std::cout << "- " << *end_ << std::endl;
if (!quote::match(*end_)) { if (!quote::match(*end_)) {
if constexpr (escape::enabled) { if constexpr (escape::enabled) {
if (escape::match(*end_)) { if (escape::match(*end_)) {
@ -334,6 +351,7 @@ private:
// unterminated quote error // unterminated quote error
// eg: ..."hell\0 -> quote not terminated // eg: ..."hell\0 -> quote not terminated
if (*end_ == '\0') { if (*end_ == '\0') {
shift_and_set_current();
set_error_unterminated_quote(); set_error_unterminated_quote();
split_data_.emplace_back(line_, begin_); split_data_.emplace_back(line_, begin_);
done_ = true; done_ = true;
@ -381,9 +399,11 @@ private:
// members // members
//////////////// ////////////////
public:
error_type error_{}; error_type error_{};
bool unterminated_quote_{false}; bool unterminated_quote_{false};
bool done_; bool done_{true};
bool resplitting_{false};
size_t escaped_{0}; size_t escaped_{0};
split_data split_data_; split_data split_data_;

View File

@ -1,9 +1,9 @@
test_sources = files([ test_sources = files([
'test_main.cpp', 'test_main.cpp',
'test_splitter.cpp', #'test_splitter.cpp',
'test_converter.cpp', #'test_converter.cpp',
'test_parser.cpp', 'test_parser.cpp',
'test_extractions.cpp', #'test_extractions.cpp',
]) ])
doctest_proj = subproject('doctest') doctest_proj = subproject('doctest')

View File

@ -8,10 +8,9 @@
#include <doctest.h> #include <doctest.h>
#endif #endif
class buffer { struct buffer {
char* data_{nullptr}; char* data_{nullptr};
public:
char* operator()(const char* data) { char* operator()(const char* data) {
if (data_) { if (data_) {
delete[] data_; delete[] data_;

View File

@ -66,7 +66,7 @@ TEST_CASE("parser test various cases") {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
} }
{ {
@ -80,7 +80,7 @@ TEST_CASE("parser test various cases") {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
CHECK(std::equal(i.begin(), i.end(), data.begin() + 1)); CHECK_EQ(i, data);
} }
{ {
@ -91,7 +91,7 @@ TEST_CASE("parser test various cases") {
i.push_back(p.get_object<X, int, double, std::string>()); i.push_back(p.get_object<X, int, double, std::string>());
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
} }
{ {
@ -103,7 +103,7 @@ TEST_CASE("parser test various cases") {
i.push_back(p.get_object<X, tup>()); i.push_back(p.get_object<X, tup>());
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
} }
{ {
@ -114,7 +114,7 @@ TEST_CASE("parser test various cases") {
i.push_back(p.get_next<X>()); i.push_back(p.get_next<X>());
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
} }
{ {
@ -132,7 +132,7 @@ TEST_CASE("parser test various cases") {
std::vector<X> expected = data; std::vector<X> expected = data;
std::remove_if(expected.begin(), expected.end(), std::remove_if(expected.begin(), expected.end(),
[](const X& x) { return x.i == excluded; }); [](const X& x) { return x.i == excluded; });
CHECK(std::equal(i.begin(), i.end(), expected.begin())); CHECK_EQ(i, data);
} }
{ {
@ -146,7 +146,7 @@ TEST_CASE("parser test various cases") {
} }
} }
std::vector<X> expected = {{3, 4, "y"}}; std::vector<X> expected = {{3, 4, "y"}};
CHECK(std::equal(i.begin(), i.end(), expected.begin())); CHECK_EQ(i, data);
} }
{ {
@ -194,7 +194,7 @@ TEST_CASE("parser test composite conversion") {
auto expect_error = [](auto error) { CHECK(!error.empty()); }; auto expect_error = [](auto error) { CHECK(!error.empty()); };
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(!p.eof()); REQUIRE_FALSE(p.eof());
{ {
constexpr static auto expectedData = std::tuple{10, 'a', 11.1}; constexpr static auto expectedData = std::tuple{10, 'a', 11.1};
@ -209,11 +209,11 @@ TEST_CASE("parser test composite conversion") {
.values(); .values();
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(d3); REQUIRE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
CHECK(*d3 == expectedData); CHECK_EQ(*d3, expectedData);
} }
{ {
@ -234,10 +234,10 @@ TEST_CASE("parser test composite conversion") {
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(d1); REQUIRE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(!d3); REQUIRE_FALSE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
CHECK(*d1 == expectedData); CHECK_EQ(*d1 ,expectedData);
} }
{ {
@ -252,12 +252,12 @@ TEST_CASE("parser test composite conversion") {
.or_else<int, char, double>(fail) .or_else<int, char, double>(fail)
.values(); .values();
REQUIRE(!p.valid()); REQUIRE_FALSE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(!d3); REQUIRE_FALSE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
REQUIRE(!d5); REQUIRE_FALSE(d5);
} }
{ {
@ -272,7 +272,7 @@ TEST_CASE("parser test composite conversion") {
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(d1); REQUIRE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
} }
{ {
@ -283,9 +283,9 @@ TEST_CASE("parser test composite conversion") {
.values(); .values();
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(d2); REQUIRE(d2);
CHECK(d2->tied() == std::tuple{1, 11.1, 'a'}); CHECK_EQ(d2->tied(), std::tuple{1, 11.1, 'a'});
} }
{ {
@ -300,12 +300,12 @@ TEST_CASE("parser test composite conversion") {
.on_error(expect_error) .on_error(expect_error)
.values(); .values();
REQUIRE(!p.valid()); REQUIRE_FALSE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(!d3); REQUIRE_FALSE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
REQUIRE(!d5); REQUIRE_FALSE(d5);
} }
{ {
@ -319,12 +319,12 @@ TEST_CASE("parser test composite conversion") {
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(d1); REQUIRE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
CHECK(*d1 == std::tuple{10, std::nullopt}); CHECK_EQ(*d1, std::tuple{10, std::nullopt});
} }
{ {
REQUIRE(!p.eof()); REQUIRE_FALSE(p.eof());
auto [d1, d2] = p.try_next<int, std::variant<int, std::string>>() auto [d1, d2] = p.try_next<int, std::variant<int, std::string>>()
.on_error(fail) .on_error(fail)
@ -334,8 +334,8 @@ TEST_CASE("parser test composite conversion") {
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(d1); REQUIRE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
CHECK(*d1 == std::tuple{11, std::variant<int, std::string>{"junk"}}); CHECK_EQ(*d1, std::tuple{11, std::variant<int, std::string>{"junk"}});
} }
{ {
@ -346,12 +346,12 @@ TEST_CASE("parser test composite conversion") {
.values(); .values();
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(d1); REQUIRE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
CHECK(d1->tied() == std::tuple{10, 11.1, 'c'}); CHECK_EQ(d1->tied(), std::tuple{10, 11.1, 'c'});
} }
{ {
REQUIRE(!p.eof()); REQUIRE_FALSE(p.eof());
auto [d1, d2, d3, d4] = auto [d1, d2, d3, d4] =
p.try_next<int, int>([] { return false; }) p.try_next<int, int>([] { return false; })
@ -361,11 +361,11 @@ TEST_CASE("parser test composite conversion") {
.values(); .values();
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(d3); REQUIRE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
CHECK(d3.value() == std::tuple{10, 20}); CHECK_EQ(d3.value(), std::tuple{10, 20});
} }
{ {
@ -379,11 +379,11 @@ TEST_CASE("parser test composite conversion") {
.values(); .values();
REQUIRE(p.valid()); REQUIRE(p.valid());
REQUIRE(!d1); REQUIRE_FALSE(d1);
REQUIRE(!d2); REQUIRE_FALSE(d2);
REQUIRE(d3); REQUIRE(d3);
REQUIRE(!d4); REQUIRE_FALSE(d4);
CHECK(d3->tied() == std::tuple{10, 22.2, 'f'}); CHECK_EQ(d3->tied(), std::tuple{10, 22.2, 'f'});
} }
CHECK(p.eof()); CHECK(p.eof());
@ -446,7 +446,7 @@ TEST_CASE("parser test the moving of parsed values") {
ss::parser p{f.name, ","}; ss::parser p{f.name, ","};
auto x = p.get_next<my_string>(); auto x = p.get_next<my_string>();
CHECK(move_called < 3); CHECK_LT(move_called, 3);
move_called_one_col = move_called; move_called_one_col = move_called;
move_called = 0; move_called = 0;
} }
@ -461,21 +461,21 @@ TEST_CASE("parser test the moving of parsed values") {
ss::parser p{f.name, ","}; ss::parser p{f.name, ","};
auto x = p.get_next<my_string, my_string, my_string>(); auto x = p.get_next<my_string, my_string, my_string>();
CHECK(move_called <= 3 * move_called_one_col); CHECK_LE(move_called, 3 * move_called_one_col);
move_called = 0; move_called = 0;
} }
{ {
ss::parser p{f.name, ","}; ss::parser p{f.name, ","};
auto x = p.get_object<xyz, my_string, my_string, my_string>(); auto x = p.get_object<xyz, my_string, my_string, my_string>();
CHECK(move_called <= 6 * move_called_one_col); CHECK_LE(move_called, 6 * move_called_one_col);
move_called = 0; move_called = 0;
} }
{ {
ss::parser p{f.name, ","}; ss::parser p{f.name, ","};
auto x = p.get_next<xyz>(); auto x = p.get_next<xyz>();
CHECK(move_called <= 6 * move_called_one_col); CHECK_LE(move_called, 6 * move_called_one_col);
move_called = 0; move_called = 0;
} }
} }
@ -503,10 +503,10 @@ TEST_CASE("parser test error mode") {
ss::parser<ss::string_error> p(f.name, ","); ss::parser<ss::string_error> p(f.name, ",");
REQUIRE(!p.eof()); REQUIRE_FALSE(p.eof());
p.get_next<int>(); p.get_next<int>();
CHECK(!p.valid()); CHECK_FALSE(p.valid());
CHECK(!p.error_msg().empty()); CHECK_FALSE(p.error_msg().empty());
} }
std::string no_quote(const std::string& s) { std::string no_quote(const std::string& s) {
@ -516,7 +516,7 @@ std::string no_quote(const std::string& s) {
return s; return s;
} }
TEST_CASE("parser test csv on multiple lines with quotes") { TEST_CASE("parser test csv on multiple lines with quotes zzz") {
unique_file_name f; unique_file_name f;
std::vector<X> data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""}, std::vector<X> data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""},
{5, 6, "\"z\nz\""}, {7, 8, "\"u\"\"\""}, {5, 6, "\"z\nz\""}, {7, 8, "\"u\"\"\""},
@ -537,7 +537,7 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
ss::parser<ss::quote<'"'>> p_no_multiline{f.name, ","}; ss::parser<ss::quote<'"'>> p_no_multiline{f.name, ","};
while (!p.eof()) { while (!p.eof()) {
@ -551,10 +551,10 @@ std::string no_escape(std::string& s) {
return s; return s;
} }
TEST_CASE("parser test csv on multiple lines with escapes") { TEST_CASE("parser test csv on multiple lines with escapes xxx") {
unique_file_name f; unique_file_name f;
std::vector<X> data = {{1, 2, "x\\\nx\\\nx"}, {3, 4, "y\\\ny\\\ny"}, std::vector<X> data = {{1, 2, "x\\\nx\\\nx"}, {5, 6, "z\\\nz\\\nz"},
{5, 6, "z\\\nz"}, {7, 8, "u"}, {7, 8, "u"}, {3, 4, "y\\\ny\\\ny"},
{9, 10, "v\\\\"}, {11, 12, "w\\\n"}}; {9, 10, "v\\\\"}, {11, 12, "w\\\n"}};
make_and_write(f.name, data); make_and_write(f.name, data);
@ -571,13 +571,78 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_next<int, double, std::string>(); auto a = p.get_next<int, double, std::string>();
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
// std::cout << i.back().s << std::endl;
} }
CHECK(std::equal(i.begin(), i.end(), data.begin())); CHECK_EQ(i, data);
ss::parser<ss::escape<'\\'>> p_no_multiline{f.name, ","}; ss::parser<ss::escape<'\\'>> p_no_multiline{f.name, ","};
while (!p.eof()) { while (!p.eof()) {
auto a = p_no_multiline.get_next<int, double, std::string>(); auto a = p_no_multiline.get_next<int, double, std::string>();
CHECK(!p.valid()); CHECK_FALSE(p.valid());
} }
} }
// Work-in-progress test: parses a file whose records combine quoted fields
// with escaped newlines, using a parser configured with multiline, escape
// '\\' and quote '"'. Only rows for which the parser reports valid() are
// collected and compared against `data`.
// NOTE(review): only the "3,4,..." record is currently written (the other
// fixture lines are commented out), while `data` lists three records with
// different strings, so CHECK_EQ does not look like it can pass as written —
// confirm the intended fixture before enabling this test.
// NOTE(review): the "yyy" suffix in the test name looks like a temporary
// marker for filtering test runs — confirm and remove before merging.
TEST_CASE("parser test csv on multiple lines with quotes and escapes yyy") {
unique_file_name f;
{
std::ofstream out{f.name};
// out << "1,2,\"just\\\n\nstrings\"" << std::endl;
out << "3,4,\"j\\\ns\n\\\nx\\\ny\ng\"" << std::endl;
// out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl;
// out << "7,8,\"just strings\"" << std::endl;
// out << "9,10,just strings" << std::endl;
}
// NOTE(review): no delimiter argument is passed here, unlike the other
// parsers in this file which pass "," — presumably a default applies; verify.
ss::parser<ss::multiline, ss::escape<'\\'>, ss::quote<'"'>> p{f.name};
std::vector<X> i;
while (!p.eof()) {
auto a = p.get_next<int, double, std::string>();
// Rows that failed to parse are skipped rather than failing the test.
if (p.valid()) {
i.emplace_back(ss::to_object<X>(a));
}
}
std::vector<X> data = {{1, 2, "just\n\nstrings"},
{3, 4, "just\nsome\n\n\nstrings"},
{9, 10, "just strings"}};
CHECK_EQ(i, data);
}
/*
TEST_CASE("parser test multiline restricted") {
unique_file_name f;
{
std::ofstream out{f.name};
//out << "1,2,\"just\n\nstrings\"" << std::endl;
//out << "3,4,\"ju\n\n\nnk\"" << std::endl;
//out << "5,6,just\\\n\\\nstrings" << std::endl;
//out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
//out << "9,10,\"just\\\n\nstrings\"" << std::endl;
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
// out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
// out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
out << "19,20,just strings" << std::endl;
}
ss::parser<ss::multiline_restricted<120>, ss::quote<'"'>, ss::escape<'\\'>>
p{f.name, ","};
std::vector<X> i;
while (!p.eof()) {
std::cout << "==========================" << std::endl;
auto a = p.get_next<int, double, std::string>();
if (p.valid()) {
i.emplace_back(ss::to_object<X>(a));
}
}
std::vector<X> data = {{1, 2, "just\n\nstrings"},
{5, 6, "just\n\nstrings"},
{9, 10, "just\n\nstrings"},
{19, 20, "just strings"}};
CHECK(std::equal(i.begin(), i.end(), data.begin()));
}
*/

View File

@ -541,7 +541,7 @@ public:
}; };
} /* ss */ } /* ss */
TEST_CASE("splitter test unterminated quote") { TEST_CASE("splitter test resplit unterminated quote") {
{ {
ss::converter<ss::quote<'"'>> c; ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter; auto& s = c.splitter;
@ -578,6 +578,7 @@ TEST_CASE("splitter test unterminated quote") {
std::vector<std::string> expected{"just", "some", R"("just","some",")"}; std::vector<std::string> expected{"just", "some", R"("just","some",")"};
CHECK(words(vec) == expected); CHECK(words(vec) == expected);
buff.data_[strlen(buff.data_) - c.splitter.escaped_] = '\0';
auto new_line = buff.append(R"(,dom","strings")"); auto new_line = buff.append(R"(,dom","strings")");
vec = c.resplit(new_line, strlen(new_line)); vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid()); CHECK(s.valid());
@ -629,6 +630,27 @@ TEST_CASE("splitter test unterminated quote") {
} }
} }
{
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec =
expect_unterminated_quote(s, "3,4,"
"\"just0\\\n1\\\n22\\\n33333x\\\n4");
std::vector<std::string> expected{"3", "4"};
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
buff.data_[strlen(buff.data_) - c.splitter.escaped_] = '\0';
auto new_line = buff.append("\nx5strings\"");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"3", "4", "just0\n1\n22\n33333x\n4\nx5strings"};
CHECK(words(vec) == expected);
}
}
{ {
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c; ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter; auto& s = c.splitter;
@ -638,6 +660,7 @@ TEST_CASE("splitter test unterminated quote") {
w.pop_back(); w.pop_back();
CHECK(w == expected); CHECK(w == expected);
{ {
buff.data_[strlen(buff.data_) - c.splitter.escaped_] = '\0';
auto new_line = buff.append(R"(n,dom",str\"ings)"); auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = c.resplit(new_line, strlen(new_line)); vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid()); CHECK(s.valid());
@ -650,17 +673,18 @@ TEST_CASE("splitter test unterminated quote") {
{ {
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c; ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter; auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra\")"); auto vec = expect_unterminated_quote(s, R"("just\"some","r\a\a\\\a\")");
std::vector<std::string> expected{"just\"some"}; std::vector<std::string> expected{"just\"some"};
auto w = words(vec); auto w = words(vec);
w.pop_back(); w.pop_back();
CHECK(w == expected); CHECK(w == expected);
{ {
buff.data_[strlen(buff.data_) - c.splitter.escaped_] = '\0';
auto new_line = buff.append(R"(n,dom",str\"ings)"); auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = c.resplit(new_line, strlen(new_line)); vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid()); CHECK(s.valid());
CHECK(!s.unterminated_quote()); CHECK(!s.unterminated_quote());
expected = {"just\"some", "ra\"n,dom", "str\"ings"}; expected = {"just\"some", "raa\\a\"n,dom", "str\"ings"};
CHECK(words(vec) == expected); CHECK(words(vec) == expected);
} }
} }
@ -692,6 +716,7 @@ TEST_CASE("splitter test unterminated quote") {
w.pop_back(); w.pop_back();
CHECK(w == expected); CHECK(w == expected);
{ {
buff.data_[strlen(buff.data_) - c.splitter.escaped_] = '\0';
auto new_line = buff.append(R"( n,dom" , strings )"); auto new_line = buff.append(R"( n,dom" , strings )");
vec = c.resplit(new_line, strlen(new_line)); vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid()); CHECK(s.valid());