Update header parsing

This commit is contained in:
ado 2023-08-05 13:30:14 +02:00
parent 2b132bc33a
commit 03870aa1ba
2 changed files with 37 additions and 18 deletions

View File

@ -45,7 +45,7 @@ public:
if constexpr (ignore_header) {
ignore_next();
} else {
header_ = reader_.get_header();
raw_header_ = reader_.get_buffer();
}
} else {
handle_error_file_not_open();
@ -123,6 +123,10 @@ public:
}
// Reports whether the header contains a column named `field`.
// If no column names have been collected in header_ yet, the raw header line
// is split first so the lookup has something to search.
bool field_exists(const std::string& field) {
    const bool header_not_split = header_.empty();
    if (header_not_split) {
        split_header_data();
    }

    const auto column = header_index(field);
    return column.has_value();
}
@ -133,6 +137,10 @@ public:
return;
}
if (header_.empty()) {
split_header_data();
}
if (!valid()) {
return;
}
@ -432,6 +440,15 @@ private:
// header
////////////////
void split_header_data() {
ss::splitter<Options...> splitter;
std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
for (const auto& [begin, end] : splitter.split_data_) {
header_.emplace_back(begin, end);
}
}
std::optional<size_t> header_index(const std::string& field) {
auto it = std::find(header_.begin(), header_.end(), field);
@ -508,8 +525,7 @@ private:
void handle_error_header_ignored() {
constexpr static auto error_msg =
": \"the header row is ignored within the setup it cannot be "
"used\"";
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
@ -796,15 +812,8 @@ private:
return true;
}
std::vector<std::string> get_header() {
std::vector<std::string> header;
std::string header_buffer = next_line_buffer_;
ss::splitter<Options...> splitter;
splitter.split(header_buffer.data(), delim_);
for (const auto& [begin, end] : splitter.split_data_) {
header.emplace_back(begin, end);
}
return header;
// Returns a std::string built from next_line_buffer_, i.e. an owning copy of
// whatever that buffer holds at the time of the call.
std::string get_buffer() {
return std::string{next_line_buffer_};
}
////////////////
@ -838,6 +847,7 @@ private:
error_type error_{};
reader reader_;
std::vector<std::string> header_;
std::string raw_header_;
bool eof_{false};
};

View File

@ -700,7 +700,8 @@ static inline std::string no_quote(const std::string& s) {
return s;
}
TEST_CASE("parser test csv on multiple lines with quotes") {
template <typename... Ts>
void test_quote_multiline() {
unique_file_name f{"test_parser"};
std::vector<X> data = {{1, 2, "\"x\r\nx\nx\""},
{3, 4, "\"y\ny\r\ny\""},
@ -720,11 +721,11 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
}
}
ss::parser<ss::multiline, ss::quote<'"'>> p{f.name, ","};
ss::parser<ss::multiline, ss::quote<'"'>, Ts...> p{f.name, ","};
std::vector<X> i;
while (!p.eof()) {
auto a = p.get_next<int, double, std::string>();
auto a = p.template get_next<int, double, std::string>();
i.emplace_back(ss::to_object<X>(a));
}
@ -733,13 +734,21 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
}
CHECK_EQ(i, data);
ss::parser<ss::quote<'"'>> p_no_multiline{f.name, ","};
ss::parser<ss::quote<'"'>, Ts...> p_no_multiline{f.name, ","};
while (!p.eof()) {
auto a = p_no_multiline.get_next<int, double, std::string>();
CHECK(!p.valid());
auto command = [&] {
p_no_multiline.template get_next<int, double, std::string>();
};
expect_error_on_command(p_no_multiline, command);
}
}
// Runs the multiline-quote scenario three times: with no extra parser
// options, with ss::string_error, and with ss::throw_on_error.
TEST_CASE("parser test csv on multiple lines with quotes") {
test_quote_multiline();
test_quote_multiline<ss::string_error>();
test_quote_multiline<ss::throw_on_error>();
}
static inline std::string no_escape(std::string& s) {
s.erase(std::remove(begin(s), end(s), '\\'), end(s));
return s;