Add header and raw_header methods, update error handling in the header-related methods, add new unit tests and update existing ones

This commit is contained in:
ado 2024-03-13 17:15:31 +01:00
parent 69875c238e
commit b9f4afdd5f
7 changed files with 429 additions and 109 deletions

View File

@ -31,6 +31,9 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty; constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public: public:
parser(std::string file_name, std::string delim = ss::default_delimiter) parser(std::string file_name, std::string delim = ss::default_delimiter)
: file_name_{std::move(file_name)}, reader_{file_name_, delim} { : file_name_{std::move(file_name)}, reader_{file_name_, delim} {
@ -161,20 +164,49 @@ public:
return value; return value;
} }
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
// Splits the stored raw header line into its fields and returns them.
//
// Any previously recorded error is cleared first. An empty vector is
// returned when 'strict_split' reports failure, which covers both an empty
// raw header (no error is recorded) and a header that cannot be split
// (the error is recorded or thrown by 'strict_split').
// Does not compile when the parser is set up with 'ignore_header'.
std::vector<std::string> header() {
    assert_ignore_header_not_defined();
    clear_error();

    // Work on a copy: the splitter is handed a mutable pointer into the
    // buffer, so presumably it may modify it, and 'raw_header_' must stay
    // intact for 'raw_header()'.
    header_splitter splitter;
    std::string raw_header_copy = raw_header_;

    if (!strict_split(splitter, raw_header_copy)) {
        return {};
    }

    std::vector<std::string> split_header;
    // The number of fields is known up front, so allocate once.
    split_header.reserve(splitter.split_data_.size());
    for (const auto& [begin, end] : splitter.split_data_) {
        split_header.emplace_back(begin, end);
    }
    return split_header;
}
bool field_exists(const std::string& field) { bool field_exists(const std::string& field) {
assert_ignore_header_not_defined();
clear_error();
if (header_.empty()) { if (header_.empty()) {
split_header_data(); split_header_data();
} }
if (!valid()) {
return false;
}
return header_index(field).has_value(); return header_index(field).has_value();
} }
template <typename... Ts> template <typename... Ts>
void use_fields(const Ts&... fields_args) { void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) { assert_ignore_header_not_defined();
handle_error_header_ignored(); clear_error();
return;
}
if (header_.empty() && !eof()) { if (header_.empty() && !eof()) {
split_header_data(); split_header_data();
@ -491,15 +523,51 @@ private:
// header // header
//////////////// ////////////////
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
// Splits 'header' in place with 'splitter', using the reader's delimiter.
//
// Returns false for an empty header (without recording an error) and for a
// failed split in the non-throwing modes (after recording the error).
// With 'throw_on_error' a splitter exception is rethrown, decorated with
// the file name. Returns true otherwise.
bool strict_split(header_splitter& splitter, std::string& header) {
    if (header.empty()) {
        return false;
    }

    if constexpr (!throw_on_error) {
        splitter.split(header.data(), reader_.delim_);

        const bool split_ok = splitter.valid();
        if (!split_ok) {
            handle_error_invalid_header_split(splitter);
            return false;
        }
    } else {
        try {
            splitter.split(header.data(), reader_.delim_);
        } catch (const ss::exception& e) {
            decorate_rethrow_no_line(e);
        }
    }

    return true;
}
void split_header_data() { void split_header_data() {
ss::splitter<Options...> splitter; header_splitter splitter;
std::string raw_header_copy = raw_header_; std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) { for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end}; std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) != if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) { header_.end()) {
handle_error_invalid_header(field); handle_error_duplicate_header_field(field);
header_.clear(); header_.clear();
return; return;
} }
@ -594,20 +662,6 @@ private:
} }
} }
void handle_error_header_ignored() {
constexpr static auto error_msg =
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_invalid_field(const std::string& field) { void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg = constexpr static auto error_msg =
": header does not contain given field: "; ": header does not contain given field: ";
@ -648,14 +702,40 @@ private:
} }
} }
void handle_error_invalid_header(const std::string& field) { void handle_error_invalid_header_field() {
constexpr static auto error_msg = "header contains duplicates: "; constexpr static auto error_msg = " header contains empty field";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
error_.append(error_msg).append(error_msg); error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) { } else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field}; throw ss::exception{error_msg};
} else {
error_ = true;
}
}
// Reports a header field that occurs more than once, using whichever error
// mode the parser was configured with: error string, exception or the
// plain boolean flag.
void handle_error_duplicate_header_field(const std::string& field) {
    constexpr static auto error_msg = " header contains duplicate: ";

    if constexpr (!string_error && !throw_on_error) {
        // Boolean error mode: only the flag is raised.
        error_ = true;
    } else if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(field);
    } else {
        throw ss::exception{file_name_ + error_msg + field};
    }
}
void handle_error_invalid_header_split(const header_splitter& splitter) {
constexpr static auto error_msg = " failed header split: ";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_)
.append(error_msg)
.append(splitter.error_msg());
} else { } else {
error_ = true; error_ = true;
} }
@ -671,6 +751,14 @@ private:
.append(e.what())}; .append(e.what())};
} }
// Throws a new ss::exception whose message is "<file name>: <e.what()>".
// Unlike the line-aware variant, no line number is added.
// Only usable when 'throw_on_error' is enabled.
void decorate_rethrow_no_line(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");

    std::string decorated{file_name_};
    decorated += ": ";
    decorated += e.what();
    throw ss::exception{std::move(decorated)};
}
//////////////// ////////////////
// line reading // line reading
//////////////// ////////////////
@ -935,7 +1023,7 @@ private:
} }
std::string get_buffer() { std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_}; return std::string{next_line_buffer_, next_line_size_};
} }
//////////////// ////////////////

View File

@ -293,4 +293,7 @@ private:
template <typename... Options> template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {}; struct setup<setup<Options...>> : setup<Options...> {};
template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */ } /* namespace ss */

143
ssp.hpp
View File

@ -1054,6 +1054,9 @@ private:
template <typename... Options> template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {}; struct setup<setup<Options...>> : setup<Options...> {};
template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */ } /* namespace ss */
namespace ss { namespace ss {
@ -2280,6 +2283,9 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty; constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public: public:
parser(std::string file_name, std::string delim = ss::default_delimiter) parser(std::string file_name, std::string delim = ss::default_delimiter)
: file_name_{std::move(file_name)}, reader_{file_name_, delim} { : file_name_{std::move(file_name)}, reader_{file_name_, delim} {
@ -2410,20 +2416,49 @@ public:
return value; return value;
} }
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
// Splits the stored raw header line into its fields and returns them.
//
// Any previously recorded error is cleared first. An empty vector is
// returned when 'strict_split' reports failure, which covers both an empty
// raw header (no error is recorded) and a header that cannot be split
// (the error is recorded or thrown by 'strict_split').
// Does not compile when the parser is set up with 'ignore_header'.
std::vector<std::string> header() {
    assert_ignore_header_not_defined();
    clear_error();

    // Work on a copy: the splitter is handed a mutable pointer into the
    // buffer, so presumably it may modify it, and 'raw_header_' must stay
    // intact for 'raw_header()'.
    header_splitter splitter;
    std::string raw_header_copy = raw_header_;

    if (!strict_split(splitter, raw_header_copy)) {
        return {};
    }

    std::vector<std::string> split_header;
    // The number of fields is known up front, so allocate once.
    split_header.reserve(splitter.split_data_.size());
    for (const auto& [begin, end] : splitter.split_data_) {
        split_header.emplace_back(begin, end);
    }
    return split_header;
}
bool field_exists(const std::string& field) { bool field_exists(const std::string& field) {
assert_ignore_header_not_defined();
clear_error();
if (header_.empty()) { if (header_.empty()) {
split_header_data(); split_header_data();
} }
if (!valid()) {
return false;
}
return header_index(field).has_value(); return header_index(field).has_value();
} }
template <typename... Ts> template <typename... Ts>
void use_fields(const Ts&... fields_args) { void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) { assert_ignore_header_not_defined();
handle_error_header_ignored(); clear_error();
return;
}
if (header_.empty() && !eof()) { if (header_.empty() && !eof()) {
split_header_data(); split_header_data();
@ -2740,15 +2775,51 @@ private:
// header // header
//////////////// ////////////////
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
// Splits 'header' in place with 'splitter', using the reader's delimiter.
//
// Returns false for an empty header (without recording an error) and for a
// failed split in the non-throwing modes (after recording the error).
// With 'throw_on_error' a splitter exception is rethrown, decorated with
// the file name. Returns true otherwise.
bool strict_split(header_splitter& splitter, std::string& header) {
    if (header.empty()) {
        return false;
    }

    if constexpr (!throw_on_error) {
        splitter.split(header.data(), reader_.delim_);

        const bool split_ok = splitter.valid();
        if (!split_ok) {
            handle_error_invalid_header_split(splitter);
            return false;
        }
    } else {
        try {
            splitter.split(header.data(), reader_.delim_);
        } catch (const ss::exception& e) {
            decorate_rethrow_no_line(e);
        }
    }

    return true;
}
void split_header_data() { void split_header_data() {
ss::splitter<Options...> splitter; header_splitter splitter;
std::string raw_header_copy = raw_header_; std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) { for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end}; std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) != if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) { header_.end()) {
handle_error_invalid_header(field); handle_error_duplicate_header_field(field);
header_.clear(); header_.clear();
return; return;
} }
@ -2843,20 +2914,6 @@ private:
} }
} }
void handle_error_header_ignored() {
constexpr static auto error_msg =
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_invalid_field(const std::string& field) { void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg = constexpr static auto error_msg =
": header does not contain given field: "; ": header does not contain given field: ";
@ -2897,14 +2954,40 @@ private:
} }
} }
void handle_error_invalid_header(const std::string& field) { void handle_error_invalid_header_field() {
constexpr static auto error_msg = "header contains duplicates: "; constexpr static auto error_msg = " header contains empty field";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
error_.append(error_msg).append(error_msg); error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) { } else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field}; throw ss::exception{error_msg};
} else {
error_ = true;
}
}
// Reports a header field that occurs more than once, using whichever error
// mode the parser was configured with: error string, exception or the
// plain boolean flag.
void handle_error_duplicate_header_field(const std::string& field) {
    constexpr static auto error_msg = " header contains duplicate: ";

    if constexpr (!string_error && !throw_on_error) {
        // Boolean error mode: only the flag is raised.
        error_ = true;
    } else if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(field);
    } else {
        throw ss::exception{file_name_ + error_msg + field};
    }
}
void handle_error_invalid_header_split(const header_splitter& splitter) {
constexpr static auto error_msg = " failed header split: ";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_)
.append(error_msg)
.append(splitter.error_msg());
} else { } else {
error_ = true; error_ = true;
} }
@ -2920,6 +3003,14 @@ private:
.append(e.what())}; .append(e.what())};
} }
// Throws a new ss::exception whose message is "<file name>: <e.what()>".
// Unlike the line-aware variant, no line number is added.
// Only usable when 'throw_on_error' is enabled.
void decorate_rethrow_no_line(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");

    std::string decorated{file_name_};
    decorated += ": ";
    decorated += e.what();
    throw ss::exception{std::move(decorated)};
}
//////////////// ////////////////
// line reading // line reading
//////////////// ////////////////
@ -3184,7 +3275,7 @@ private:
} }
std::string get_buffer() { std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_}; return std::string{next_line_buffer_, next_line_size_};
} }
//////////////// ////////////////

View File

@ -145,6 +145,17 @@ struct unique_file_name {
CHECK_FALSE(std::string{e.what()}.empty()); \ CHECK_FALSE(std::string{e.what()}.empty()); \
} }
// Checks that two indexable sequences have the same size and that their
// elements compare equal position by position.
//
// Each argument is evaluated exactly once. Elements are compared only up to
// the shorter length, so a size mismatch is reported by the size check
// without indexing out of bounds.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to
// a single statement: 'CHECK_EQ_ARRAY(a, b);' is then safe inside an
// unbraced if/else. Callers must terminate the invocation with ';'.
#define CHECK_EQ_ARRAY(first, second)                                          \
    do {                                                                       \
        const auto& first_ = (first);                                          \
        const auto& second_ = (second);                                        \
        CHECK_EQ(first_.size(), second_.size());                               \
        for (size_t i_ = 0; i_ < std::min(first_.size(), second_.size());      \
             ++i_) {                                                           \
            CHECK_EQ(first_[i_], second_[i_]);                                 \
        }                                                                      \
    } while (0)
template <typename T> template <typename T>
[[maybe_unused]] std::vector<std::vector<T>> vector_combinations( [[maybe_unused]] std::vector<std::vector<T>> vector_combinations(
const std::vector<T>& v, size_t n) { const std::vector<T>& v, size_t n) {
@ -166,6 +177,22 @@ template <typename T>
return ret; return ret;
} }
// Joins the header fields into a single line, placing 'delimiter' between
// consecutive fields (no leading or trailing delimiter).
// Returns an empty string for an empty header.
[[maybe_unused]] std::string merge_header(
    const std::vector<std::string>& header,
    const std::string& delimiter = ss::default_delimiter) {
    std::string merged;
    for (size_t i = 0; i < header.size(); ++i) {
        if (i != 0) {
            merged.append(delimiter);
        }
        merged.append(header[i]);
    }
    return merged;
}
[[maybe_unused]] std::string make_buffer(const std::string& file_name) { [[maybe_unused]] std::string make_buffer(const std::string& file_name) {
std::ifstream in{file_name, std::ios::binary}; std::ifstream in{file_name, std::ios::binary};
std::string tmp; std::string tmp;

View File

@ -28,6 +28,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
if (ss::setup<Ts...>::throw_on_error) { if (ss::setup<Ts...>::throw_on_error) {
try { try {
command(); command();
FAIL("expected exception");
} catch (const std::exception& e) { } catch (const std::exception& e) {
CHECK_FALSE(std::string{e.what()}.empty()); CHECK_FALSE(std::string{e.what()}.empty());
} }

View File

@ -9,6 +9,7 @@ struct has_type<T, std::tuple<Us...>>
template <typename T, typename... Ts> template <typename T, typename... Ts>
static void test_fields(const std::string file_name, const std::vector<X>& data, static void test_fields(const std::string file_name, const std::vector<X>& data,
const std::vector<std::string>& header,
const std::vector<std::string>& fields) { const std::vector<std::string>& fields) {
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
@ -17,9 +18,14 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ",");
CHECK_FALSE(p.field_exists("Unknown")); CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(fields); p.use_fields(fields);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
std::vector<CaseType> i; std::vector<CaseType> i;
for (const auto& a : p.template iterate<CaseType>()) { for (const auto& a : p.template iterate<CaseType>()) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.push_back(a); i.push_back(a);
} }
@ -40,12 +46,12 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
TEST_CASE_TEMPLATE("test various cases with header", T, TEST_CASE_TEMPLATE("test various cases with header", T,
ParserOptionCombinations) { ParserOptionCombinations) {
unique_file_name f{"various_cases_with_header"}; unique_file_name f{"various_cases_with_header"};
using str = std::string;
constexpr static auto Int = "Int"; constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double"; constexpr static auto Dbl = "Double";
constexpr static auto Str = "String"; constexpr static auto Str = "String";
using str = std::string; const std::vector<std::string> header{Int, Dbl, Str};
std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
@ -59,6 +65,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
std::vector<X> i; std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) { for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
@ -71,46 +79,22 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
p.ignore_next(); p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) { for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
CHECK_EQ(i, data); CHECK_EQ(i, data);
} }
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
{ {
ss::parser<ss::string_error> p{f.name, ","}; ss::parser<ss::string_error> p{f.name, ","};
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
p.use_fields(Int, Dbl); p.use_fields(Int, Dbl);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{ {
auto [int_, double_] = p.get_next<int, double>(); auto [int_, double_] = p.get_next<int, double>();
@ -119,6 +103,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
} }
p.use_fields(Dbl, Int); p.use_fields(Dbl, Int);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{ {
auto [double_, int_] = p.get_next<double, int>(); auto [double_, int_] = p.get_next<double, int>();
@ -163,25 +149,25 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
template_params.append(type) template_params.append(type)
arg_params.append(header[type]) arg_params.append(header[type])
call = 'testFields<' + ', '.join(template_params) + \ call = 'testFields<' + ', '.join(template_params) + \
'>(o, d, {' + ', '.join(arg_params) + '});' '>(o, d, header, {' + ', '.join(arg_params) + '});'
print(call) print(call)
*/ */
test_fields<T, str>(o, d, {Str}); test_fields<T, str>(o, d, header, {Str});
test_fields<T, int>(o, d, {Int}); test_fields<T, int>(o, d, header, {Int});
test_fields<T, double>(o, d, {Dbl}); test_fields<T, double>(o, d, header, {Dbl});
test_fields<T, str, int>(o, d, {Str, Int}); test_fields<T, str, int>(o, d, header, {Str, Int});
test_fields<T, str, double>(o, d, {Str, Dbl}); test_fields<T, str, double>(o, d, header, {Str, Dbl});
test_fields<T, int, str>(o, d, {Int, Str}); test_fields<T, int, str>(o, d, header, {Int, Str});
test_fields<T, int, double>(o, d, {Int, Dbl}); test_fields<T, int, double>(o, d, header, {Int, Dbl});
test_fields<T, double, str>(o, d, {Dbl, Str}); test_fields<T, double, str>(o, d, header, {Dbl, Str});
test_fields<T, double, int>(o, d, {Dbl, Int}); test_fields<T, double, int>(o, d, header, {Dbl, Int});
test_fields<T, str, int, double>(o, d, {Str, Int, Dbl}); test_fields<T, str, int, double>(o, d, header, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, {Str, Dbl, Int}); test_fields<T, str, double, int>(o, d, header, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, {Int, Str, Dbl}); test_fields<T, int, str, double>(o, d, header, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, {Int, Dbl, Str}); test_fields<T, int, double, str>(o, d, header, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, {Dbl, Str, Int}); test_fields<T, double, str, int>(o, d, header, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, {Dbl, Int, Str}); test_fields<T, double, int, str>(o, d, header, {Dbl, Int, Str});
} }
template <typename T> template <typename T>
@ -190,6 +176,17 @@ void test_invalid_fields(const std::vector<std::string>& lines,
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
auto check_header = [&lines](auto& p) {
if (lines.empty()) {
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
} else {
CHECK_EQ(lines[0], merge_header(p.header()));
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
}
CHECK(p.valid());
};
unique_file_name f{"invalid_fields"}; unique_file_name f{"invalid_fields"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
@ -203,6 +200,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields(); }; auto command = [&p = p] { p.use_fields(); };
expect_error_on_command(p, command); expect_error_on_command(p, command);
check_header(p);
} }
{ {
@ -210,6 +208,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields("Unknown"); }; auto command = [&p = p] { p.use_fields("Unknown"); };
expect_error_on_command(p, command); expect_error_on_command(p, command);
check_header(p);
} }
{ {
@ -221,6 +220,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
if (!fields.empty()) { if (!fields.empty()) {
expect_error_on_command(p, command); expect_error_on_command(p, command);
} }
check_header(p);
} }
{ {
@ -230,15 +230,19 @@ void test_invalid_fields(const std::vector<std::string>& lines,
p.use_fields(fields.at(0)); p.use_fields(fields.at(0));
p.template get_next<std::string, std::string>(); p.template get_next<std::string, std::string>();
}; };
check_header(p);
if (!fields.empty()) { if (!fields.empty()) {
expect_error_on_command(p, command); expect_error_on_command(p, command);
} }
check_header(p);
} }
{ {
// Invalid header // Invalid header
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p, &fields = fields] { p.use_fields(fields); }; auto command = [&p = p, &fields = fields] { p.use_fields(fields); };
check_header(p);
if (!fields.empty()) { if (!fields.empty()) {
// Pass if there are no duplicates, fail otherwise // Pass if there are no duplicates, fail otherwise
@ -255,6 +259,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
} }
} }
} }
check_header(p);
} }
} }
@ -289,7 +294,7 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid rows with header"}; unique_file_name f{"invalid_rows_with_header"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
out << "Int,String,Double" << std::endl; out << "Int,String,Double" << std::endl;
@ -301,8 +306,12 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
out << "six,line6,10.11" << std::endl; out << "six,line6,10.11" << std::endl;
} }
std::vector<std::string> header = {"Int", "String", "Double"};
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Int", "String", "Double"); p.use_fields("Int", "String", "Double");
using data = std::tuple<int, std::string, double>; using data = std::tuple<int, std::string, double>;
@ -325,10 +334,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{3, "line3", 67.8}, {3, "line3", 67.8},
{5, "line5", 9.10}}; {5, "line5", 9.10}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
} }
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Double", "Int"); p.use_fields("Double", "Int");
using data = std::tuple<double, int>; using data = std::tuple<double, int>;
@ -349,10 +362,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
} }
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("String", "Double"); p.use_fields("String", "Double");
using data = std::tuple<std::string, double>; using data = std::tuple<std::string, double>;
@ -376,6 +393,92 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{"line5", 9.10}, {"line5", 9.10},
{"line6", 10.11}}; {"line6", 10.11}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
}
// Exercises header() and raw_header() on files whose first line is not a
// well-formed header: an empty line, an unterminated quote and an
// unterminated escape. The same file name is rewritten for every scenario.
TEST_CASE_TEMPLATE("test invalid header", T, ParserOptionCombinations) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid_header"};
// Empty header
// The first line is empty, followed by one data row.
{
std::ofstream out{f.name};
out << "" << std::endl;
out << "1" << std::endl;
}
// Expected: header() yields no fields, raw_header() matches the (empty)
// merged header, and the parser is still valid.
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header()), p.raw_header());
CHECK(p.valid());
}
// Unterminated quote in header
// The header line opens a quote that is never closed.
{
std::ofstream out{f.name};
out << "\"Int" << std::endl;
out << "1" << std::endl;
}
// With quoting enabled header() is expected to fail, while raw_header()
// still returns the line unchanged.
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Same expectation with ss::multiline added to the options.
// NOTE(review): presumably this holds because the header splitter is built
// without the multiline option - confirm against parser::header_splitter.
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>, ss::multiline>(
f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Same expectation with quote, escape and multiline combined.
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::quote<'"'>,
ss::escape<'\\'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Unterminated escape in header
// The header line ends with an escape character that escapes nothing.
{
std::ofstream out{f.name};
out << "Int\\" << std::endl;
out << "1" << std::endl;
}
// With escaping enabled header() is expected to fail, while raw_header()
// still returns the line unchanged.
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
// Same expectation with ss::multiline added to the options.
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
// Same expectation with escape, quote and multiline combined.
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::quote<'"'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
} }
} }

View File

@ -1,13 +1,7 @@
#include "test_helpers.hpp" #include "test_helpers.hpp"
#include <algorithm>
#include <filesystem>
#include <fstream> #include <fstream>
#include <iomanip>
#include <iostream> #include <iostream>
#include <regex>
#include <ss/parser.hpp> #include <ss/parser.hpp>
#include <sstream>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#ifndef SEGMENT_NAME #ifndef SEGMENT_NAME
@ -333,8 +327,10 @@ void test_data_combinations(const std::vector<column>& input_data,
field_header.push_back(field{el.header}); field_header.push_back(field{el.header});
} }
std::string header_line;
if (include_header) { if (include_header) {
auto header_data = generate_csv_data<Ts...>(field_header, delim); auto header_data = generate_csv_data<Ts...>(field_header, delim);
header_line = merge_header(header_data, delim);
if (input_data.size() == 0 && rand() % 10 == 0) { if (input_data.size() == 0 && rand() % 10 == 0) {
write_to_file(header_data, delim, f.name, false); write_to_file(header_data, delim, f.name, false);
} else { } else {
@ -425,8 +421,19 @@ void test_data_combinations(const std::vector<column>& input_data,
} }
}; };
auto check_header = [&p = p, &header = header, include_header,
header_line] {
if (include_header) {
if constexpr (!setup::ignore_header) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(header_line, p.raw_header());
}
}
};
int num_columns = layout.size(); int num_columns = layout.size();
for (size_t i = 0; i < n + 1; ++i) { for (size_t i = 0; i < n + 1; ++i) {
check_header();
try { try {
switch (num_columns) { switch (num_columns) {
case 1: { case 1: {