Add header and raw_header methods, update header usage methods error handling, write new and update existing unit tests

This commit is contained in:
ado
2024-03-13 17:15:31 +01:00
parent 69875c238e
commit b9f4afdd5f
7 changed files with 429 additions and 109 deletions

View File

@@ -145,6 +145,17 @@ struct unique_file_name {
CHECK_FALSE(std::string{e.what()}.empty()); \
}
#define CHECK_EQ_ARRAY(first, second) \
{ \
const auto& first_ = (first); \
const auto& second_ = (second); \
CHECK_EQ(first_.size(), second_.size()); \
for (size_t i_ = 0; i_ < std::min(first_.size(), second_.size()); \
++i_) { \
CHECK_EQ(first_[i_], second_[i_]); \
} \
}
template <typename T>
[[maybe_unused]] std::vector<std::vector<T>> vector_combinations(
const std::vector<T>& v, size_t n) {
@@ -166,6 +177,22 @@ template <typename T>
return ret;
}
[[maybe_unused]] std::string merge_header(
const std::vector<std::string>& header,
const std::string& delimiter = ss::default_delimiter) {
std::string s;
if (!header.empty()) {
for (const auto& i : header) {
s.append(i);
s.append(delimiter);
}
for (size_t i = 0; i < delimiter.size(); ++i) {
s.pop_back();
}
}
return s;
};
[[maybe_unused]] std::string make_buffer(const std::string& file_name) {
std::ifstream in{file_name, std::ios::binary};
std::string tmp;

View File

@@ -28,6 +28,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
if (ss::setup<Ts...>::throw_on_error) {
try {
command();
FAIL("expected exception");
} catch (const std::exception& e) {
CHECK_FALSE(std::string{e.what()}.empty());
}

View File

@@ -9,6 +9,7 @@ struct has_type<T, std::tuple<Us...>>
template <typename T, typename... Ts>
static void test_fields(const std::string file_name, const std::vector<X>& data,
const std::vector<std::string>& header,
const std::vector<std::string>& fields) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
@@ -17,9 +18,14 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ",");
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(fields);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
std::vector<CaseType> i;
for (const auto& a : p.template iterate<CaseType>()) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.push_back(a);
}
@@ -40,12 +46,12 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
TEST_CASE_TEMPLATE("test various cases with header", T,
ParserOptionCombinations) {
unique_file_name f{"various_cases_with_header"};
using str = std::string;
constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double";
constexpr static auto Str = "String";
using str = std::string;
std::vector<std::string> header{Int, Dbl, Str};
const std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
@@ -59,6 +65,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a));
}
@@ -71,46 +79,22 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::string_error> p{f.name, ","};
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
p.use_fields(Int, Dbl);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{
auto [int_, double_] = p.get_next<int, double>();
@@ -119,6 +103,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
}
p.use_fields(Dbl, Int);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{
auto [double_, int_] = p.get_next<double, int>();
@@ -163,25 +149,25 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
template_params.append(type)
arg_params.append(header[type])
call = 'testFields<' + ', '.join(template_params) + \
'>(o, d, {' + ', '.join(arg_params) + '});'
'>(o, d, header, {' + ', '.join(arg_params) + '});'
print(call)
*/
test_fields<T, str>(o, d, {Str});
test_fields<T, int>(o, d, {Int});
test_fields<T, double>(o, d, {Dbl});
test_fields<T, str, int>(o, d, {Str, Int});
test_fields<T, str, double>(o, d, {Str, Dbl});
test_fields<T, int, str>(o, d, {Int, Str});
test_fields<T, int, double>(o, d, {Int, Dbl});
test_fields<T, double, str>(o, d, {Dbl, Str});
test_fields<T, double, int>(o, d, {Dbl, Int});
test_fields<T, str, int, double>(o, d, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, {Dbl, Int, Str});
test_fields<T, str>(o, d, header, {Str});
test_fields<T, int>(o, d, header, {Int});
test_fields<T, double>(o, d, header, {Dbl});
test_fields<T, str, int>(o, d, header, {Str, Int});
test_fields<T, str, double>(o, d, header, {Str, Dbl});
test_fields<T, int, str>(o, d, header, {Int, Str});
test_fields<T, int, double>(o, d, header, {Int, Dbl});
test_fields<T, double, str>(o, d, header, {Dbl, Str});
test_fields<T, double, int>(o, d, header, {Dbl, Int});
test_fields<T, str, int, double>(o, d, header, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, header, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, header, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, header, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, header, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, header, {Dbl, Int, Str});
}
template <typename T>
@@ -190,6 +176,17 @@ void test_invalid_fields(const std::vector<std::string>& lines,
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
auto check_header = [&lines](auto& p) {
if (lines.empty()) {
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
} else {
CHECK_EQ(lines[0], merge_header(p.header()));
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
}
CHECK(p.valid());
};
unique_file_name f{"invalid_fields"};
{
std::ofstream out{f.name};
@@ -203,6 +200,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields(); };
expect_error_on_command(p, command);
check_header(p);
}
{
@@ -210,6 +208,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields("Unknown"); };
expect_error_on_command(p, command);
check_header(p);
}
{
@@ -221,6 +220,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
if (!fields.empty()) {
expect_error_on_command(p, command);
}
check_header(p);
}
{
@@ -230,15 +230,19 @@ void test_invalid_fields(const std::vector<std::string>& lines,
p.use_fields(fields.at(0));
p.template get_next<std::string, std::string>();
};
check_header(p);
if (!fields.empty()) {
expect_error_on_command(p, command);
}
check_header(p);
}
{
// Invalid header
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p, &fields = fields] { p.use_fields(fields); };
check_header(p);
if (!fields.empty()) {
// Pass if there are no duplicates, fail otherwise
@@ -255,6 +259,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
}
}
}
check_header(p);
}
}
@@ -289,7 +294,7 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid rows with header"};
unique_file_name f{"invalid_rows_with_header"};
{
std::ofstream out{f.name};
out << "Int,String,Double" << std::endl;
@@ -301,8 +306,12 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
out << "six,line6,10.11" << std::endl;
}
std::vector<std::string> header = {"Int", "String", "Double"};
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Int", "String", "Double");
using data = std::tuple<int, std::string, double>;
@@ -325,10 +334,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{3, "line3", 67.8},
{5, "line5", 9.10}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Double", "Int");
using data = std::tuple<double, int>;
@@ -349,10 +362,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("String", "Double");
using data = std::tuple<std::string, double>;
@@ -376,6 +393,92 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{"line5", 9.10},
{"line6", 10.11}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
}
TEST_CASE_TEMPLATE("test invalid header", T, ParserOptionCombinations) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid_header"};
// Empty header
{
std::ofstream out{f.name};
out << "" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header()), p.raw_header());
CHECK(p.valid());
}
// Unterminated quote in header
{
std::ofstream out{f.name};
out << "\"Int" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>, ss::multiline>(
f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::quote<'"'>,
ss::escape<'\\'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Unterminated escape in header
{
std::ofstream out{f.name};
out << "Int\\" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::quote<'"'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
}

View File

@@ -1,13 +1,7 @@
#include "test_helpers.hpp"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <ss/parser.hpp>
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#ifndef SEGMENT_NAME
@@ -333,8 +327,10 @@ void test_data_combinations(const std::vector<column>& input_data,
field_header.push_back(field{el.header});
}
std::string header_line;
if (include_header) {
auto header_data = generate_csv_data<Ts...>(field_header, delim);
header_line = merge_header(header_data, delim);
if (input_data.size() == 0 && rand() % 10 == 0) {
write_to_file(header_data, delim, f.name, false);
} else {
@@ -425,8 +421,19 @@ void test_data_combinations(const std::vector<column>& input_data,
}
};
auto check_header = [&p = p, &header = header, include_header,
header_line] {
if (include_header) {
if constexpr (!setup::ignore_header) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(header_line, p.raw_header());
}
}
};
int num_columns = layout.size();
for (size_t i = 0; i < n + 1; ++i) {
check_header();
try {
switch (num_columns) {
case 1: {