WIP, Write additional parser tests

This commit is contained in:
ado 2023-07-25 00:56:38 +02:00
parent 77631b8c0d
commit a6db4a7ad2
2 changed files with 450 additions and 55 deletions

View File

@ -681,8 +681,13 @@ private:
}
}
<<<<<<< Updated upstream
next_line_converter_.resplit(next_line_buffer_, size,
delim_);
=======
next_line_converter_.resplit(next_line_buffer_,
next_line_size_, delim_);
>>>>>>> Stashed changes
}
}
}

View File

@ -5,6 +5,8 @@
#include <iomanip>
#include <ss/parser.hpp>
#include <sstream>
#include <unordered_map>
#include <unordered_set>
std::string time_now_rand() {
std::stringstream ss;
@ -25,7 +27,7 @@ struct unique_file_name {
}
~unique_file_name() {
std::filesystem::remove(name);
// std::filesystem::remove(name);
}
};
@ -104,6 +106,7 @@ static void make_and_write(const std::string& file_name,
}
}
#if 0
#include <iostream>
TEST_CASE("parser test various cases") {
@ -868,12 +871,12 @@ template <typename T, typename... Us>
struct has_type<T, std::tuple<Us...>>
: std::disjunction<std::is_same<T, Us>...> {};
void checkSize(size_t size1, size_t size2) {
void check_size(size_t size1, size_t size2) {
CHECK_EQ(size1, size2);
}
template <typename... Ts>
void testFields(const std::string file_name, const std::vector<X>& data,
void test_fields(const std::string file_name, const std::vector<X>& data,
const std::vector<std::string>& fields) {
using CaseType = std::tuple<Ts...>;
@ -886,7 +889,7 @@ void testFields(const std::string file_name, const std::vector<X>& data,
i.push_back(a);
}
checkSize(i.size(), data.size());
check_size(i.size(), data.size());
for (size_t j = 0; j < i.size(); ++j) {
if constexpr (has_type<int, CaseType>::value) {
CHECK_EQ(std::get<int>(i[j]), data[j].i);
@ -1029,24 +1032,24 @@ TEST_CASE("parser test various cases with header") {
print(call)
*/
testFields<str>(o, d, {Str});
testFields<int>(o, d, {Int});
testFields<double>(o, d, {Dbl});
testFields<str, int>(o, d, {Str, Int});
testFields<str, double>(o, d, {Str, Dbl});
testFields<int, str>(o, d, {Int, Str});
testFields<int, double>(o, d, {Int, Dbl});
testFields<double, str>(o, d, {Dbl, Str});
testFields<double, int>(o, d, {Dbl, Int});
testFields<str, int, double>(o, d, {Str, Int, Dbl});
testFields<str, double, int>(o, d, {Str, Dbl, Int});
testFields<int, str, double>(o, d, {Int, Str, Dbl});
testFields<int, double, str>(o, d, {Int, Dbl, Str});
testFields<double, str, int>(o, d, {Dbl, Str, Int});
testFields<double, int, str>(o, d, {Dbl, Int, Str});
test_fields<str>(o, d, {Str});
test_fields<int>(o, d, {Int});
test_fields<double>(o, d, {Dbl});
test_fields<str, int>(o, d, {Str, Int});
test_fields<str, double>(o, d, {Str, Dbl});
test_fields<int, str>(o, d, {Int, Str});
test_fields<int, double>(o, d, {Int, Dbl});
test_fields<double, str>(o, d, {Dbl, Str});
test_fields<double, int>(o, d, {Dbl, Int});
test_fields<str, int, double>(o, d, {Str, Int, Dbl});
test_fields<str, double, int>(o, d, {Str, Dbl, Int});
test_fields<int, str, double>(o, d, {Int, Str, Dbl});
test_fields<int, double, str>(o, d, {Int, Dbl, Str});
test_fields<double, str, int>(o, d, {Dbl, Str, Int});
test_fields<double, int, str>(o, d, {Dbl, Int, Str});
}
void testIgnoreEmpty(const std::vector<X>& data) {
void test_ignore_empty(const std::vector<X>& data) {
unique_file_name f;
make_and_write(f.name, data);
@ -1087,53 +1090,440 @@ void testIgnoreEmpty(const std::vector<X>& data) {
}
TEST_CASE("parser test various cases with empty lines") {
testIgnoreEmpty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}});
test_ignore_empty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}});
testIgnoreEmpty(
test_ignore_empty(
{{1, 2, X::make_empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}});
testIgnoreEmpty(
test_ignore_empty(
{{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::make_empty}});
testIgnoreEmpty(
test_ignore_empty(
{{1, 2, "x"}, {5, 6, X::make_empty}, {9, 10, "v"}, {11, 12, "w"}});
testIgnoreEmpty({{1, 2, X::make_empty},
test_ignore_empty({{1, 2, X::make_empty},
{5, 6, X::make_empty},
{9, 10, "v"},
{11, 12, "w"}});
testIgnoreEmpty({{1, 2, X::make_empty},
test_ignore_empty({{1, 2, X::make_empty},
{3, 4, "y"},
{9, 10, "v"},
{11, 12, X::make_empty}});
testIgnoreEmpty({{1, 2, "x"},
test_ignore_empty({{1, 2, "x"},
{3, 4, "y"},
{9, 10, X::make_empty},
{11, 12, X::make_empty}});
testIgnoreEmpty({{1, 2, X::make_empty},
test_ignore_empty({{1, 2, X::make_empty},
{3, 4, "y"},
{9, 10, X::make_empty},
{11, 12, X::make_empty}});
testIgnoreEmpty({{1, 2, X::make_empty},
test_ignore_empty({{1, 2, X::make_empty},
{3, 4, X::make_empty},
{9, 10, X::make_empty},
{11, 12, X::make_empty}});
testIgnoreEmpty({{1, 2, "x"},
test_ignore_empty({{1, 2, "x"},
{3, 4, X::make_empty},
{9, 10, X::make_empty},
{11, 12, X::make_empty}});
testIgnoreEmpty({{1, 2, X::make_empty},
test_ignore_empty({{1, 2, X::make_empty},
{3, 4, X::make_empty},
{9, 10, X::make_empty},
{11, 12, "w"}});
testIgnoreEmpty({{11, 12, X::make_empty}});
test_ignore_empty({{11, 12, X::make_empty}});
testIgnoreEmpty({});
test_ignore_empty({});
}
#endif
////////////////
// parser tests v2
////////////////
#include <iostream>
#include <regex>
// Deterministic pseudo-random source used by the "parser tests v2" helpers.
//
// The four state words start from fixed constants, so a freshly constructed
// generator always produces the same sequence. A single global instance,
// `rng`, is declared together with the type.
//
// NOTE(review): the shift/mask constants resemble L'Ecuyer's LFSR113, but the
// state words here are size_t rather than 32-bit values - confirm whether
// matching the reference sequence matters.
struct random_number_generator {
    size_t z1 = 12341;
    size_t z2 = 12342;
    size_t z3 = 12343;
    size_t z4 = 12344;

    // Advances one state word in place. The feedback term is stored as
    // `unsigned int` (and therefore truncated to that width) before it is
    // XOR-ed back into the word.
    static void advance(size_t& word, unsigned int feed_shift,
                        unsigned int feed_drop, size_t keep_mask,
                        unsigned int word_shift) {
        const unsigned int feedback =
            ((word << feed_shift) ^ word) >> feed_drop;
        word = ((word & keep_mask) << word_shift) ^ feedback;
    }

    // Steps all four state words and returns their XOR.
    size_t rand() {
        advance(z1, 6, 13, 4294967294U, 18);
        advance(z2, 2, 27, 4294967288U, 2);
        advance(z3, 13, 21, 4294967280U, 7);
        advance(z4, 3, 12, 4294967168U, 13);
        return (z1 ^ z2 ^ z3 ^ z4);
    }

    // Returns a random valid index into the container `s`.
    // `s` must not be empty (enforced with REQUIRE before drawing).
    template <typename T>
    size_t rand_index(const T& s) {
        REQUIRE(!s.empty());
        const size_t draw = rand();
        return draw % s.size();
    }

    // Returns true when the next draw is a multiple of four.
    bool rand_bool() {
        const size_t draw = rand();
        return draw % 4 == 0;
    }

    // Inserts `src` (converted with std::string{src}) into `dst` at a random
    // existing index of `dst`; the position comes from rand_index(), so
    // `dst` must not be empty.
    template <typename T>
    void rand_insert(std::string& dst, const T& src) {
        const size_t position = rand_index(dst);
        dst.insert(position, std::string{src});
    }

    // Performs between 0 and n_max - 1 random insertions of `src` into `dst`.
    // `n_max` is used as a modulus and must therefore be non-zero.
    template <typename T>
    void rand_insert_n(std::string& dst, const T& src, size_t n_max) {
        for (size_t remaining = rand() % n_max; remaining > 0; --remaining) {
            rand_insert(dst, src);
        }
    }
} rng;
// One test cell: the text the tests compare against (`value`) together with
// flags describing the input it was built from.
struct field {
    std::string value;
    bool is_string = false;
    bool has_spaces_left = false;
    bool has_spaces_right = false;
    bool has_new_line = false;

    // Text cell: the input is stored verbatim and marked as a string. The
    // flags record whether it starts with a space, ends with a space, or
    // contains a '\n' anywhere.
    field(const std::string& input)
        : value(input),
          is_string(true),
          has_spaces_left(!input.empty() && input.front() == ' '),
          has_spaces_right(!input.empty() && input.back() == ' '),
          has_new_line(input.find('\n') != std::string::npos) {
    }

    // Integer cell: stored as its std::to_string rendering; all flags stay
    // false.
    field(int input) : value(std::to_string(input)) {
    }

    // Floating point cell: stored as its std::to_string rendering; all flags
    // stay false.
    field(double input) : value(std::to_string(input)) {
    }
};
// A named group of candidate cells for one CSV column.
// Kept as a plain aggregate: make_column() builds it with designated
// initializers.
struct column {
// Label given to the column; it is only stored by the code visible here.
std::string header;
// Cells that test_combinations() picks from at random for this column.
std::vector<field> fields;
};
// Builds a column called `input_header` from the entries of `input_fields`
// that pass a filter derived from the parser setup `ss::setup<Ts...>`:
//   - a field containing a new line is dropped unless multiline is enabled;
//   - when neither escape nor quote is enabled, a field with a leading space
//     is dropped unless trim_left is enabled, and a field with a trailing
//     space is dropped unless trim_right is enabled.
// The relative order of the surviving fields is preserved.
template <typename... Ts>
column make_column(const std::string& input_header,
                   const std::vector<field>& input_fields) {
    using setup = ss::setup<Ts...>;

    const auto passes_filter = [](const field& candidate) {
        if (candidate.has_new_line && !setup::multiline::enabled) {
            return false;
        }
        // With escaping or quoting available the space checks do not apply.
        if (setup::escape::enabled || setup::quote::enabled) {
            return true;
        }
        if (candidate.has_spaces_left && !setup::trim_left::enabled) {
            return false;
        }
        if (candidate.has_spaces_right && !setup::trim_right::enabled) {
            return false;
        }
        return true;
    };

    column result;
    result.header = input_header;
    for (const auto& candidate : input_fields) {
        if (passes_filter(candidate)) {
            result.fields.push_back(candidate);
        }
    }
    return result;
}
// Replaces occurrences of `old_value` in `s` with `new_value`, in place.
//
// Default behaviour: after every replacement the search restarts from the
// beginning of `s`, so text produced by a replacement can itself be matched
// again. Callers can rely on this to collapse runs, e.g. replacing "\\\\"
// with "\\" reduces any run of backslashes to a single one.
//
// Special cases that previously never terminated:
//   - an empty `old_value` matches everywhere; `s` is now left unchanged;
//   - when `new_value` contains `old_value`, rescanning would match the
//     inserted text forever; a single left-to-right pass that skips over
//     each inserted replacement is used instead.
//
// Other pattern pairs whose repeated rewriting keeps recreating a match can
// still fail to terminate; that is inherent to the rescanning contract.
void replace_all2(std::string& s, const std::string& old_value,
                  const std::string& new_value) {
    if (old_value.empty()) {
        return;
    }

    if (new_value.find(old_value) != std::string::npos) {
        // Self-containing replacement: never look inside inserted text.
        for (size_t pos = s.find(old_value); pos != std::string::npos;
             pos = s.find(old_value, pos + new_value.size())) {
            s.replace(pos, old_value.size(), new_value);
        }
        return;
    }

    while (true) {
        const size_t pos = s.find(old_value);
        if (pos == std::string::npos) {
            return;
        }
        s.replace(pos, old_value.size(), new_value);
    }
}
// Renders every entry of `data` into the raw text that is written to the CSV
// file for the parser setup `ss::setup<Ts...>`. Returns one string per entry,
// in the same order as `data`.
//
// Which decoration is applied depends on the enabled setup options:
//   - escape and quote: random escape characters are inserted, new lines are
//     either escaped or left raw inside a quoted value, and the value may be
//     wrapped in quotes;
//   - escape only: random escape characters are inserted and new lines are
//     escaped;
//   - quote only: the value may be wrapped in quotes (always when the field
//     has a new line) with embedded quotes doubled;
//   - neither: the value is emitted unchanged.
// Afterwards 0-2 spaces are appended when trim_right is enabled and 0-2
// spaces are prepended when trim_left is enabled.
//
// All random decisions come from the global `rng`, so the result depends on
// how many draws happened before the call.
//
// `delim` is not used yet; it is explicitly discarded below.
//
// NOTE(review): '#' (helper0) and '$' (helper1) act as temporary placeholders
// during the rewrites, so input values that already contain those characters
// would presumably be altered - confirm before adding such test data.
template <typename... Ts>
std::vector<std::string> generate_csv_data(const std::vector<field>& data,
const std::string& delim) {
(void)delim;
using setup = ss::setup<Ts...>;
constexpr static auto escape = '\\';
constexpr static auto quote = '"';
constexpr static auto helper0 = '#';
constexpr static auto helper1 = '$';
constexpr static auto new_line = '\n';
std::vector<std::string> output;
if (setup::escape::enabled && setup::quote::enabled) {
for (const auto& el : data) {
auto value = el.value;
// Park every existing escape+quote pair behind the helper1 placeholder.
replace_all2(value, {escape, quote}, {helper1});
// Drawn once per value: when true the new lines are left raw and the value
// is always wrapped in quotes further down.
bool quote_newline = rng.rand_bool();
// handle escape: park the original escape characters behind helper0, then
// insert 0 or 1 additional escape characters at random positions.
replace_all2(value, {escape}, {helper0});
rng.rand_insert_n(value, escape, 2);
if (!quote_newline) {
// Escape new lines by going through helper1 as an intermediate step.
// NOTE(review): helper1 also stands for the escape+quote pairs parked at the
// top of the loop, so those are rewritten to escape+new_line here as well -
// looks unintended; confirm.
replace_all2(value, {new_line}, {helper1});
replace_all2(value, {helper1}, {escape, new_line});
}
// Collapse adjacent escape characters into one.
replace_all2(value, {escape, escape}, {escape});
// Drop escape characters that sit directly next to a helper0 placeholder,
// then turn each placeholder into a doubled escape character.
replace_all2(value, {escape, helper0}, {helper0});
replace_all2(value, {helper0, escape}, {helper0});
replace_all2(value, {helper0}, {escape, escape});
// Expand helper1 to escape+quote and immediately park every escape+quote
// pair again, which also catches pairs formed by the steps above.
replace_all2(value, {helper1}, {escape, quote});
replace_all2(value, {escape, quote}, {helper1});
if (rng.rand_bool() || quote_newline) {
// Quoted output: each remaining quote character becomes either escape+quote
// or a doubled quote (one random choice per value), then the value is wrapped
// in quotes.
replace_all2(value, {quote}, {helper0});
if (rng.rand_bool()) {
replace_all2(value, {helper0}, {escape, quote});
} else {
replace_all2(value, {helper0}, {quote, quote});
}
value = std::string{quote} + value + std::string{quote};
}
// Restore the parked escape+quote pairs.
replace_all2(value, {helper1}, {escape, quote});
output.push_back(value);
}
} else if (setup::escape::enabled) {
for (const auto& el : data) {
auto value = el.value;
// Park the original escape characters, then insert 0-2 random ones.
replace_all2(value, {escape}, {helper0});
rng.rand_insert_n(value, escape, 3);
// Escape every new line, using helper1 as an intermediate placeholder.
replace_all2(value, {new_line}, {helper1});
replace_all2(value, {helper1}, {escape, new_line});
// Collapse adjacent escapes, drop escapes next to a placeholder and turn
// each placeholder into a doubled escape character.
replace_all2(value, {escape, escape}, {escape});
replace_all2(value, {escape, helper0}, {helper0});
replace_all2(value, {helper0, escape}, {helper0});
replace_all2(value, {helper0}, {escape, escape});
output.push_back(value);
}
} else if (setup::quote::enabled) {
for (const auto& el : data) {
auto value = el.value;
// Fields with a new line are always quoted; others only on a random draw.
if (rng.rand_bool() || el.has_new_line) {
// Double every embedded quote character before wrapping the value.
replace_all2(value, {quote}, {helper0});
replace_all2(value, {helper0}, {quote, quote});
value = std::string{quote} + value + std::string{quote};
}
output.push_back(value);
}
} else {
// Neither escaping nor quoting: emit the values unchanged.
for (const auto& el : data) {
output.push_back(el.value);
}
}
if (setup::trim_right::enabled) {
// Append 0-2 spaces to every rendered value.
for (auto& el : output) {
size_t n = rng.rand();
for (size_t i = 0; i < n % 3; ++i) {
el = el + " ";
}
}
}
if (setup::trim_left::enabled) {
// Prepend 0-2 spaces to every rendered value.
for (auto& el : output) {
size_t n = rng.rand();
for (size_t i = 0; i < n % 3; ++i) {
el = " " + el;
}
}
}
return output;
}
// Appends one line to the file `file_name`: the entries of `data` joined by
// `delim`, followed by a line break. The file is opened in append mode, so
// repeated calls add further lines; an empty `data` produces an empty line.
void write_to_file(const std::vector<std::string>& data,
                   const std::string& delim, const std::string& file_name) {
    std::ofstream out{file_name, std::ios_base::app};

    bool is_first = true;
    for (const auto& cell : data) {
        // The delimiter goes between cells, never after the last one.
        if (!is_first) {
            out << delim;
        }
        out << cell;
        is_first = false;
    }

    out << std::endl;
    out.close();
}
template <typename... Ts>
void test_combinations(const std::vector<column>& input_data,
const std::string& delim) {
// TODO test without string_error
using setup = ss::setup<Ts..., ss::string_error>;
unique_file_name f;
std::vector<std::vector<field>> expected_data;
size_t n = rng.rand() % 10;
for (size_t i = 0; i < n; ++i) {
std::vector<field> raw_data;
for (const auto& el : input_data) {
const auto& fields = el.fields;
if (fields.empty()) {
continue;
}
raw_data.push_back(fields[rng.rand_index(fields)]);
}
expected_data.push_back(raw_data);
auto data = generate_csv_data<Ts...>(raw_data, delim);
write_to_file(data, delim, f.name);
// TODO remove
std::cout << "[.";
for (const auto& el : data) {
std::cout << el << '.';
}
std::cout << "]" << std::endl;
}
std::cout << delim << std::endl;
ss::parser<setup> p{f.name, delim};
auto check_error = [&p] {
CHECK(p.valid());
if (!p.valid()) {
std::cout << p.error_msg() << std::endl;
}
};
for (size_t i = 0; i < n; ++i) {
switch (expected_data[i].size()) {
case 0:
// TODO handle;
break;
case 1: {
auto s0 = p.template get_next<std::string>();
check_error();
std::cout << s0 << std::endl;
CHECK(s0 == expected_data[i][0].value);
break;
}
case 2: {
auto [s0, s1] = p.template get_next<std::string, std::string>();
check_error();
std::cout << s0 << ' ' << s1 << std::endl;
CHECK(s0 == expected_data[i][0].value);
CHECK(s1 == expected_data[i][1].value);
break;
}
case 3: {
auto [s0, s1, s2] =
p.template get_next<std::string, std::string, std::string>();
check_error();
std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl;
CHECK(s0 == expected_data[i][0].value);
CHECK(s1 == expected_data[i][1].value);
CHECK(s2 == expected_data[i][2].value);
break;
}
case 4: {
auto [s0, s1, s2, s3] =
p.template get_next<std::string, std::string, std::string,
std::string>();
check_error();
std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 << std::endl;
CHECK(s0 == expected_data[i][0].value);
CHECK(s1 == expected_data[i][1].value);
CHECK(s2 == expected_data[i][2].value);
CHECK(s3 == expected_data[i][3].value);
break;
}
case 5: {
auto [s0, s1, s2, s3, s4] =
p.template get_next<std::string, std::string, std::string,
std::string, std::string>();
check_error();
std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3 << ' ' << s4
<< std::endl;
CHECK(s0 == expected_data[i][0].value);
CHECK(s1 == expected_data[i][1].value);
CHECK(s2 == expected_data[i][2].value);
CHECK(s3 == expected_data[i][3].value);
CHECK(s4 == expected_data[i][4].value);
break;
}
// ...
default:
// TODO handle
break;
}
}
}
// TODO rename
template <typename... Ts>
void test_combinations_impl() {
column data0 =
make_column<Ts...>("data0", {field{111}, field{11}, field{1}});
column data1 = make_column<Ts...>("data1", {field{"hel\\lo"}, field{"h\ni"},
field{"new\nline"}});
column data2 =
make_column<Ts...>("data2", {field{222}, field{22}, field{12345}});
column data3 =
make_column<Ts...>("data3", {field{"h\"mm"}, field{"::::::::"}});
column data4 =
make_column<Ts...>("data4", {field{"h\"\"e\\llloooo"}, field{":D"}});
auto columns0 = std::vector{data0, data1, data2, data3, data4};
auto columns1 = std::vector{data4, data3, data2, data1, data0};
auto columns2 = std::vector{data2, data3, data0, data4, data1};
for (size_t i = 0; i < 2; ++i) {
for (const auto& delimiter: {",", "-", "--"}) {
test_combinations<Ts...>(columns0, delimiter);
test_combinations<Ts...>(columns1, delimiter);
test_combinations<Ts...>(columns2, delimiter);
}
}
}
// Runs the randomized test driver test_combinations_impl for the parser
// setups listed below. Every run draws from the shared global `rng`, so the
// order of these calls determines which random data each setup receives.
TEST_CASE("parser test various cases version 2") {
using quote = ss::quote<'"'>;
using escape = ss::escape<'\\'>;
// NOTE(review): trim, triml and trimr are declared but not used by any call
// in this test case - presumably reserved for trim-enabled setups that are
// still to be added; confirm or remove.
using trim = ss::trim<' '>;
using triml = ss::trim_left<' '>;
using trimr = ss::trim_right<' '>;
using multiline = ss::multiline;
// Setups without multiline.
test_combinations_impl<>();
test_combinations_impl<escape>();
test_combinations_impl<quote>();
test_combinations_impl<escape, quote>();
// Setups with multiline.
test_combinations_impl<escape, multiline>();
test_combinations_impl<quote, multiline>();
test_combinations_impl<quote, escape, multiline>();
}