mirror of
https://github.com/red0124/ssp.git
synced 2025-01-23 04:55:20 +01:00
WIP, Move additional parser tests to separate file
This commit is contained in:
parent
f28f000035
commit
d6cf9bd006
@ -5,37 +5,33 @@ project(ssp_tests CXX)
|
||||
# ---- Dependencies ----
|
||||
|
||||
include(FetchContent)
|
||||
fetchcontent_declare(ssp SOURCE_DIR "${PROJECT_SOURCE_DIR}/..")
|
||||
fetchcontent_makeavailable(ssp)
|
||||
FetchContent_Declare(ssp SOURCE_DIR "${PROJECT_SOURCE_DIR}/..")
|
||||
FetchContent_MakeAvailable(ssp)
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
||||
target_compile_options(ssp INTERFACE -Wall -Wextra)
|
||||
endif()
|
||||
|
||||
include(FetchContent)
|
||||
fetchcontent_declare(
|
||||
FetchContent_Declare(
|
||||
DOCTEST
|
||||
GIT_REPOSITORY https://github.com/red0124/doctest
|
||||
GIT_TAG origin/master
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
GIT_SHALLOW TRUE)
|
||||
|
||||
fetchcontent_makeavailable(DOCTEST)
|
||||
FetchContent_MakeAvailable(DOCTEST)
|
||||
set(DOCTEST "${FETCHCONTENT_BASE_DIR}/doctest-src")
|
||||
|
||||
# ---- Test ----
|
||||
|
||||
enable_testing()
|
||||
|
||||
foreach(name IN ITEMS test_splitter test_parser test_converter test_extractions)
|
||||
foreach(name IN ITEMS test_splitter test_parser test_parser2 test_converter
|
||||
test_extractions)
|
||||
add_executable("${name}" "${name}.cpp")
|
||||
target_link_libraries(
|
||||
"${name}"
|
||||
PRIVATE ssp::ssp fast_float doctest::doctest
|
||||
)
|
||||
target_link_libraries("${name}" PRIVATE ssp::ssp fast_float
|
||||
doctest::doctest)
|
||||
target_compile_definitions(
|
||||
"${name}"
|
||||
PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI
|
||||
)
|
||||
"${name}" PRIVATE DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN CMAKE_GITHUB_CI)
|
||||
add_test(NAME "${name}" COMMAND "${name}")
|
||||
endforeach()
|
||||
|
@ -3,6 +3,7 @@ test_sources = files([
|
||||
'test_splitter.cpp',
|
||||
'test_converter.cpp',
|
||||
'test_parser.cpp',
|
||||
'test_parser2.cpp',
|
||||
'test_extractions.cpp',
|
||||
'test_extractions_without_fast_float.cpp',
|
||||
])
|
||||
|
@ -588,7 +588,7 @@ TEST_CASE("converter test ss:oor restriction (out of range) with exceptions") {
|
||||
}
|
||||
}
|
||||
|
||||
const std::vector<int> extracted_vector = {1, 2, 3};
|
||||
const inline std::vector<int> extracted_vector = {1, 2, 3};
|
||||
|
||||
// custom extract
|
||||
template <>
|
||||
|
@ -1,6 +1,9 @@
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
|
||||
#ifdef CMAKE_GITHUB_CI
|
||||
#include <doctest/doctest.h>
|
||||
@ -8,57 +11,83 @@
|
||||
#include <doctest.h>
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
/// Keeps a private, writable copy of test input and hands out a `char*` to it.
///
/// Every member returns a pointer into `data_`; as with any std::string, that
/// pointer stops being valid once the string is modified again.
struct buffer {
    std::string data_;

    /// Replaces the stored text with `data` and returns a pointer to the copy.
    char* operator()(const std::string& data) {
        data_ = data;
        return data_.data();
    }

    /// Appends `data` to the stored text and returns a pointer to its start.
    char* append(const std::string& data) {
        data_ += data;
        return data_.data();
    }

    /// Removes the last `size` characters of the stored text, then appends
    /// `data`. Returns a pointer to the start of the resulting text.
    char* append_overwrite_last(const std::string& data, size_t size) {
        // Clamp so that asking to drop more than is stored empties the text
        // instead of underflowing `data_.size() - size` into a huge resize.
        const size_t dropped = size < data_.size() ? size : data_.size();
        data_.resize(data_.size() - dropped);
        return append(data);
    }
};
|
||||
|
||||
[[maybe_unused]] inline buffer buff;
|
||||
|
||||
/// Returns the local time formatted as "%d%m%Y%H%M%S" followed by the decimal
/// text of one `rand()` draw.
std::string time_now_rand() {
    // Seed the C generator a single time. Reseeding with the current second
    // on every call would make all calls within that second return the very
    // same "random" number.
    static const bool seeded = [] {
        srand(time(nullptr));
        return true;
    }();
    static_cast<void>(seeded);

    std::stringstream ss;
    auto t = std::time(nullptr);
    auto tm = *std::localtime(&t);
    ss << std::put_time(&tm, "%d%m%Y%H%M%S");
    return ss.str() + std::to_string(rand());
}
|
||||
|
||||
struct unique_file_name {
|
||||
static inline int i = 0;
|
||||
|
||||
const std::string name;
|
||||
|
||||
unique_file_name(const std::string& test)
|
||||
: name{"random_" + test + "_" + std::to_string(i++) + "_" + time_now_rand() +
|
||||
"_file.csv"} {
|
||||
}
|
||||
|
||||
~unique_file_name() {
|
||||
// TODO uncomment
|
||||
// std::filesystem::remove(name);
|
||||
}
|
||||
};
|
||||
|
||||
#define CHECK_FLOATING_CONVERSION(input, type) \
|
||||
{ \
|
||||
auto eps = std::numeric_limits<type>::min(); \
|
||||
std::string s = #input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
REQUIRE(t.has_value()); \
|
||||
CHECK_LT(std::abs(t.value() - type(input)), eps); \
|
||||
} \
|
||||
{ \
|
||||
/* check negative too */ \
|
||||
auto eps = std::numeric_limits<type>::min(); \
|
||||
auto s = std::string("-") + #input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
REQUIRE(t.has_value()); \
|
||||
CHECK_LT(std::abs(t.value() - type(-input)), eps); \
|
||||
}
|
||||
{ \
|
||||
auto eps = std::numeric_limits<type>::min(); \
|
||||
std::string s = #input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
REQUIRE(t.has_value()); \
|
||||
CHECK_LT(std::abs(t.value() - type(input)), eps); \
|
||||
} \
|
||||
{ \
|
||||
/* check negative too */ \
|
||||
auto eps = std::numeric_limits<type>::min(); \
|
||||
auto s = std::string("-") + #input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
REQUIRE(t.has_value()); \
|
||||
CHECK_LT(std::abs(t.value() - type(-input)), eps); \
|
||||
}
|
||||
|
||||
#define CHECK_INVALID_CONVERSION(input, type) \
|
||||
{ \
|
||||
std::string s = input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
CHECK_FALSE(t.has_value()); \
|
||||
}
|
||||
{ \
|
||||
std::string s = input; \
|
||||
auto t = ss::to_num<type>(s.c_str(), s.c_str() + s.size()); \
|
||||
CHECK_FALSE(t.has_value()); \
|
||||
}
|
||||
|
||||
#define REQUIRE_VARIANT(var, el, type) \
|
||||
{ \
|
||||
auto ptr = std::get_if<type>(&var); \
|
||||
REQUIRE(ptr); \
|
||||
REQUIRE_EQ(el, *ptr); \
|
||||
}
|
||||
{ \
|
||||
auto ptr = std::get_if<type>(&var); \
|
||||
REQUIRE(ptr); \
|
||||
REQUIRE_EQ(el, *ptr); \
|
||||
}
|
||||
|
||||
#define CHECK_NOT_VARIANT(var, type) CHECK(!std::holds_alternative<type>(var));
|
||||
|
||||
@ -71,8 +100,8 @@ struct buffer {
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::vector<std::vector<T>> vector_combinations(
|
||||
const std::vector<T>& v, size_t n) {
|
||||
std::vector<std::vector<T>> vector_combinations(const std::vector<T>& v,
|
||||
size_t n) {
|
||||
std::vector<std::vector<T>> ret;
|
||||
if (n <= 1) {
|
||||
for (const auto& i : v) {
|
||||
@ -90,4 +119,4 @@ std::vector<std::vector<T>> vector_combinations(
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} /* namespace */
|
||||
|
@ -3,38 +3,15 @@
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <ss/parser.hpp>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
// TODO add single header tests
|
||||
/// Returns the local time formatted as "%d%m%Y%H%M%S" followed by the decimal
/// text of one `rand()` draw.
std::string time_now_rand() {
    // Seed the C generator a single time. Reseeding with the current second
    // on every call would make all calls within that second return the very
    // same "random" number.
    static const bool seeded = [] {
        srand(time(nullptr));
        return true;
    }();
    static_cast<void>(seeded);

    std::stringstream ss;
    auto t = std::time(nullptr);
    auto tm = *std::localtime(&t);
    ss << std::put_time(&tm, "%d%m%Y%H%M%S");
    return ss.str() + std::to_string(rand());
}
|
||||
|
||||
inline int i = 0;
|
||||
struct unique_file_name {
|
||||
const std::string name;
|
||||
|
||||
unique_file_name()
|
||||
: name{"random_" + std::to_string(i++) + time_now_rand() +
|
||||
"_file.csv"} {
|
||||
}
|
||||
|
||||
~unique_file_name() {
|
||||
// TODO uncomment
|
||||
// std::filesystem::remove(name);
|
||||
}
|
||||
};
|
||||
|
||||
void replace_all(std::string& s, const std::string& from,
|
||||
const std::string& to) {
|
||||
namespace {
|
||||
[[maybe_unused]] void replace_all(std::string& s, const std::string& from,
|
||||
const std::string& to) {
|
||||
if (from.empty()) return;
|
||||
size_t start_pos = 0;
|
||||
while ((start_pos = s.find(from, start_pos)) != std::string::npos) {
|
||||
@ -107,12 +84,10 @@ static void make_and_write(const std::string& file_name,
|
||||
out << data[i].to_string() << new_lines[i % new_lines.size()];
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
#include <iostream>
|
||||
} /* namespace */
|
||||
|
||||
TEST_CASE("parser test various cases") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
|
||||
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
|
||||
make_and_write(f.name, data);
|
||||
@ -320,7 +295,7 @@ TEST_CASE("parser test various cases") {
|
||||
}
|
||||
|
||||
{
|
||||
unique_file_name empty_f;
|
||||
unique_file_name empty_f{"test_parser"};
|
||||
std::vector<X> empty_data = {};
|
||||
|
||||
make_and_write(empty_f.name, empty_data);
|
||||
@ -354,12 +329,12 @@ struct test_struct {
|
||||
}
|
||||
};
|
||||
|
||||
void expect_test_struct(const test_struct&) {
|
||||
static inline void expect_test_struct(const test_struct&) {
|
||||
}
|
||||
|
||||
// various scenarios
|
||||
TEST_CASE("parser test composite conversion") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
{
|
||||
std::ofstream out{f.name};
|
||||
for (auto& i :
|
||||
@ -624,7 +599,7 @@ TEST_CASE("parser test the moving of parsed composite values") {
|
||||
}
|
||||
|
||||
TEST_CASE("parser test error mode") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
{
|
||||
std::ofstream out{f.name};
|
||||
out << "junk" << std::endl;
|
||||
@ -639,7 +614,7 @@ TEST_CASE("parser test error mode") {
|
||||
CHECK_FALSE(p.error_msg().empty());
|
||||
}
|
||||
|
||||
std::string no_quote(const std::string& s) {
|
||||
static inline std::string no_quote(const std::string& s) {
|
||||
if (!s.empty() && s[0] == '"') {
|
||||
return {std::next(begin(s)), std::prev(end(s))};
|
||||
}
|
||||
@ -647,7 +622,7 @@ std::string no_quote(const std::string& s) {
|
||||
}
|
||||
|
||||
TEST_CASE("parser test csv on multiple lines with quotes") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
std::vector<X> data = {{1, 2, "\"x\r\nx\nx\""},
|
||||
{3, 4, "\"y\ny\r\ny\""},
|
||||
{5, 6, "\"z\nz\""},
|
||||
@ -686,13 +661,13 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
|
||||
}
|
||||
}
|
||||
|
||||
std::string no_escape(std::string& s) {
|
||||
static inline std::string no_escape(std::string& s) {
|
||||
s.erase(std::remove(begin(s), end(s), '\\'), end(s));
|
||||
return s;
|
||||
}
|
||||
|
||||
TEST_CASE("parser test csv on multiple lines with escapes") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
std::vector<X> data = {{1, 2, "x\\\nx\\\r\nx"},
|
||||
{5, 6, "z\\\nz\\\nz"},
|
||||
{7, 8, "u"},
|
||||
@ -732,7 +707,7 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
|
||||
}
|
||||
|
||||
TEST_CASE("parser test csv on multiple lines with quotes and escapes") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
{
|
||||
std::ofstream out{f.name};
|
||||
out << "1,2,\"just\\\n\nstrings\"" << std::endl;
|
||||
@ -772,7 +747,7 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") {
|
||||
}
|
||||
|
||||
TEST_CASE("parser test multiline restricted") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
{
|
||||
std::ofstream out{f.name};
|
||||
out << "1,2,\"just\n\nstrings\"" << std::endl;
|
||||
@ -825,13 +800,13 @@ template <typename T, typename... Us>
|
||||
struct has_type<T, std::tuple<Us...>>
|
||||
: std::disjunction<std::is_same<T, Us>...> {};
|
||||
|
||||
void check_size(size_t size1, size_t size2) {
|
||||
static inline void check_size(size_t size1, size_t size2) {
|
||||
CHECK_EQ(size1, size2);
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
void test_fields(const std::string file_name, const std::vector<X>& data,
|
||||
const std::vector<std::string>& fields) {
|
||||
static void test_fields(const std::string file_name, const std::vector<X>& data,
|
||||
const std::vector<std::string>& fields) {
|
||||
using CaseType = std::tuple<Ts...>;
|
||||
|
||||
ss::parser p{file_name, ","};
|
||||
@ -858,7 +833,7 @@ void test_fields(const std::string file_name, const std::vector<X>& data,
|
||||
}
|
||||
|
||||
TEST_CASE("parser test various cases with header") {
|
||||
unique_file_name f;
|
||||
unique_file_name f{"test_parser"};
|
||||
constexpr static auto Int = "Int";
|
||||
constexpr static auto Dbl = "Double";
|
||||
constexpr static auto Str = "String";
|
||||
@ -1003,8 +978,8 @@ TEST_CASE("parser test various cases with header") {
|
||||
test_fields<double, int, str>(o, d, {Dbl, Int, Str});
|
||||
}
|
||||
|
||||
void test_ignore_empty(const std::vector<X>& data) {
|
||||
unique_file_name f;
|
||||
static inline void test_ignore_empty(const std::vector<X>& data) {
|
||||
unique_file_name f{"test_parser"};
|
||||
make_and_write(f.name, data);
|
||||
|
||||
std::vector<X> expected;
|
||||
@ -1094,565 +1069,3 @@ TEST_CASE("parser test various cases with empty lines") {
|
||||
|
||||
test_ignore_empty({});
|
||||
}
|
||||
#endif
|
||||
|
||||
////////////////
|
||||
// parser tests v2
|
||||
////////////////
|
||||
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
/// Small deterministic pseudo random generator used to build test data.
///
/// The state always starts from the same four constants, so a test run draws
/// the same sequence every time.
struct random_number_generator {
    size_t z1 = 12341;
    size_t z2 = 12342;
    size_t z3 = 12343;
    size_t z4 = 12344;

    /// Advances the four state words and returns their xor.
    size_t rand() {
        // Each intermediate is deliberately narrowed to `unsigned int` before
        // being mixed back in, exactly as the state update expects.
        const auto b1 = static_cast<unsigned int>(((z1 << 6) ^ z1) >> 13);
        z1 = ((z1 & 4294967294U) << 18) ^ b1;
        const auto b2 = static_cast<unsigned int>(((z2 << 2) ^ z2) >> 27);
        z2 = ((z2 & 4294967288U) << 2) ^ b2;
        const auto b3 = static_cast<unsigned int>(((z3 << 13) ^ z3) >> 21);
        z3 = ((z3 & 4294967280U) << 7) ^ b3;
        const auto b4 = static_cast<unsigned int>(((z4 << 3) ^ z4) >> 12);
        z4 = ((z4 & 4294967168U) << 13) ^ b4;
        return (z1 ^ z2 ^ z3 ^ z4);
    }

    /// Returns a random valid index into `s`; `s` is required to be non-empty.
    template <typename T>
    size_t rand_index(const T& s) {
        REQUIRE(!s.empty());
        return rand() % s.size();
    }

    /// Returns true when a draw modulo 100 is above 50.
    bool rand_bool() {
        return (rand() % 100) > 50;
    }

    /// Inserts `src` into `dst` at a random position (`dst` must be non-empty).
    template <typename T>
    void rand_insert(std::string& dst, const T& src) {
        dst.insert(rand_index(dst), std::string{src});
    }

    /// Inserts `src` into `dst` between 0 and `n_max - 1` times.
    template <typename T>
    void rand_insert_n(std::string& dst, const T& src, size_t n_max) {
        // `rand() % 0` would be a division by zero; nothing to insert then.
        if (n_max == 0) {
            return;
        }

        size_t n = rand() % n_max;
        for (size_t i = 0; i < n; ++i) {
            rand_insert(dst, src);
        }
    }
} rng;
|
||||
|
||||
/// One CSV cell: its textual value plus flags describing that text.
struct field {
    std::string value;
    bool is_string = false;
    bool has_spaces_left = false;
    bool has_spaces_right = false;
    bool has_new_line = false;

    /// Text cell: records leading/trailing spaces and embedded new lines.
    field(const std::string& input)
        : value{input},
          is_string{true},
          has_spaces_left{!input.empty() && input.front() == ' '},
          has_spaces_right{!input.empty() && input.back() == ' '},
          has_new_line{input.find('\n') != std::string::npos} {
    }

    /// Integer cell, stored as its decimal text; all flags stay false.
    field(int input) : value{std::to_string(input)} {
    }

    /// Floating point cell, stored via std::to_string; all flags stay false.
    field(double input) : value{std::to_string(input)} {
    }
};
|
||||
|
||||
struct column {
|
||||
std::string header;
|
||||
std::vector<field> fields;
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
column make_column(const std::string& input_header,
|
||||
const std::vector<field>& input_fields) {
|
||||
using setup = ss::setup<Ts...>;
|
||||
std::vector<field> filtered_fields;
|
||||
|
||||
for (const auto& el : input_fields) {
|
||||
if (!setup::multiline::enabled && el.has_new_line) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!setup::escape::enabled && !setup::quote::enabled) {
|
||||
if (setup::trim_left::enabled && el.has_spaces_left) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (setup::trim_right::enabled && el.has_spaces_right) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
filtered_fields.push_back(el);
|
||||
}
|
||||
|
||||
column c;
|
||||
c.header = input_header;
|
||||
c.fields = filtered_fields;
|
||||
return c;
|
||||
}
|
||||
|
||||
/// Replaces occurrences of `old_value` in `s` with `new_value`, one at a time.
///
/// The search restarts from the beginning of `s` after every replacement, so
/// text produced by a replacement can itself be matched again. After 999
/// replacements the test is failed with "bad replace".
void replace_all2(std::string& s, const std::string& old_value,
                  const std::string& new_value) {
    constexpr size_t max_replacements = 999;

    size_t done = 0;
    while (done < max_replacements) {
        const size_t found = s.find(old_value);
        if (found == std::string::npos) {
            return;
        }
        s.replace(found, old_value.size(), new_value);
        ++done;
    }

    FAIL("bad replace");
}
|
||||
|
||||
template <typename... Ts>
|
||||
std::vector<std::string> generate_csv_data(const std::vector<field>& data,
|
||||
const std::string& delim) {
|
||||
(void)delim;
|
||||
using setup = ss::setup<Ts...>;
|
||||
constexpr static auto escape = '\\';
|
||||
constexpr static auto quote = '"';
|
||||
constexpr static auto space = ' ';
|
||||
constexpr static auto new_line = '\n';
|
||||
constexpr static auto helper0 = '#';
|
||||
constexpr static auto helper1 = '$';
|
||||
// constexpr static auto helper3 = '&';
|
||||
|
||||
std::vector<std::string> output;
|
||||
|
||||
if (setup::escape::enabled && setup::quote::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
|
||||
replace_all2(value, {escape, quote}, {helper1});
|
||||
|
||||
bool quote_newline = rng.rand_bool();
|
||||
bool quote_spacings = rng.rand_bool();
|
||||
bool has_spaces = el.has_spaces_right || el.has_spaces_left;
|
||||
|
||||
// handle escape
|
||||
replace_all2(value, {escape}, {helper0});
|
||||
rng.rand_insert_n(value, escape, 2);
|
||||
if (!quote_newline) {
|
||||
replace_all2(value, {new_line}, {helper1});
|
||||
replace_all2(value, {helper1}, {escape, new_line});
|
||||
}
|
||||
replace_all2(value, {escape, escape}, {escape});
|
||||
replace_all2(value, {escape, helper0}, {helper0});
|
||||
replace_all2(value, {helper0, escape}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, escape});
|
||||
|
||||
replace_all2(value, {helper1}, {escape, quote});
|
||||
|
||||
replace_all2(value, {escape, quote}, {helper1});
|
||||
|
||||
if (rng.rand_bool() || quote_newline ||
|
||||
(quote_spacings && has_spaces)) {
|
||||
replace_all2(value, {quote}, {helper0});
|
||||
if (rng.rand_bool()) {
|
||||
replace_all2(value, {helper0}, {escape, quote});
|
||||
} else {
|
||||
replace_all2(value, {helper0}, {quote, quote});
|
||||
}
|
||||
value = std::string{quote} + value + std::string{quote};
|
||||
}
|
||||
|
||||
replace_all2(value, {helper1}, {escape, quote});
|
||||
|
||||
if (!quote_spacings && has_spaces) {
|
||||
replace_all2(value, {escape, space}, {helper0});
|
||||
replace_all2(value, {space}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, space});
|
||||
}
|
||||
|
||||
output.push_back(value);
|
||||
}
|
||||
} else if (setup::escape::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
|
||||
replace_all2(value, {escape}, {helper0});
|
||||
rng.rand_insert_n(value, escape, 3);
|
||||
replace_all2(value, {new_line}, {helper1});
|
||||
replace_all2(value, {helper1}, {escape, new_line});
|
||||
|
||||
replace_all2(value, {escape, escape}, {escape});
|
||||
replace_all2(value, {escape, helper0}, {helper0});
|
||||
|
||||
replace_all2(value, {helper0, escape}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, escape});
|
||||
|
||||
if (setup::trim_right::enabled || setup::trim_left::enabled) {
|
||||
// escape space
|
||||
replace_all2(value, {escape, space}, {helper0});
|
||||
replace_all2(value, {space}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, space});
|
||||
}
|
||||
|
||||
output.push_back(value);
|
||||
}
|
||||
} else if (setup::quote::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
if (rng.rand_bool() || el.has_new_line || el.has_spaces_left ||
|
||||
el.has_spaces_right) {
|
||||
replace_all2(value, {quote}, {helper0});
|
||||
replace_all2(value, {helper0}, {quote, quote});
|
||||
value = std::string{quote} + value + std::string{quote};
|
||||
}
|
||||
output.push_back(value);
|
||||
}
|
||||
} else {
|
||||
for (const auto& el : data) {
|
||||
output.push_back(el.value);
|
||||
}
|
||||
}
|
||||
|
||||
if (setup::trim_right::enabled) {
|
||||
for (auto& el : output) {
|
||||
size_t n = rng.rand();
|
||||
for (size_t i = 0; i < n % 3; ++i) {
|
||||
el = el + " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (setup::trim_left::enabled) {
|
||||
for (auto& el : output) {
|
||||
size_t n = rng.rand();
|
||||
for (size_t i = 0; i < n % 3; ++i) {
|
||||
el = " " + el;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
/// Appends one CSV row to `file_name`: the elements of `data` joined with
/// `delim`, followed by a line terminator.
void write_to_file(const std::vector<std::string>& data,
                   const std::string& delim, const std::string& file_name) {
    std::ofstream out{file_name, std::ios_base::app};

    std::string line;
    auto cell = data.begin();
    if (cell != data.end()) {
        line += *cell;
        for (++cell; cell != data.end(); ++cell) {
            line += delim;
            line += *cell;
        }
    }

#ifdef _WIN32
    line += "\n";
#else
    line += "\r\n";
#endif

    out << line;
}
|
||||
|
||||
template <typename... Ts>
|
||||
void test_combinations(const std::vector<column>& input_data,
|
||||
const std::string& delim, bool include_header) {
|
||||
// TODO test without string_error
|
||||
using setup = ss::setup<Ts..., ss::string_error>;
|
||||
|
||||
unique_file_name f;
|
||||
std::vector<std::vector<field>> expected_data;
|
||||
std::vector<std::string> header;
|
||||
std::vector<field> field_header;
|
||||
|
||||
for (const auto& el : input_data) {
|
||||
header.push_back(el.header);
|
||||
field_header.push_back(field{el.header});
|
||||
}
|
||||
|
||||
if (include_header) {
|
||||
auto header_data = generate_csv_data<Ts...>(field_header, delim);
|
||||
write_to_file(header_data, delim, f.name);
|
||||
}
|
||||
|
||||
std::vector<int> layout;
|
||||
size_t n = 1 + rng.rand() % 10;
|
||||
|
||||
for (size_t i = 0; i < input_data.size(); ++i) {
|
||||
layout.push_back(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
std::vector<field> raw_data;
|
||||
for (const auto& el : input_data) {
|
||||
const auto& fields = el.fields;
|
||||
if (fields.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_data.push_back(fields[rng.rand_index(fields)]);
|
||||
}
|
||||
|
||||
expected_data.push_back(raw_data);
|
||||
auto data = generate_csv_data<Ts...>(raw_data, delim);
|
||||
write_to_file(data, delim, f.name);
|
||||
|
||||
/*
|
||||
std::cout << "[.";
|
||||
for (const auto& el : data) {
|
||||
std::cout << el << '.';
|
||||
}
|
||||
std::cout << "]" << std::endl;
|
||||
*/
|
||||
}
|
||||
|
||||
auto layout_combinations = vector_combinations(layout, layout.size());
|
||||
|
||||
auto remove_duplicates = [](const auto& vec) {
|
||||
std::vector<int> unique_vec;
|
||||
std::unordered_set<int> vec_set;
|
||||
for (const auto& el : vec) {
|
||||
if (vec_set.find(el) == vec_set.end()) {
|
||||
vec_set.insert(el);
|
||||
unique_vec.push_back(el);
|
||||
}
|
||||
}
|
||||
|
||||
return unique_vec;
|
||||
};
|
||||
|
||||
std::vector<std::vector<int>> unique_layout_combinations;
|
||||
for (const auto& layout : layout_combinations) {
|
||||
unique_layout_combinations.push_back(remove_duplicates(layout));
|
||||
}
|
||||
|
||||
if (!include_header) {
|
||||
unique_layout_combinations.clear();
|
||||
unique_layout_combinations.push_back(layout);
|
||||
}
|
||||
|
||||
for (const auto& layout : unique_layout_combinations) {
|
||||
ss::parser<setup> p{f.name, delim};
|
||||
|
||||
if (include_header) {
|
||||
std::vector<std::string> fields;
|
||||
for (const auto& index : layout) {
|
||||
fields.push_back(header[index]);
|
||||
}
|
||||
|
||||
p.use_fields(fields);
|
||||
|
||||
if (!p.valid()) {
|
||||
std::cout << p.error_msg() << std::endl;
|
||||
}
|
||||
|
||||
REQUIRE(p.valid());
|
||||
}
|
||||
|
||||
auto check_error = [&p] {
|
||||
CHECK(p.valid());
|
||||
if (!p.valid()) {
|
||||
std::cout << p.error_msg() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
int num_columns = layout.size();
|
||||
for (size_t i = 0; i < n + 1; ++i) {
|
||||
switch (num_columns) {
|
||||
case 1: {
|
||||
auto s0 = p.template get_next<std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
auto [s0, s1] = p.template get_next<std::string, std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
auto [s0, s1, s2] =
|
||||
p.template get_next<std::string, std::string,
|
||||
std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
auto [s0, s1, s2, s3] =
|
||||
p.template get_next<std::string, std::string, std::string,
|
||||
std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
/*
|
||||
std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3
|
||||
<< std::endl;
|
||||
*/
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
CHECK(s3 == expected_data[i][layout[3]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
auto [s0, s1, s2, s3, s4] =
|
||||
p.template get_next<std::string, std::string, std::string,
|
||||
std::string, std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3
|
||||
// << ' ' << s4 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
CHECK(s3 == expected_data[i][layout[3]].value);
|
||||
CHECK(s4 == expected_data[i][layout[4]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
FAIL(("Invalid number of columns: " +
|
||||
std::to_string(num_columns)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO rename
|
||||
template <typename... Ts>
|
||||
void test_combinations_impl() {
|
||||
column ints0 =
|
||||
make_column<Ts...>("ints0", {field{123}, field{45}, field{6}});
|
||||
column ints1 =
|
||||
make_column<Ts...>("ints1", {field{123}, field{45}, field{6}});
|
||||
column ints2 =
|
||||
make_column<Ts...>("ints2", {field{123}, field{45}, field{6}});
|
||||
|
||||
column floats0 =
|
||||
make_column<Ts...>("floats0", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
column floats1 =
|
||||
make_column<Ts...>("floats1", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
column floats2 =
|
||||
make_column<Ts...>("floats2", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
|
||||
column strings0 =
|
||||
make_column<Ts...>("strings0", {field{"just"}, field{"some"},
|
||||
field{"random"}, field{"string"}});
|
||||
|
||||
column strings1 =
|
||||
make_column<Ts...>("strings1", {field{"st\"rings"}, field{"w\"\"ith"},
|
||||
field{"qu\"otes\\"}, field{"\\a\\n\\d"},
|
||||
field{"escapes\""}});
|
||||
|
||||
column strings2 =
|
||||
make_column<Ts...>("strings2",
|
||||
{field{" with "}, field{" spaces"},
|
||||
field{"and "}, field{"\nnew"}, field{" \nlines"},
|
||||
field{" a\n\nn\n\nd "}, field{" \nso\n "},
|
||||
field{"on"}});
|
||||
|
||||
auto columns0 = std::vector{ints0, strings0, floats0, strings1, strings2};
|
||||
auto columns1 = std::vector{strings2, strings1, floats0, strings0, ints0};
|
||||
auto columns2 = std::vector{floats0, strings1, ints0, strings2, strings0};
|
||||
auto columns3 = std::vector{ints0, ints1, ints2};
|
||||
auto columns4 = std::vector{floats0, floats1, floats2};
|
||||
auto columns5 = std::vector{strings1, strings2};
|
||||
auto columns6 = std::vector{strings1};
|
||||
auto columns7 = std::vector{strings2};
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
for (const auto& delimiter : {",", "-", "--"}) {
|
||||
for (const auto& columns :
|
||||
{columns0, columns1, columns2, columns3, columns4, columns5,
|
||||
columns6, columns7}) {
|
||||
test_combinations<Ts...>(columns, delimiter, false);
|
||||
test_combinations<Ts...>(columns, delimiter, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("parser test various cases version 2") {
|
||||
// TODO handle crlf
|
||||
using quote = ss::quote<'"'>;
|
||||
using escape = ss::escape<'\\'>;
|
||||
using trim = ss::trim<' '>;
|
||||
using triml = ss::trim_left<' '>;
|
||||
using trimr = ss::trim_right<' '>;
|
||||
using multiline = ss::multiline;
|
||||
|
||||
test_combinations_impl<>();
|
||||
test_combinations_impl<trim>();
|
||||
test_combinations_impl<triml>();
|
||||
test_combinations_impl<trimr>();
|
||||
|
||||
test_combinations_impl<escape>();
|
||||
test_combinations_impl<escape, trim>();
|
||||
test_combinations_impl<escape, triml>();
|
||||
test_combinations_impl<escape, trimr>();
|
||||
|
||||
test_combinations_impl<quote>();
|
||||
test_combinations_impl<quote, trim>();
|
||||
test_combinations_impl<quote, triml>();
|
||||
test_combinations_impl<quote, trimr>();
|
||||
|
||||
test_combinations_impl<escape, quote>();
|
||||
test_combinations_impl<escape, quote, trim>();
|
||||
test_combinations_impl<escape, quote, triml>();
|
||||
test_combinations_impl<escape, quote, trimr>();
|
||||
|
||||
test_combinations_impl<escape, multiline>();
|
||||
test_combinations_impl<escape, multiline, trim>();
|
||||
test_combinations_impl<escape, multiline, triml>();
|
||||
test_combinations_impl<escape, multiline, trimr>();
|
||||
|
||||
test_combinations_impl<quote, multiline>();
|
||||
test_combinations_impl<quote, multiline, trim>();
|
||||
test_combinations_impl<quote, multiline, triml>();
|
||||
test_combinations_impl<quote, multiline, trimr>();
|
||||
|
||||
test_combinations_impl<quote, escape, multiline>();
|
||||
test_combinations_impl<quote, escape, multiline, trim>();
|
||||
test_combinations_impl<quote, escape, multiline, triml>();
|
||||
test_combinations_impl<quote, escape, multiline, trimr>();
|
||||
}
|
||||
|
578
test/test_parser2.cpp
Normal file
578
test/test_parser2.cpp
Normal file
@ -0,0 +1,578 @@
|
||||
#include "test_helpers.hpp"
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <ss/parser.hpp>
|
||||
#include <sstream>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
|
||||
// parser tests v2
|
||||
|
||||
namespace {
|
||||
/// Small deterministic pseudo random generator used to build test data.
///
/// The state always starts from the same four constants, so a test run draws
/// the same sequence every time.
struct random_number_generator {
    size_t z1 = 12341;
    size_t z2 = 12342;
    size_t z3 = 12343;
    size_t z4 = 12344;

    /// Advances the four state words and returns their xor.
    size_t rand() {
        // Each intermediate is deliberately narrowed to `unsigned int` before
        // being mixed back in, exactly as the state update expects.
        const auto b1 = static_cast<unsigned int>(((z1 << 6) ^ z1) >> 13);
        z1 = ((z1 & 4294967294U) << 18) ^ b1;
        const auto b2 = static_cast<unsigned int>(((z2 << 2) ^ z2) >> 27);
        z2 = ((z2 & 4294967288U) << 2) ^ b2;
        const auto b3 = static_cast<unsigned int>(((z3 << 13) ^ z3) >> 21);
        z3 = ((z3 & 4294967280U) << 7) ^ b3;
        const auto b4 = static_cast<unsigned int>(((z4 << 3) ^ z4) >> 12);
        z4 = ((z4 & 4294967168U) << 13) ^ b4;
        return (z1 ^ z2 ^ z3 ^ z4);
    }

    /// Returns a random valid index into `s`; `s` is required to be non-empty.
    template <typename T>
    size_t rand_index(const T& s) {
        REQUIRE(!s.empty());
        return rand() % s.size();
    }

    /// Returns true when a draw modulo 100 is above 50.
    bool rand_bool() {
        return (rand() % 100) > 50;
    }

    /// Inserts `src` into `dst` at a random position (`dst` must be non-empty).
    template <typename T>
    void rand_insert(std::string& dst, const T& src) {
        dst.insert(rand_index(dst), std::string{src});
    }

    /// Inserts `src` into `dst` between 0 and `n_max - 1` times.
    template <typename T>
    void rand_insert_n(std::string& dst, const T& src, size_t n_max) {
        // `rand() % 0` would be a division by zero; nothing to insert then.
        if (n_max == 0) {
            return;
        }

        size_t n = rand() % n_max;
        for (size_t i = 0; i < n; ++i) {
            rand_insert(dst, src);
        }
    }
} rng;
|
||||
|
||||
// A single csv cell as the tests expect to read it back, together with a few
// properties of the text that decide which parser setups can represent it.
struct field {
    std::string value;
    bool is_string = false;
    bool has_spaces_left = false;
    bool has_spaces_right = false;
    bool has_new_line = false;

    // String cell: keeps the text unchanged and records whether it starts or
    // ends with a space and whether it contains a line feed.
    field(const std::string& input)
        : value(input),
          is_string(true),
          has_spaces_left(!input.empty() && input.front() == ' '),
          has_spaces_right(!input.empty() && input.back() == ' '),
          has_new_line(input.find('\n') != std::string::npos) {
    }

    // Integer cell: stored as its std::to_string text, all flags stay false.
    field(int input) : value(std::to_string(input)) {
    }

    // Floating point cell: stored as its std::to_string text, all flags stay
    // false.
    field(double input) : value(std::to_string(input)) {
    }
};
|
||||
|
||||
struct column {
|
||||
std::string header;
|
||||
std::vector<field> fields;
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
column make_column(const std::string& input_header,
|
||||
const std::vector<field>& input_fields) {
|
||||
using setup = ss::setup<Ts...>;
|
||||
std::vector<field> filtered_fields;
|
||||
|
||||
for (const auto& el : input_fields) {
|
||||
if (!setup::multiline::enabled && el.has_new_line) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!setup::escape::enabled && !setup::quote::enabled) {
|
||||
if (setup::trim_left::enabled && el.has_spaces_left) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (setup::trim_right::enabled && el.has_spaces_right) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
filtered_fields.push_back(el);
|
||||
}
|
||||
|
||||
column c;
|
||||
c.header = input_header;
|
||||
c.fields = filtered_fields;
|
||||
return c;
|
||||
}
|
||||
|
||||
// Replaces occurrences of `old_value` in `s` with `new_value`.
// Every round searches again from the beginning of `s`, so text produced by an
// earlier replacement can be matched again (e.g. replacing "aa" with "a"
// collapses a whole run). Gives up with a test failure if `old_value` can
// still be found after 999 replacements.
void replace_all2(std::string& s, const std::string& old_value,
                  const std::string& new_value) {
    for (size_t rounds_left = 999; rounds_left != 0; --rounds_left) {
        const size_t found_at = s.find(old_value);
        if (found_at == std::string::npos) {
            return;
        }
        s.replace(found_at, old_value.size(), new_value);
    }

    FAIL("bad replace");
}
|
||||
|
||||
template <typename... Ts>
|
||||
std::vector<std::string> generate_csv_data(const std::vector<field>& data,
|
||||
const std::string& delim) {
|
||||
(void)delim;
|
||||
using setup = ss::setup<Ts...>;
|
||||
constexpr static auto escape = '\\';
|
||||
constexpr static auto quote = '"';
|
||||
constexpr static auto space = ' ';
|
||||
constexpr static auto new_line = '\n';
|
||||
constexpr static auto helper0 = '#';
|
||||
constexpr static auto helper1 = '$';
|
||||
// constexpr static auto helper3 = '&';
|
||||
|
||||
std::vector<std::string> output;
|
||||
|
||||
if (setup::escape::enabled && setup::quote::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
|
||||
replace_all2(value, {escape, quote}, {helper1});
|
||||
|
||||
bool quote_newline = rng.rand_bool();
|
||||
bool quote_spacings = rng.rand_bool();
|
||||
bool has_spaces = el.has_spaces_right || el.has_spaces_left;
|
||||
|
||||
// handle escape
|
||||
replace_all2(value, {escape}, {helper0});
|
||||
rng.rand_insert_n(value, escape, 2);
|
||||
if (!quote_newline) {
|
||||
replace_all2(value, {new_line}, {helper1});
|
||||
replace_all2(value, {helper1}, {escape, new_line});
|
||||
}
|
||||
replace_all2(value, {escape, escape}, {escape});
|
||||
replace_all2(value, {escape, helper0}, {helper0});
|
||||
replace_all2(value, {helper0, escape}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, escape});
|
||||
|
||||
replace_all2(value, {helper1}, {escape, quote});
|
||||
|
||||
replace_all2(value, {escape, quote}, {helper1});
|
||||
|
||||
if (rng.rand_bool() || quote_newline ||
|
||||
(quote_spacings && has_spaces)) {
|
||||
replace_all2(value, {quote}, {helper0});
|
||||
if (rng.rand_bool()) {
|
||||
replace_all2(value, {helper0}, {escape, quote});
|
||||
} else {
|
||||
replace_all2(value, {helper0}, {quote, quote});
|
||||
}
|
||||
value = std::string{quote} + value + std::string{quote};
|
||||
}
|
||||
|
||||
replace_all2(value, {helper1}, {escape, quote});
|
||||
|
||||
if (!quote_spacings && has_spaces) {
|
||||
replace_all2(value, {escape, space}, {helper0});
|
||||
replace_all2(value, {space}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, space});
|
||||
}
|
||||
|
||||
output.push_back(value);
|
||||
}
|
||||
} else if (setup::escape::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
|
||||
replace_all2(value, {escape}, {helper0});
|
||||
rng.rand_insert_n(value, escape, 3);
|
||||
replace_all2(value, {new_line}, {helper1});
|
||||
replace_all2(value, {helper1}, {escape, new_line});
|
||||
|
||||
replace_all2(value, {escape, escape}, {escape});
|
||||
replace_all2(value, {escape, helper0}, {helper0});
|
||||
|
||||
replace_all2(value, {helper0, escape}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, escape});
|
||||
|
||||
if (setup::trim_right::enabled || setup::trim_left::enabled) {
|
||||
// escape space
|
||||
replace_all2(value, {escape, space}, {helper0});
|
||||
replace_all2(value, {space}, {helper0});
|
||||
replace_all2(value, {helper0}, {escape, space});
|
||||
}
|
||||
|
||||
output.push_back(value);
|
||||
}
|
||||
} else if (setup::quote::enabled) {
|
||||
for (const auto& el : data) {
|
||||
auto value = el.value;
|
||||
if (rng.rand_bool() || el.has_new_line || el.has_spaces_left ||
|
||||
el.has_spaces_right) {
|
||||
replace_all2(value, {quote}, {helper0});
|
||||
replace_all2(value, {helper0}, {quote, quote});
|
||||
value = std::string{quote} + value + std::string{quote};
|
||||
}
|
||||
output.push_back(value);
|
||||
}
|
||||
} else {
|
||||
for (const auto& el : data) {
|
||||
output.push_back(el.value);
|
||||
}
|
||||
}
|
||||
|
||||
if (setup::trim_right::enabled) {
|
||||
for (auto& el : output) {
|
||||
size_t n = rng.rand();
|
||||
for (size_t i = 0; i < n % 3; ++i) {
|
||||
el = el + " ";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (setup::trim_left::enabled) {
|
||||
for (auto& el : output) {
|
||||
size_t n = rng.rand();
|
||||
for (size_t i = 0; i < n % 3; ++i) {
|
||||
el = " " + el;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// Appends one line to `file_name`: the cells of `data` joined with `delim`.
// The line terminator is "\n" when _WIN32 is defined and "\r\n" otherwise.
void write_to_file(const std::vector<std::string>& data,
                   const std::string& delim, const std::string& file_name) {
    std::ofstream out{file_name, std::ios_base::app};

    std::string line;
    bool is_first_cell = true;
    for (const auto& cell : data) {
        if (!is_first_cell) {
            line += delim;
        }
        line += cell;
        is_first_cell = false;
    }

#ifdef _WIN32
    line += "\n";
#else
    line += "\r\n";
#endif

    out << line;
}
|
||||
|
||||
template <typename... Ts>
|
||||
void test_combinations(const std::vector<column>& input_data,
|
||||
const std::string& delim, bool include_header) {
|
||||
// TODO test without string_error
|
||||
using setup = ss::setup<Ts..., ss::string_error>;
|
||||
|
||||
unique_file_name f{"test_parser2"};
|
||||
std::vector<std::vector<field>> expected_data;
|
||||
std::vector<std::string> header;
|
||||
std::vector<field> field_header;
|
||||
|
||||
for (const auto& el : input_data) {
|
||||
header.push_back(el.header);
|
||||
field_header.push_back(field{el.header});
|
||||
}
|
||||
|
||||
if (include_header) {
|
||||
auto header_data = generate_csv_data<Ts...>(field_header, delim);
|
||||
write_to_file(header_data, delim, f.name);
|
||||
}
|
||||
|
||||
std::vector<int> layout;
|
||||
size_t n = 1 + rng.rand() % 10;
|
||||
|
||||
for (size_t i = 0; i < input_data.size(); ++i) {
|
||||
layout.push_back(i);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
std::vector<field> raw_data;
|
||||
for (const auto& el : input_data) {
|
||||
const auto& fields = el.fields;
|
||||
if (fields.empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
raw_data.push_back(fields[rng.rand_index(fields)]);
|
||||
}
|
||||
|
||||
expected_data.push_back(raw_data);
|
||||
auto data = generate_csv_data<Ts...>(raw_data, delim);
|
||||
write_to_file(data, delim, f.name);
|
||||
|
||||
std::cout << "[.";
|
||||
for (const auto& el : data) {
|
||||
std::cout << el << '.';
|
||||
}
|
||||
std::cout << "]" << std::endl;
|
||||
}
|
||||
|
||||
auto layout_combinations = vector_combinations(layout, layout.size());
|
||||
|
||||
auto remove_duplicates = [](const auto& vec) {
|
||||
std::vector<int> unique_vec;
|
||||
std::unordered_set<int> vec_set;
|
||||
for (const auto& el : vec) {
|
||||
if (vec_set.find(el) == vec_set.end()) {
|
||||
vec_set.insert(el);
|
||||
unique_vec.push_back(el);
|
||||
}
|
||||
}
|
||||
|
||||
return unique_vec;
|
||||
};
|
||||
|
||||
std::vector<std::vector<int>> unique_layout_combinations;
|
||||
for (const auto& layout : layout_combinations) {
|
||||
unique_layout_combinations.push_back(remove_duplicates(layout));
|
||||
}
|
||||
|
||||
if (!include_header) {
|
||||
unique_layout_combinations.clear();
|
||||
unique_layout_combinations.push_back(layout);
|
||||
}
|
||||
|
||||
for (const auto& layout : unique_layout_combinations) {
|
||||
ss::parser<setup> p{f.name, delim};
|
||||
|
||||
if (include_header) {
|
||||
std::vector<std::string> fields;
|
||||
for (const auto& index : layout) {
|
||||
fields.push_back(header[index]);
|
||||
}
|
||||
|
||||
p.use_fields(fields);
|
||||
|
||||
if (!p.valid()) {
|
||||
std::cout << p.error_msg() << std::endl;
|
||||
}
|
||||
|
||||
REQUIRE(p.valid());
|
||||
}
|
||||
|
||||
auto check_error = [&p] {
|
||||
CHECK(p.valid());
|
||||
if (!p.valid()) {
|
||||
std::cout << p.error_msg() << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
int num_columns = layout.size();
|
||||
for (size_t i = 0; i < n + 1; ++i) {
|
||||
switch (num_columns) {
|
||||
case 1: {
|
||||
auto s0 = p.template get_next<std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
auto [s0, s1] = p.template get_next<std::string, std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
auto [s0, s1, s2] =
|
||||
p.template get_next<std::string, std::string,
|
||||
std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << ' ' << s2 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
auto [s0, s1, s2, s3] =
|
||||
p.template get_next<std::string, std::string, std::string,
|
||||
std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
/*
|
||||
std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3
|
||||
<< std::endl;
|
||||
*/
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
CHECK(s3 == expected_data[i][layout[3]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
auto [s0, s1, s2, s3, s4] =
|
||||
p.template get_next<std::string, std::string, std::string,
|
||||
std::string, std::string>();
|
||||
if (i < n) {
|
||||
check_error();
|
||||
// std::cout << s0 << ' ' << s1 << ' ' << s2 << ' ' << s3
|
||||
// << ' ' << s4 << std::endl;
|
||||
CHECK(s0 == expected_data[i][layout[0]].value);
|
||||
CHECK(s1 == expected_data[i][layout[1]].value);
|
||||
CHECK(s2 == expected_data[i][layout[2]].value);
|
||||
CHECK(s3 == expected_data[i][layout[3]].value);
|
||||
CHECK(s4 == expected_data[i][layout[4]].value);
|
||||
} else {
|
||||
CHECK(p.eof());
|
||||
CHECK(!p.valid());
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
FAIL(("Invalid number of columns: " +
|
||||
std::to_string(num_columns)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO rename
|
||||
template <typename... Ts>
|
||||
void test_combinations_impl() {
|
||||
column ints0 =
|
||||
make_column<Ts...>("ints0", {field{123}, field{45}, field{6}});
|
||||
column ints1 =
|
||||
make_column<Ts...>("ints1", {field{123}, field{45}, field{6}});
|
||||
column ints2 =
|
||||
make_column<Ts...>("ints2", {field{123}, field{45}, field{6}});
|
||||
|
||||
column floats0 =
|
||||
make_column<Ts...>("floats0", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
column floats1 =
|
||||
make_column<Ts...>("floats1", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
column floats2 =
|
||||
make_column<Ts...>("floats2", {field{1.23}, field{456.7}, field{0.8},
|
||||
field{910}, field{123456789.987654321}});
|
||||
|
||||
column strings0 =
|
||||
make_column<Ts...>("strings0", {field{"just"}, field{"some"},
|
||||
field{"random"}, field{"string"}});
|
||||
|
||||
column strings1 =
|
||||
make_column<Ts...>("strings1", {field{"st\"rings"}, field{"w\"\"ith"},
|
||||
field{"qu\"otes\\"}, field{"\\a\\n\\d"},
|
||||
field{"escapes\""}});
|
||||
|
||||
#ifdef _WIN32
|
||||
column strings2 =
|
||||
make_column<Ts...>("strings2", {field{" with "}, field{" spaces"},
|
||||
field{"and "}, field{"\r\nnew"},
|
||||
field{" \r\nlines"},
|
||||
field{" a\r\n\r\nn\r\n\r\nd "},
|
||||
field{" \r\nso\r\n "}, field{"on"}});
|
||||
#else
|
||||
column strings2 =
|
||||
make_column<Ts...>("strings2",
|
||||
{field{" with "}, field{" spaces"},
|
||||
field{"and "}, field{"\nnew"}, field{" \nlines"},
|
||||
field{" a\n\nn\n\nd "}, field{" \nso\n "},
|
||||
field{"on"}});
|
||||
#endif
|
||||
|
||||
auto columns0 = std::vector{ints0, strings0, floats0, strings1, strings2};
|
||||
auto columns1 = std::vector{strings2, strings1, floats0, strings0, ints0};
|
||||
auto columns2 = std::vector{floats0, strings1, ints0, strings2, strings0};
|
||||
auto columns3 = std::vector{ints0, ints1, ints2};
|
||||
auto columns4 = std::vector{floats0, floats1, floats2};
|
||||
auto columns5 = std::vector{strings1, strings2};
|
||||
auto columns6 = std::vector{strings1};
|
||||
auto columns7 = std::vector{strings2};
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
for (const auto& delimiter : {",", "-", "--"}) {
|
||||
for (const auto& columns :
|
||||
{columns0, columns1, columns2, columns3, columns4, columns5,
|
||||
columns6, columns7}) {
|
||||
test_combinations<Ts...>(columns, delimiter, false);
|
||||
test_combinations<Ts...>(columns, delimiter, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} /* namespace */
|
||||
|
||||
// Runs the randomized parser checks for a list of parser setups. The order of
// the calls matters because they all draw from the same deterministic
// generator.
TEST_CASE("parser test various cases version 2") {
    // TODO handle crlf
    using quote = ss::quote<'"'>;
    using escape = ss::escape<'\\'>;
    using trim = ss::trim<' '>;
    using triml = ss::trim_left<' '>;
    using trimr = ss::trim_right<' '>;
    using multiline = ss::multiline;

    // no quoting, escaping or multiline
    test_combinations_impl<>();
    test_combinations_impl<trim>();
    test_combinations_impl<triml>();
    test_combinations_impl<trimr>();

    // setups that are not enabled yet
    /* TODO uncomment
    test_combinations_impl<escape>();
    test_combinations_impl<escape, trim>();
    test_combinations_impl<escape, triml>();
    test_combinations_impl<escape, trimr>();

    test_combinations_impl<quote>();
    test_combinations_impl<quote, trim>();
    test_combinations_impl<quote, triml>();
    test_combinations_impl<quote, trimr>();

    test_combinations_impl<escape, quote>();
    test_combinations_impl<escape, quote, trim>();
    test_combinations_impl<escape, quote, triml>();
    test_combinations_impl<escape, quote, trimr>();

    test_combinations_impl<escape, multiline>();
    test_combinations_impl<escape, multiline, trim>();
    test_combinations_impl<escape, multiline, triml>();
    test_combinations_impl<escape, multiline, trimr>();

    test_combinations_impl<quote, multiline>();
    test_combinations_impl<quote, multiline, trim>();
    test_combinations_impl<quote, multiline, triml>();
    test_combinations_impl<quote, multiline, trimr>();
    */

    // quoting, escaping and multiline together
    test_combinations_impl<quote, escape, multiline>();
    test_combinations_impl<quote, escape, multiline, trim>();
    test_combinations_impl<quote, escape, multiline, triml>();
    test_combinations_impl<quote, escape, multiline, trimr>();
}
|
@ -153,7 +153,8 @@ make_combinations(const std::vector<std::string>& input,
|
||||
using matches_type = std::vector<std::pair<case_type, std::string>>;
|
||||
|
||||
template <typename... Matchers>
|
||||
void test_combinations(matches_type& matches, std::vector<std::string> delims) {
|
||||
static inline void test_combinations(matches_type& matches,
|
||||
std::vector<std::string> delims) {
|
||||
|
||||
ss::splitter<Matchers...> s;
|
||||
ss::splitter<Matchers..., ss::throw_on_error> st;
|
||||
@ -520,7 +521,8 @@ TEST_CASE("splitter test error mode") {
|
||||
}
|
||||
|
||||
template <typename Splitter>
|
||||
auto expect_unterminated_quote(Splitter& s, const std::string& line) {
|
||||
static inline auto expect_unterminated_quote(Splitter& s,
|
||||
const std::string& line) {
|
||||
try {
|
||||
auto vec = s.split(buff(line.c_str()));
|
||||
CHECK(s.valid());
|
||||
|
Loading…
Reference in New Issue
Block a user