Merge pull request #5 from red0124/feature/quote_escape_trim

Feature/quote escape trim
This commit is contained in:
red0124 2021-02-06 01:49:37 +01:00 committed by GitHub
commit 5cd458e2bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 1676 additions and 153 deletions

View File

@ -48,6 +48,7 @@ Bill (Heath) Gates 65 3.3
* Works on any type * Works on any type
* Easy to use * Easy to use
* No exceptions * No exceptions
* Works with quotes, escapes and spacings
* Columns and rows can be ignored * Columns and rows can be ignored
* Works with any type of delimiter * Works with any type of delimiter
* Can return whole objects composed of converted values * Can return whole objects composed of converted values
@ -216,6 +217,15 @@ inline bool ss::extract(const char* begin, const char* end, shape& dst) {
``` ```
The shape enum will be used in an example below. The **inline** is there just to prevent multiple definition errors. The function returns **true** if the conversion was a success, and **false** otherwise. The function uses **const char*** begin and end for performance reasons. The shape enum will be used in an example below. The **inline** is there just to prevent multiple definition errors. The function returns **true** if the conversion was a success, and **false** otherwise. The function uses **const char*** begin and end for performance reasons.
## Quoting
Not yet documented.
## Escaping
Not yet documented.
## Spacing
Not yet documented.
## Error handling ## Error handling
Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method: Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method:

View File

@ -1,8 +1,8 @@
#pragma once #pragma once
#include "extract.hpp" #include "extract.hpp"
#include "function_traits.hpp" #include "function_traits.hpp"
#include "restrictions.hpp" #include "restrictions.hpp"
#include "splitter.hpp"
#include "type_traits.hpp" #include "type_traits.hpp"
#include <string> #include <string>
#include <type_traits> #include <type_traits>
@ -21,7 +21,7 @@ INIT_HAS_METHOD(error)
// eg. no_validator_tup_t<int, ss::nx<char, 'A', 'B'>> <=> std::tuple<int, char> // eg. no_validator_tup_t<int, ss::nx<char, 'A', 'B'>> <=> std::tuple<int, char>
// where ss::nx<char, 'A', 'B'> is a validator '(n)one e(x)cept' which // where ss::nx<char, 'A', 'B'> is a validator '(n)one e(x)cept' which
// checks if the returned character is either 'A' or 'B', returns error if not // checks if the returned character is either 'A' or 'B', returns error if not
// additionaly if one element is left in the pack, it will be unwraped from // additionally if one element is left in the pack, it will be unwrapped from
// the tuple eg. no_void_validator_tup_t<int> <=> int instead of std::tuple<int> // the tuple eg. no_void_validator_tup_t<int> <=> int instead of std::tuple<int>
template <typename T, typename U = void> template <typename T, typename U = void>
struct no_validator; struct no_validator;
@ -103,34 +103,31 @@ struct tied_class {
template <typename... Ts> template <typename... Ts>
constexpr bool tied_class_v = tied_class<Ts...>::value; constexpr bool tied_class_v = tied_class<Ts...>::value;
// the error can be set inside a string, or a bool
enum class error_mode { error_string, error_bool };
//////////////// ////////////////
// converter // converter
//////////////// ////////////////
template <typename... Matchers>
class converter { class converter {
using string_range = std::pair<const char*, const char*>; constexpr static auto default_delimiter = ",";
constexpr static auto default_delimiter = ','; using line_ptr_type = typename splitter<Matchers...>::line_ptr_type;
public: public:
using split_input = std::vector<string_range>;
// parses line with given delimiter, returns a 'T' object created with // parses line with given delimiter, returns a 'T' object created with
// extracted values of type 'Ts' // extracted values of type 'Ts'
template <typename T, typename... Ts> template <typename T, typename... Ts>
T convert_object(const char* const line, const std::string& delim = "") { T convert_object(line_ptr_type line,
const std::string& delim = default_delimiter) {
return to_object<T>(convert<Ts...>(line, delim)); return to_object<T>(convert<Ts...>(line, delim));
} }
// parses line with given delimiter, returns tuple of objects with // parses line with given delimiter, returns tuple of objects with
// extracted values of type 'Ts' // extracted values of type 'Ts'
template <typename... Ts> template <typename... Ts>
no_void_validator_tup_t<Ts...> convert(const char* const line, no_void_validator_tup_t<Ts...> convert(
const std::string& delim = "") { line_ptr_type line, const std::string& delim = default_delimiter) {
input_ = split(line, delim); split(line, delim);
return convert<Ts...>(input_); return convert<Ts...>(splitter_.split_input_);
} }
// parses already split line, returns 'T' object with extracted values // parses already split line, returns 'T' object with extracted values
@ -139,6 +136,12 @@ public:
return to_object<T>(convert<Ts...>(elems)); return to_object<T>(convert<Ts...>(elems));
} }
// Overload of convert_object that takes no line: it converts whatever the
// no-argument convert<Ts...>() overload yields and builds a 'T' from the
// result via to_object<T>.
template <typename T, typename... Ts>
T convert_object() {
return to_object<T>(convert<Ts...>());
}
// parses already split line, returns either a tuple of objects with // parses already split line, returns either a tuple of objects with
// parsed values (returns raw element (no tuple) if Ts is empty), or if // parsed values (returns raw element (no tuple) if Ts is empty), or if
// one argument is given which is a class which has a tied // one argument is given which is a class which has a tied
@ -163,35 +166,53 @@ public:
} }
} }
// Overload of convert that takes no line: it converts the columns currently
// stored in splitter_.split_input_ (the result of the most recent split)
// into values of type 'T, Ts...'.
template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> convert() {
return convert<T, Ts...>(splitter_.split_input_);
}
bool valid() const { bool valid() const {
return (error_mode_ == error_mode::error_string) ? string_error_.empty() return (error_mode_ == error_mode::error_string) ? string_error_.empty()
: bool_error_ == false; : bool_error_ == false;
} }
const std::string& error_msg() const { return string_error_; } bool unterminated_quote() const {
return splitter_.unterminated_quote();
void set_error_mode(error_mode mode) { error_mode_ = mode; }
// 'splits' string by given delimiter, returns vector of pairs which
// contain the beginings and the ends of each column of the string
const split_input& split(const char* const line,
const std::string& delim = "") {
input_.clear();
if (line[0] == '\0') {
return input_;
} }
switch (delim.size()) { const std::string& error_msg() const {
case 0: return string_error_;
return split_impl(line, ','); }
case 1:
return split_impl(line, delim[0]); void set_error_mode(error_mode mode) {
default: splitter_.set_error_mode(mode);
return split_impl(line, delim, delim.size()); error_mode_ = mode;
}; }
// 'splits' string by given delimiter, returns vector of pairs which
// contain the beginnings and the ends of each column of the string
const split_input& split(line_ptr_type line,
const std::string& delim = default_delimiter) {
splitter_.split_input_.clear();
if (line[0] == '\0') {
return splitter_.split_input_;
}
return splitter_.split(line, delim);
} }
private: private:
////////////////
// resplit
////////////////
// Forwards to splitter_.resplit with the new line pointer, its size and the
// delimiter, and returns the splitter's column ranges.
// NOTE: 'delim' defaults to default_delimiter, so a caller that split with a
// different delimiter has to pass it again here.
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
const std::string& delim = default_delimiter) {
return splitter_.resplit(new_line, new_size, delim);
}
//////////////// ////////////////
// error // error
//////////////// ////////////////
@ -212,6 +233,15 @@ private:
return error; return error;
} }
// Propagates a splitter failure to the converter: in error_string mode the
// splitter's message replaces the converter's message, otherwise only the
// boolean error flag is raised.
void set_error_unterminated_quote() {
    if (error_mode_ != error_mode::error_string) {
        bool_error_ = true;
        return;
    }

    string_error_.assign(splitter_.error_msg());
}
void set_error_invalid_conversion(const string_range msg, size_t pos) { void set_error_invalid_conversion(const string_range msg, size_t pos) {
if (error_mode_ == error_mode::error_string) { if (error_mode_ == error_mode::error_string) {
string_error_.clear(); string_error_.clear();
@ -252,11 +282,19 @@ private:
template <typename... Ts> template <typename... Ts>
no_void_validator_tup_t<Ts...> convert_impl(const split_input& elems) { no_void_validator_tup_t<Ts...> convert_impl(const split_input& elems) {
clear_error(); clear_error();
if (!splitter_.valid()) {
set_error_unterminated_quote();
no_void_validator_tup_t<Ts...> ret{}; no_void_validator_tup_t<Ts...> ret{};
if (sizeof...(Ts) != elems.size()) {
set_error_number_of_colums(sizeof...(Ts), elems.size());
return ret; return ret;
} }
if (sizeof...(Ts) != elems.size()) {
set_error_number_of_colums(sizeof...(Ts), elems.size());
no_void_validator_tup_t<Ts...> ret{};
return ret;
}
return extract_tuple<Ts...>(elems); return extract_tuple<Ts...>(elems);
} }
@ -267,37 +305,6 @@ private:
return convert_impl<Ts...>(elems); return convert_impl<Ts...>(elems);
} }
////////////////
// substring
////////////////
template <typename Delim>
const split_input& split_impl(const char* const line, Delim delim,
size_t delim_size = 1) {
auto range = substring(line, delim);
input_.push_back(range);
while (range.second[0] != '\0') {
range = substring(range.second + delim_size, delim);
input_.push_back(range);
}
return input_;
}
bool no_match(const char* end, char delim) const { return *end != delim; }
bool no_match(const char* end, const std::string& delim) const {
return strncmp(end, delim.c_str(), delim.size()) != 0;
}
template <typename Delim>
string_range substring(const char* const begin, Delim delim) const {
const char* end;
for (end = begin; *end != '\0' && no_match(end, delim); ++end)
;
return string_range{begin, end};
}
//////////////// ////////////////
// conversion // conversion
//////////////// ////////////////
@ -309,6 +316,11 @@ private:
return; return;
} }
if constexpr (std::is_same_v<T, std::string>) {
extract(msg.first, msg.second, dst);
return;
}
if (!extract(msg.first, msg.second, dst)) { if (!extract(msg.first, msg.second, dst)) {
set_error_invalid_conversion(msg, pos); set_error_invalid_conversion(msg, pos);
return; return;
@ -353,7 +365,7 @@ private:
no_void_validator_tup_t<Ts...> extract_tuple(const split_input& elems) { no_void_validator_tup_t<Ts...> extract_tuple(const split_input& elems) {
static_assert(!all_of<std::is_void, Ts...>::value, static_assert(!all_of<std::is_void, Ts...>::value,
"at least one parameter must be non void"); "at least one parameter must be non void");
no_void_validator_tup_t<Ts...> ret; no_void_validator_tup_t<Ts...> ret{};
extract_multiple<0, 0, Ts...>(ret, elems); extract_multiple<0, 0, Ts...>(ret, elems);
return ret; return ret;
} }
@ -362,21 +374,13 @@ private:
// members // members
//////////////// ////////////////
std::vector<string_range> input_;
std::string string_error_; std::string string_error_;
bool bool_error_; bool bool_error_;
enum error_mode error_mode_ { error_mode::error_bool }; enum error_mode error_mode_ { error_mode::error_bool };
splitter<Matchers...> splitter_;
template <typename ...>
friend class parser;
}; };
template <>
inline void converter::extract_one<std::string>(std::string& dst,
const string_range msg,
size_t) {
if (!valid()) {
return;
}
extract(msg.first, msg.second, dst);
}
} /* ss */ } /* ss */

View File

@ -33,6 +33,7 @@ std::enable_if_t<std::is_floating_point_v<T>, T> pow10(int n) {
return ret; return ret;
} }
// TODO not working with large number of digits
template <typename T> template <typename T>
std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num( std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
const char* begin, const char* const end) { const char* begin, const char* const end) {

View File

@ -9,13 +9,14 @@
#include <string> #include <string>
#include <vector> #include <vector>
// TODO rule of 5-3-1
// TODO threads
namespace ss { namespace ss {
struct none {}; template <typename... Matchers>
template <typename...>
class composite;
class parser { class parser {
struct none {};
public: public:
parser(const std::string& file_name, const std::string& delimiter) parser(const std::string& file_name, const std::string& delimiter)
: file_name_{file_name}, delim_{delimiter}, : file_name_{file_name}, delim_{delimiter},
@ -41,7 +42,7 @@ public:
void set_error_mode(error_mode mode) { void set_error_mode(error_mode mode) {
error_mode_ = mode; error_mode_ = mode;
converter_.set_error_mode(mode); reader_.set_error_mode(mode);
} }
const std::string& error_msg() const { const std::string& error_msg() const {
@ -53,7 +54,7 @@ public:
} }
bool ignore_next() { bool ignore_next() {
return buff_.read(file_); return reader_.read(file_);
} }
template <typename T, typename... Ts> template <typename T, typename... Ts>
@ -63,17 +64,16 @@ public:
template <typename T, typename... Ts> template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> get_next() { no_void_validator_tup_t<T, Ts...> get_next() {
buff_.update(); reader_.update();
clear_error(); clear_error();
if (eof_) { if (eof_) {
set_error_eof_reached(); set_error_eof_reached();
return {}; return {};
} }
split_input_ = converter_.split(buff_.get(), delim_); auto value = reader_.get_converter().template convert<T, Ts...>();
auto value = converter_.convert<T, Ts...>(split_input_);
if (!converter_.valid()) { if (!reader_.get_converter().valid()) {
set_error_invalid_conversion(); set_error_invalid_conversion();
} }
@ -162,8 +162,8 @@ public:
no_void_validator_tup_t<U, Us...> try_same() { no_void_validator_tup_t<U, Us...> try_same() {
parser_.clear_error(); parser_.clear_error();
auto value = auto value =
parser_.converter_.convert<U, Us...>(parser_.split_input_); parser_.reader_.get_converter().template convert<U, Us...>();
if (!parser_.converter_.valid()) { if (!parser_.reader_.get_converter().valid()) {
parser_.set_error_invalid_conversion(); parser_.set_error_invalid_conversion();
} }
return value; return value;
@ -192,9 +192,6 @@ public:
} }
private: private:
template <typename...>
friend class composite;
// tries to invoke the given function (see below), if the function // tries to invoke the given function (see below), if the function
// returns a value which can be used as a conditional, and it returns // returns a value which can be used as a conditional, and it returns
// false, the function sets an error, and allows the invoke of the // false, the function sets an error, and allows the invoke of the
@ -249,44 +246,146 @@ private:
// line reading // line reading
//////////////// ////////////////
class buffer { class reader {
char* buffer_{nullptr}; char* buffer_{nullptr};
char* new_buffer_{nullptr}; char* next_line_buffer_{nullptr};
size_t size_{0}; char* helper_buffer_{nullptr};
public: converter<Matchers...> converter_;
~buffer() { converter<Matchers...> next_line_converter_;
free(buffer_);
free(new_buffer_); size_t size_{0};
size_t helper_size_{0};
const std::string& delim_;
bool crlf;
bool escaped_eol(size_t size) {
if constexpr (setup<Matchers...>::escape::enabled) {
const char* curr;
for (curr = next_line_buffer_ + size - 1;
curr >= next_line_buffer_ &&
setup<Matchers...>::escape::match(*curr);
--curr) {
}
return (next_line_buffer_ - curr + size) % 2 == 0;
} }
bool read(FILE* file) {
ssize_t size = getline(&new_buffer_, &size_, file);
size_t string_end = size - 1;
if (size == -1) {
return false; return false;
} }
if (size >= 2 && new_buffer_[size - 2] == '\r') { bool unterminated_quote() {
string_end--; if constexpr (ss::setup<Matchers...>::quote::enabled) {
if (next_line_converter_.unterminated_quote()) {
return true;
}
}
return false;
} }
new_buffer_[string_end] = '\0'; void undo_remove_eol(size_t& string_end) {
if (crlf) {
std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end);
string_end += 2;
} else {
std::copy_n("\n\0", 2, next_line_buffer_ + string_end);
string_end += 1;
}
}
// Terminates 'buffer' right before its end-of-line sequence and returns the
// length of the remaining text.
//
// buffer: line as filled by getline, 'size' characters long
// size:   number of characters getline reported for that line
//
// Only a trailing "\n" or "\r\n" is removed; 'crlf' records which of the two
// was found so the sequence can be restored later. A line without a trailing
// '\n' (the last line of a file that does not end with a newline) is returned
// unchanged; previously its last character was cut off.
// NOTE(review): undo_remove_eol writes the end-of-line sequence back at the
// returned offset; for a line that had no '\n' this needs one byte more than
// the text itself, confirm the buffer always has that headroom.
size_t remove_eol(char*& buffer, size_t size) {
    size_t new_size = size;
    crlf = false;

    if (new_size > 0 && buffer[new_size - 1] == '\n') {
        --new_size;
        if (new_size > 0 && buffer[new_size - 1] == '\r') {
            crlf = true;
            --new_size;
        }
    }

    buffer[new_size] = '\0';
    return new_size;
}
// Appends the first 'second_size' characters of 'second', plus the character
// that follows them (the terminator when 'second_size' is its length), to
// 'first'.
//
// first:       buffer obtained from malloc/realloc; replaced by the grown one
// first_size:  number of characters in 'first'; increased by 'second_size'
// second:      text to append
// second_size: number of characters of 'second' to append
//
// The buffer is grown to first_size + second_size + 2 bytes.
// NOTE(review): the result of realloc is not checked for nullptr.
void realloc_concat(char*& first, size_t& first_size,
                    const char* const second, size_t second_size) {
    const size_t capacity = first_size + second_size + 2;
    char* const grown = static_cast<char*>(realloc(first, capacity));

    // copy one extra character so the terminator travels with the text
    std::copy(second, second + second_size + 1, grown + first_size);

    first = grown;
    first_size += second_size;
}
bool append_line(FILE* file, char*& dst_buffer, size_t& dst_size) {
undo_remove_eol(dst_size);
ssize_t ssize = getline(&helper_buffer_, &helper_size_, file);
if (ssize == -1) {
return false;
}
size_t size = remove_eol(helper_buffer_, ssize);
realloc_concat(dst_buffer, dst_size, helper_buffer_, size);
return true; return true;
} }
const char* get() const { public:
// Binds the reader to 'delimiter'. delim_ is a reference member, so the
// string passed here is not copied and has to outlive the reader.
reader(const std::string& delimiter) : delim_{delimiter} {
}
// Releases the three line buffers with free(); free(nullptr) is a no-op, so
// buffers that were never filled are fine.
// NOTE(review): no copy/move operations are visible here although the class
// owns raw buffers (see the "rule of 5-3-1" TODO at the top of the file).
~reader() {
free(buffer_);
free(next_line_buffer_);
free(helper_buffer_);
}
// Reads the next row from 'file' into next_line_buffer_ and splits it with
// next_line_converter_.
//
// A row may span several physical lines: while the text read so far ends in
// an escaped end-of-line, or the split reports an unterminated quote, the
// following line is appended and the row is split again.
//
// Returns false when getline reports -1 (end of file or error), either for
// the first line or for a continuation line; returns true otherwise.
//
// NOTE(review): size_ is the only capacity handed to getline although the
// buffers are swapped in update() and regrown in realloc_concat, confirm it
// always matches the buffer it is used with.
bool read(FILE* file) {
    ssize_t ssize = getline(&next_line_buffer_, &size_, file);

    if (ssize == -1) {
        return false;
    }

    size_t size = remove_eol(next_line_buffer_, ssize);

    while (escaped_eol(size)) {
        if (!append_line(file, next_line_buffer_, size)) {
            return false;
        }
    }

    next_line_converter_.split(next_line_buffer_, delim_);

    while (unterminated_quote()) {
        if (!append_line(file, next_line_buffer_, size)) {
            return false;
        }
        // resplit with the same delimiter as the initial split; without it
        // the converter falls back to its default delimiter
        next_line_converter_.resplit(next_line_buffer_, size, delim_);
    }

    return true;
}
// Applies 'mode' to both converters, so the mode stays the same after
// update() swaps them.
void set_error_mode(error_mode mode) {
converter_.set_error_mode(mode);
next_line_converter_.set_error_mode(mode);
}
// Returns converter_, i.e. the converter that update() swapped in from
// next_line_converter_.
converter<Matchers...>& get_converter() {
return converter_;
}
const char* get_buffer() const {
return buffer_; return buffer_;
} }
void update() { void update() {
std::swap(buffer_, new_buffer_); std::swap(buffer_, next_line_buffer_);
std::swap(converter_, next_line_converter_);
} }
}; };
void read_line() { void read_line() {
eof_ = !buff_.read(file_); eof_ = !reader_.read(file_);
++line_number_; ++line_number_;
} }
@ -326,9 +425,9 @@ private:
.append(" ") .append(" ")
.append(std::to_string(line_number_)) .append(std::to_string(line_number_))
.append(": ") .append(": ")
.append(converter_.error_msg()) .append(reader_.get_converter().error_msg())
.append(": \"") .append(": \"")
.append(buff_.get()) .append(reader_.get_buffer())
.append("\""); .append("\"");
} else { } else {
bool_error_ = true; bool_error_ = true;
@ -344,10 +443,8 @@ private:
std::string string_error_; std::string string_error_;
bool bool_error_{false}; bool bool_error_{false};
error_mode error_mode_{error_mode::error_bool}; error_mode error_mode_{error_mode::error_bool};
converter converter_;
converter::split_input split_input_;
FILE* file_{nullptr}; FILE* file_{nullptr};
buffer buff_; reader reader_{delim_};
size_t line_number_{0}; size_t line_number_{0};
bool eof_{false}; bool eof_{false};
}; };

111
include/ss/setup.hpp Normal file
View File

@ -0,0 +1,111 @@
#pragma once
#include "type_traits.hpp"
#include <array>
namespace ss {
// Compile-time set of characters.
//
// match(c) tells whether 'c' is one of 'Cs...'; 'matches' exposes the same
// characters as an array so that matchers can be compared at compile time.
// '\0' is rejected as a match character by the static_assert below.
template <char... Cs>
struct matcher {
private:
    // true when '\0' is among the match characters
    constexpr static bool contains_string_terminator() {
        for (const auto& candidate : matches) {
            if (candidate == '\0') {
                return true;
            }
        }
        return false;
    }

public:
    // true when 'c' equals any of 'Cs...'
    static bool match(char c) {
        return ((c == Cs) || ...);
    }

    constexpr static bool enabled = true;
    constexpr static std::array<char, sizeof...(Cs)> matches{Cs...};
    static_assert(!contains_string_terminator(),
                  "string terminator cannot be used as a match character");
};
// True when the two matchers share a match character. '\0' entries of the
// first matcher are never counted as shared.
template <typename FirstMatcher, typename SecondMatcher>
constexpr bool matches_intersect() {
    for (const auto lhs : FirstMatcher::matches) {
        if (lhs == '\0') {
            continue;
        }
        for (const auto rhs : SecondMatcher::matches) {
            if (lhs == rhs) {
                return true;
            }
        }
    }
    return false;
}
// Disabled matcher: 'enabled' is false, match() is deleted so it cannot be
// called, and 'matches' holds only '\0'.
template <>
struct matcher<'\0'> {
    static bool match(char c) = delete;

    constexpr static std::array<char, 1> matches{'\0'};
    constexpr static bool enabled = false;
};
// Tag types that give a matcher its role; setup<...> picks them by template.

// quote: matcher with exactly one character
template <char C>
struct quote : matcher<C> {};
// trim: matcher with any number of characters
template <char... Cs>
struct trim : matcher<Cs...> {};
// escape: matcher with any number of characters
template <char... Cs>
struct escape : matcher<Cs...> {};
// Trait: 'value' is true only when T is Template<Cs...> for some characters
// Cs, e.g. is_instance_of_matcher<quote<'"'>, quote>.
template <typename T, template <char...> class Template>
struct is_instance_of_matcher {
    static constexpr bool value{false};
};

template <char... Cs, template <char...> class Template>
struct is_instance_of_matcher<Template<Cs...>, Template> {
    static constexpr bool value{true};
};
// get_matcher<Matcher, Ts...>::type is the first type in 'Ts...' that is an
// instance of 'Matcher', or Matcher<'\0'> when there is none.
template <template <char...> class Matcher, typename... Ts>
struct get_matcher;
// recursive case: take T if it is an instance of Matcher, otherwise continue
// with the remaining types
template <template <char...> class Matcher, typename T, typename... Ts>
struct get_matcher<Matcher, T, Ts...> {
using type =
typename ternary<is_instance_of_matcher<T, Matcher>::value, T,
typename get_matcher<Matcher, Ts...>::type>::type;
};
// base case: nothing left to inspect, fall back to Matcher<'\0'>
template <template <char...> class Matcher>
struct get_matcher<Matcher> {
using type = Matcher<'\0'>;
};
// shorthand for get_matcher<Matcher, Ts...>::type
template <template <char...> class Matcher, typename... Ts>
using get_matcher_t = typename get_matcher<Matcher, Ts...>::type;
// Collects the quote, trim and escape matchers out of 'Ts...'; a matcher that
// is not present in 'Ts...' resolves to the '\0' instantiation of its
// template. The order of 'Ts...' does not matter for the lookup.
template <typename... Ts>
struct setup {
using quote = get_matcher_t<quote, Ts...>;
using trim = get_matcher_t<trim, Ts...>;
using escape = get_matcher_t<escape, Ts...>;
// the macro only exists to share the message between the three assertions,
// it is undefined again right after them
#define ASSERT_MSG "cannot have the same match character in multiple matchers"
static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG);
static_assert(!matches_intersect<trim, escape>(), ASSERT_MSG);
static_assert(!matches_intersect<escape, quote>(), ASSERT_MSG);
#undef ASSERT_MSG
};
// a setup wrapped in another setup behaves like the inner one
template <typename... Ts>
struct setup<setup<Ts...>> : setup<Ts...> {};
} /* ss */

388
include/ss/splitter.hpp Normal file
View File

@ -0,0 +1,388 @@
#pragma once
#include "setup.hpp"
#include "type_traits.hpp"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
namespace ss {
// one column of a line: pointers to its first character and one past its last
using string_range = std::pair<const char*, const char*>;
// all columns of a line, in order
using split_input = std::vector<string_range>;
// the error can be set inside a string, or a bool
enum class error_mode { error_string, error_bool };
// Splits a line into columns without copying it: every column is stored as a
// pair of pointers into the line (see split_input).
//
// 'Ts...' may contain quote<>, trim<> and escape<> matchers (see setup).
// When quoting or escaping is enabled the line is edited in place (escape
// characters and doubled quotes are removed by shifting the text), which is
// why line_ptr_type is 'char*' in that case and 'const char*' otherwise.
//
// Errors are reported through valid()/error_msg() according to the error
// mode; unterminated_quote() additionally tells whether the last split
// stopped inside an open quote, so the caller can append more text and call
// resplit.
template <typename... Ts>
class splitter {
private:
    constexpr static auto default_delimiter = ",";

    using quote = typename setup<Ts...>::quote;
    using trim = typename setup<Ts...>::trim;
    using escape = typename setup<Ts...>::escape;

    // the line is only rewritten when quotes or escapes are enabled
    constexpr static auto is_const_line = !quote::enabled && !escape::enabled;

public:
    using line_ptr_type =
        typename ternary<is_const_line, const char*, char*>::type;

    // true when the last split did not report an error
    bool valid() const {
        return (error_mode_ == error_mode::error_string)
                   ? string_error_.empty()
                   : bool_error_ == false;
    }

    // true when the last split ended inside a quoted column
    bool unterminated_quote() const {
        return unterminated_quote_;
    }

    // message of the last error, only filled in error_string mode
    const std::string& error_msg() const {
        return string_error_;
    }

    void set_error_mode(error_mode mode) {
        error_mode_ = mode;
    }

    // splits 'new_line' by 'delimiter' from scratch and returns the columns
    const split_input& split(line_ptr_type new_line,
                             const std::string& delimiter = default_delimiter) {
        split_input_.clear();
        return resplit(new_line, -1, delimiter);
    }

    // rebases every stored column from 'old_line' onto the current line_
    void adjust_ranges(const char* old_line) {
        for (auto& [begin, end] : split_input_) {
            begin = begin - old_line + line_;
            end = end - old_line + line_;
        }
    }

private:
    ////////////////
    // resplit
    ////////////////

    // Splits 'new_line'. When the previous split ended inside an open quote,
    // the already finished columns are kept (rebased onto 'new_line') and
    // splitting continues at the quote that opened the unfinished column.
    // 'new_size' is the length of 'new_line', or -1 to skip the length check.
    const split_input& resplit(
        line_ptr_type new_line, ssize_t new_size,
        const std::string& delimiter = default_delimiter) {
        line_ = new_line;

        // resplitting, continue from last slice
        if (!split_input_.empty() && unterminated_quote()) {
            // the last entry is a marker pushed by read_quoted:
            // {old line, start of the unfinished column's content}
            const auto [old_line, old_begin] = split_input_.back();
            // step back onto the opening quote
            size_t begin = old_begin - old_line - 1;
            split_input_.pop_back();
            adjust_ranges(old_line);

            // safety measure
            if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
                set_error_invalid_resplit();
                return split_input_;
            }

            begin_ = line_ + begin;
        }

        return split_impl_select_delim(delimiter);
    }

    ////////////////
    // error
    ////////////////

    void clear_error() {
        string_error_.clear();
        bool_error_ = false;
        unterminated_quote_ = false;
    }

    void set_error_empty_delimiter() {
        if (error_mode_ == error_mode::error_string) {
            string_error_.clear();
            string_error_.append("empty delimiter");
        } else {
            bool_error_ = true;
        }
    }

    void set_error_mismatched_quote(size_t n) {
        if (error_mode_ == error_mode::error_string) {
            string_error_.clear();
            string_error_.append("mismatched quote at position: " +
                                 std::to_string(n));
        } else {
            bool_error_ = true;
        }
    }

    void set_error_unterminated_quote() {
        unterminated_quote_ = true;
        if (error_mode_ == error_mode::error_string) {
            string_error_.clear();
            string_error_.append("unterminated quote");
        } else {
            bool_error_ = true;
        }
    }

    void set_error_invalid_resplit() {
        unterminated_quote_ = false;
        if (error_mode_ == error_mode::error_string) {
            string_error_.clear();
            string_error_.append("invalid resplit, new line must be longer "
                                 "than the end of the last slice");
        } else {
            bool_error_ = true;
        }
    }

    ////////////////
    // matching
    ////////////////

    bool match(const char* const curr, char delim) {
        return *curr == delim;
    }

    bool match(const char* const curr, const std::string& delim) {
        return strncmp(curr, delim.c_str(), delim.size()) == 0;
    }

    size_t delimiter_size(char) {
        return 1;
    }

    size_t delimiter_size(const std::string& delim) {
        return delim.size();
    }

    // advances 'curr' past trim characters when trimming is enabled
    void trim_if_enabled(line_ptr_type& curr) {
        if constexpr (trim::enabled) {
            while (trim::match(*curr)) {
                ++curr;
            }
        }
    }

    // drops the escape character at 'curr' when escaping is enabled
    void shift_if_escaped(line_ptr_type& curr) {
        if constexpr (escape::enabled) {
            if (escape::match(*curr)) {
                shift_and_jump_escape();
            }
        }
    }

    // Inspects the text at 'begin' and returns {width, is_delimiter}:
    //   {0, false}  only trim characters up to the end of the line
    //   {n, false}  no delimiter here, the caller should advance by n
    //   {n, true}   delimiter (with surrounding trim characters) of width n
    template <typename Delim>
    std::tuple<size_t, bool> match_delimiter(line_ptr_type begin,
                                             const Delim& delim) {
        line_ptr_type end = begin;

        trim_if_enabled(end);

        // just spacing
        if (*end == '\0') {
            return {0, false};
        }

        // not a delimiter
        if (!match(end, delim)) {
            shift_if_escaped(end);
            return {1 + end - begin, false};
        }

        end += delimiter_size(delim);
        trim_if_enabled(end);

        // delimiter
        return {end - begin, true};
    }

    ////////////////
    // shifting
    ////////////////

    // escaped_ counts the characters dropped from the current column so far;
    // the text read since the last shift is moved left by that amount and
    // curr_ is set to the end of the compacted column
    void shift_and_set_current() {
        if (escaped_ > 0) {
            if constexpr (!is_const_line) {
                std::copy_n(curr_ + escaped_, end_ - curr_, curr_);
            }
        }
        curr_ = end_ - escaped_;
    }

    void shift_and_push() {
        shift_and_set_current();
        split_input_.emplace_back(begin_, curr_);
    }

    // compacts the column and skips the character at end_
    void shift_and_jump_escape() {
        shift_and_set_current();
        ++end_;
        ++escaped_;
    }

    // finishes the current column and starts the next one 'n' characters
    // after end_
    void shift_push_and_start_next(size_t n) {
        shift_and_push();
        begin_ = end_ + n;
    }

    ////////////////
    // split impl
    ////////////////

    // dispatches on the delimiter length: empty is an error, a single
    // character is matched as 'char', anything longer as a string
    const split_input& split_impl_select_delim(
        const std::string& delimiter = default_delimiter) {
        clear_error();
        switch (delimiter.size()) {
        case 0:
            set_error_empty_delimiter();
            return split_input_;
        case 1:
            return split_impl(delimiter[0]);
        default:
            return split_impl(delimiter);
        }
    }

    template <typename Delim>
    const split_input& split_impl(const Delim& delim) {
        // when continuing a resplit, begin_ was already positioned
        if (split_input_.empty()) {
            begin_ = line_;
        }

        trim_if_enabled(begin_);

        for (done_ = false; !done_; read(delim))
            ;

        return split_input_;
    }

    ////////////////
    // reading
    ////////////////

    // reads one column starting at begin_
    template <typename Delim>
    void read(const Delim& delim) {
        escaped_ = 0;
        if constexpr (quote::enabled) {
            if (quote::match(*begin_)) {
                curr_ = end_ = ++begin_;
                read_quoted(delim);
                return;
            }
        }
        curr_ = end_ = begin_;
        read_normal(delim);
    }

    template <typename Delim>
    void read_normal(const Delim& delim) {
        while (true) {
            auto [width, valid] = match_delimiter(end_, delim);

            if (!valid) {
                // not a delimiter
                if (width == 0) {
                    // eol
                    shift_and_push();
                    done_ = true;
                    break;
                } else {
                    end_ += width;
                    continue;
                }
            } else {
                // found delimiter
                shift_push_and_start_next(width);
                break;
            }
        }
    }

    template <typename Delim>
    void read_quoted(const Delim& delim) {
        if constexpr (quote::enabled) {
            while (true) {
                if (!quote::match(*end_)) {
                    if constexpr (escape::enabled) {
                        if (escape::match(*end_)) {
                            shift_and_jump_escape();
                            ++end_;
                            continue;
                        }
                    }

                    // unterminated quote error
                    // eg: ..."hell\0 -> quote not terminated
                    if (*end_ == '\0') {
                        set_error_unterminated_quote();
                        // marker used by resplit to continue this column
                        split_input_.emplace_back(line_, begin_);
                        done_ = true;
                        break;
                    }
                    ++end_;
                    continue;
                }

                auto [width, valid] = match_delimiter(end_ + 1, delim);

                // delimiter
                if (valid) {
                    shift_push_and_start_next(width + 1);
                    break;
                }

                // double quote
                // eg: ...,"hel""lo",... -> hel"lo
                if (quote::match(end_[1])) {
                    shift_and_jump_escape();
                    ++end_;
                    continue;
                }

                // not a delimiter
                if (width == 0) {
                    // eol
                    // eg: ...,"hello" \0 -> hello
                    // eg no trim: ...,"hello"\0 -> hello
                    shift_and_push();
                } else {
                    // mismatched quote
                    // eg: ...,"hel"lo,... -> error
                    set_error_mismatched_quote(end_ - line_);
                    split_input_.emplace_back(line_, begin_);
                }
                done_ = true;
                break;
            }
        }
    }

    ////////////////
    // members
    ////////////////

    std::string string_error_;
    bool bool_error_{false};
    bool unterminated_quote_{false};
    enum error_mode error_mode_ { error_mode::error_bool };
    // cursors into the current line
    line_ptr_type begin_{nullptr}; // start of the column being read
    line_ptr_type curr_{nullptr};  // end of the compacted column
    line_ptr_type end_{nullptr};   // read position
    line_ptr_type line_{nullptr};  // start of the line
    bool done_{false};
    size_t escaped_{0};
    split_input split_input_;

    template <typename...>
    friend class converter;
};
} /* ss */

View File

@ -314,6 +314,23 @@ struct is_instance_of<Template<Ts...>, Template> {
constexpr static bool value = true; constexpr static bool value = true;
}; };
////////////////
// ternary
////////////////
template <bool B, typename T, typename U>
struct ternary;
template <typename T, typename U>
struct ternary<true, T, U> {
using type = T;
};
template <typename T, typename U>
struct ternary<false, T, U> {
using type = U;
};
//////////////// ////////////////
// tuple to struct // tuple to struct
//////////////// ////////////////
@ -323,6 +340,7 @@ S to_object(std::index_sequence<Is...>, Tup&& tup) {
return {std::get<Is>(std::forward<Tup>(tup))...}; return {std::get<Is>(std::forward<Tup>(tup))...};
} }
// TODO Tup may not be a tuple ...
template <class S, class Tup> template <class S, class Tup>
S to_object(Tup&& tup) { S to_object(Tup&& tup) {
using T = std::remove_reference_t<Tup>; using T = std::remove_reference_t<Tup>;

View File

@ -2,7 +2,7 @@ project('ssp', 'cpp',
default_options : default_options :
['warning_level=3', ['warning_level=3',
'cpp_std=c++17', 'cpp_std=c++17',
'buildtype=debug']) 'buildtype=debugoptimized'])
includes = include_directories('include') includes = include_directories('include')
subdir('test') subdir('test')

View File

@ -28,7 +28,7 @@ include(doctest)
enable_testing() enable_testing()
foreach(name IN ITEMS test_parser test_converter test_extractions) foreach(name IN ITEMS test_splitter test_parser test_converter test_extractions)
add_executable("${name}" "${name}.cpp") add_executable("${name}" "${name}.cpp")
target_link_libraries("${name}" PRIVATE ssp::ssp doctest::doctest) target_link_libraries("${name}" PRIVATE ssp::ssp doctest::doctest)
target_compile_definitions("${name}" PRIVATE target_compile_definitions("${name}" PRIVATE

View File

@ -1,5 +1,6 @@
test_sources = files([ test_sources = files([
'test_main.cpp', 'test_main.cpp',
'test_splitter.cpp',
'test_converter.cpp', 'test_converter.cpp',
'test_parser.cpp', 'test_parser.cpp',
'test_extractions.cpp', 'test_extractions.cpp',

View File

@ -1,23 +1,18 @@
#include "test_helpers.hpp"
#include <algorithm> #include <algorithm>
#include <ss/converter.hpp> #include <ss/converter.hpp>
#ifdef CMAKE_GITHUB_CI
#include <doctest/doctest.h>
#else
#include <doctest.h>
#endif
TEST_CASE("testing split") { TEST_CASE("testing split") {
ss::converter c; ss::converter c;
for (const auto& [s, expected, delim] : for (const auto& [s, expected, delim] :
// clang-format off
{std::make_tuple("a,b,c,d", std::vector{"a", "b", "c", "d"}, ","), {std::make_tuple("a,b,c,d", std::vector{"a", "b", "c", "d"}, ","),
{"", {}, " "}, {"", {}, " "},
{"a,b,c", {"a", "b", "c"}, ""},
{" x x x x | x ", {" x x x x ", " x "}, "|"}, {" x x x x | x ", {" x x x x ", " x "}, "|"},
{"a::b::c::d", {"a", "b", "c", "d"}, "::"}, {"a::b::c::d", {"a", "b", "c", "d"}, "::"},
{"x\t-\ty", {"x", "y"}, "\t-\t"}, {"x\t-\ty", {"x", "y"}, "\t-\t"},
{"x", {"x"}, ","}}) { {"x", {"x"}, ","}} // clang-format on
) {
auto split = c.split(s, delim); auto split = c.split(s, delim);
CHECK(split.size() == expected.size()); CHECK(split.size() == expected.size());
for (size_t i = 0; i < split.size(); ++i) { for (size_t i = 0; i < split.size(); ++i) {
@ -120,6 +115,9 @@ TEST_CASE("testing invalid conversions") {
c.convert<int>(""); c.convert<int>("");
REQUIRE(!c.valid()); REQUIRE(!c.valid());
c.convert<int>("10", "");
REQUIRE(!c.valid());
c.convert<int, void>(""); c.convert<int, void>("");
REQUIRE(!c.valid()); REQUIRE(!c.valid());
@ -396,3 +394,71 @@ TEST_CASE("testing error mode") {
CHECK(!c.valid()); CHECK(!c.valid());
CHECK(!c.error_msg().empty()); CHECK(!c.error_msg().empty());
} }
// Runs ss::converter over one line with no matchers, then with the quote,
// trim and escape matchers individually, and finally with all three combined.
TEST_CASE("testing converter with quotes spacing and escaping") {
{
// no matchers: the expected strings still contain their quote characters
ss::converter c;
auto tup = c.convert<std::string, std::string, std::string>(
R"("just","some","strings")");
REQUIRE(c.valid());
CHECK(tup == std::make_tuple("\"just\"", "\"some\"", "\"strings\""));
}
{
// quote matcher only: quoted and unquoted fields are mixed in one line and
// the expected values carry no quotes
ss::converter<ss::quote<'"'>> c;
auto tup = c.convert<std::string, std::string, double, char>(
buff(R"("just",some,"12.3","a")"));
REQUIRE(c.valid());
CHECK(tup == std::make_tuple("just", "some", 12.3, 'a'));
}
{
// trim matcher only: spaces around the fields are absent from the expected
// values
ss::converter<ss::trim<' '>> c;
auto tup = c.convert<std::string, std::string, double, char>(
buff(R"( just , some , 12.3 ,a )"));
REQUIRE(c.valid());
CHECK(tup == std::make_tuple("just", "some", 12.3, 'a'));
}
{
// escape matcher only: the escaped delimiter stays inside the first field
ss::converter<ss::escape<'\\'>> c;
auto tup =
c.convert<std::string, std::string>(buff(R"(ju\,st,strings)"));
REQUIRE(c.valid());
CHECK(tup == std::make_tuple("ju,st", "strings"));
}
{
// all three matchers: escaped delimiter, quoted delimiter, surrounding
// spaces and a doubled quote inside a quoted field
ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;
auto tup = c.convert<std::string, std::string, double, std::string>(
buff(R"( ju\,st , "so,me" , 12.34 , "str""ings")"));
REQUIRE(c.valid());
CHECK(tup == std::make_tuple("ju,st", "so,me", 12.34, "str\"ings"));
}
}
// Checks that the converter reports failure for lines that cannot be split,
// and that unterminated_quote() is set only for the unterminated-quote input.
TEST_CASE("testing invalid split conversions") {
ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;
c.set_error_mode(ss::error_mode::error_string);
{
// mismatched quote
// (expected to be invalid without being flagged as an unterminated quote)
auto tup = c.convert<std::string, std::string, double, char>(
buff(R"( "just , some , "12.3","a" )"));
CHECK(!c.valid());
CHECK(!c.unterminated_quote());
}
{
// unterminated quote
// (the last field opens a quote that is never closed)
auto tup = c.convert<std::string, std::string, double, std::string>(
buff(R"( ju\,st , "so,me" , 12.34 , "str""ings)"));
CHECK(!c.valid());
CHECK(c.unterminated_quote());
}
}

View File

@ -1,11 +1,6 @@
#include <ss/extract.hpp> #include "test_helpers.hpp"
#include <algorithm> #include <algorithm>
#include <ss/extract.hpp>
#ifdef CMAKE_GITHUB_CI
#include <doctest/doctest.h>
#else
#include <doctest.h>
#endif
constexpr auto eps = 0.000001; constexpr auto eps = 0.000001;
using ld = long double; using ld = long double;

44
test/test_helpers.hpp Normal file
View File

@ -0,0 +1,44 @@
#pragma once
#include <cstdlib>
#include <cstring>
#ifdef CMAKE_GITHUB_CI
#include <doctest/doctest.h>
#else
#include <doctest.h>
#endif
// Owns a heap-allocated, null-terminated, writable copy of a C string.
//
// The tests use it to turn string literals into mutable `char*` lines.
// Every pointer returned by operator() or append() is invalidated by the
// next call to either of them and by destruction of the buffer.
class buffer {
    char* data_{nullptr};

public:
    buffer() = default;

    // The buffer owns raw storage; an implicit copy would free it twice.
    buffer(const buffer&) = delete;
    buffer& operator=(const buffer&) = delete;

    // Replaces the stored string with a copy of `data` and returns a pointer
    // to the new storage. `data` must be a valid null-terminated string; it
    // may point into the storage currently held by this buffer.
    char* operator()(const char* data) {
        // Copy before releasing the old storage so that an argument aliasing
        // the current contents is still readable while it is being copied.
        char* fresh = new char[std::strlen(data) + 1];
        std::strcpy(fresh, data);
        delete[] data_;
        data_ = fresh;
        return data_;
    }

    // Appends `data` to the stored string (or stores a copy of it when the
    // buffer is still empty) and returns a pointer to the new storage.
    char* append(const char* data) {
        if (!data_) {
            return operator()(data);
        }
        char* joined = new char[std::strlen(data_) + std::strlen(data) + 1];
        std::strcpy(joined, data_);
        std::strcat(joined, data);
        delete[] data_;
        data_ = joined;
        return data_;
    }

    ~buffer() {
        // delete[] on a null pointer is a no-op, so no check is needed
        delete[] data_;
    }
};
// Shared buffer instance; the tests call it as buff("...") to obtain a
// writable char* copy of a literal, and buff.append("...") to extend it.
[[maybe_unused]] inline buffer buff;

View File

@ -1,20 +1,18 @@
#include "test_helpers.hpp"
#include <algorithm> #include <algorithm>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
#include <ss/parser.hpp> #include <ss/parser.hpp>
#ifdef CMAKE_GITHUB_CI
#include <doctest/doctest.h>
#else
#include <doctest.h>
#endif
struct unique_file_name { struct unique_file_name {
const std::string name; const std::string name;
unique_file_name() : name{std::tmpnam(nullptr)} {} unique_file_name() : name{std::tmpnam(nullptr)} {
}
~unique_file_name() { std::filesystem::remove(name); } ~unique_file_name() {
std::filesystem::remove(name);
}
}; };
struct X { struct X {
@ -30,7 +28,9 @@ struct X {
.append(delim) .append(delim)
.append(s); .append(s);
} }
auto tied() const { return std::tie(i, d, s); } auto tied() const {
return std::tie(i, d, s);
}
}; };
template <typename T> template <typename T>
@ -56,6 +56,7 @@ TEST_CASE("testing parser") {
make_and_write(f.name, data); make_and_write(f.name, data);
{ {
ss::parser p{f.name, ","}; ss::parser p{f.name, ","};
p.set_error_mode(ss::error_mode::error_string);
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
@ -166,10 +167,13 @@ struct test_struct {
int i; int i;
double d; double d;
char c; char c;
auto tied() { return std::tie(i, d, c); } auto tied() {
return std::tie(i, d, c);
}
}; };
void expect_test_struct(const test_struct&) {} void expect_test_struct(const test_struct&) {
}
// various scenarios // various scenarios
TEST_CASE("testing composite conversion") { TEST_CASE("testing composite conversion") {
@ -391,7 +395,9 @@ struct my_string {
my_string() = default; my_string() = default;
~my_string() { delete[] data; } ~my_string() {
delete[] data;
}
// make sure no object is copied // make sure no object is copied
my_string(const my_string&) = delete; my_string(const my_string&) = delete;
@ -422,7 +428,9 @@ struct xyz {
my_string x; my_string x;
my_string y; my_string y;
my_string z; my_string z;
auto tied() { return std::tie(x, y, z); } auto tied() {
return std::tie(x, y, z);
}
}; };
TEST_CASE("testing the moving of parsed values") { TEST_CASE("testing the moving of parsed values") {
@ -474,8 +482,8 @@ TEST_CASE("testing the moving of parsed values") {
TEST_CASE("testing the moving of parsed composite values") { TEST_CASE("testing the moving of parsed composite values") {
// to compile is enough // to compile is enough
return; return;
ss::parser* p; ss::parser p{"", ""};
p->try_next<my_string, my_string, my_string>() p.try_next<my_string, my_string, my_string>()
.or_else<my_string, my_string, my_string, my_string>([](auto&&) {}) .or_else<my_string, my_string, my_string, my_string>([](auto&&) {})
.or_else<my_string>([](auto&) {}) .or_else<my_string>([](auto&) {})
.or_else<xyz>([](auto&&) {}) .or_else<xyz>([](auto&&) {})
@ -506,3 +514,66 @@ TEST_CASE("testing error mode") {
CHECK(!p.valid()); CHECK(!p.valid());
CHECK(!p.error_msg().empty()); CHECK(!p.error_msg().empty());
} }
// Returns `s` without its enclosing pair of double quotes.
//
// The quotes are removed only when `s` both starts and ends with '"' and is
// at least two characters long; any other input (including the empty string
// and a lone quote character) is returned unchanged.
std::string no_quote(const std::string& s) {
    const bool enclosed =
        s.size() >= 2 && s.front() == '"' && s.back() == '"';
    if (!enclosed) {
        return s;
    }
    // drop the first and the last character
    return s.substr(1, s.size() - 2);
}
// Writes rows whose string column is quoted and contains newlines and doubled
// quotes, then reads them back with a quote-aware parser and compares the
// result with the expected unquoted values.
TEST_CASE("testing csv on multiple lines with quotes") {
    unique_file_name f;
    std::vector<X> data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""},
                           {5, 6, "\"z\nz\""},    {7, 8, "\"u\"\"\""},
                           {9, 10, "v"},          {11, 12, "\"w\n\""}};
    make_and_write(f.name, data);

    // Turn the written form into the expected parsed form. The first two
    // bindings are not needed; they get ordinary names because identifiers
    // containing a double underscore are reserved.
    for (auto& [unused_int, unused_double, s] : data) {
        s = no_quote(s);
        if (s[0] == 'u') {
            // the doubled quote written for this row is expected to come
            // back as a single quote character
            s = "u\"";
        }
    }

    ss::parser<ss::quote<'"'>> p{f.name, ","};
    p.set_error_mode(ss::error_mode::error_string);

    std::vector<X> i;
    while (!p.eof()) {
        auto a = p.get_next<int, double, std::string>();
        i.emplace_back(ss::to_object<X>(a));
    }

    CHECK(std::equal(i.begin(), i.end(), data.begin()));
}
// Removes every backslash from `s` in place and returns a copy of the
// resulting string.
std::string no_escape(std::string& s) {
    const auto is_backslash = [](char c) { return c == '\\'; };
    const auto new_end = std::remove_if(s.begin(), s.end(), is_backslash);
    s.erase(new_end, s.end());
    return s;
}
// Writes rows whose string column contains escaped newlines and an escaped
// backslash, then reads them back with an escape-aware parser and compares
// the result with the expected unescaped values.
TEST_CASE("testing csv on multiple lines with escapes") {
    unique_file_name f;
    std::vector<X> data = {{1, 2, "x\\\nx\\\nx"}, {3, 4, "y\\\ny\\\ny"},
                           {5, 6, "z\\\nz"},      {7, 8, "u"},
                           {9, 10, "v\\\\"},      {11, 12, "w\\\n"}};
    make_and_write(f.name, data);

    // Turn the written form into the expected parsed form. The first two
    // bindings are not needed; they get ordinary names because identifiers
    // containing a double underscore are reserved.
    for (auto& [unused_int, unused_double, s] : data) {
        s = no_escape(s);
        if (s == "v") {
            // no_escape removed both backslashes of the escaped backslash;
            // the expected parsed value keeps one of them
            s = "v\\";
        }
    }

    ss::parser<ss::escape<'\\'>> p{f.name, ","};
    p.set_error_mode(ss::error_mode::error_string);

    std::vector<X> i;
    while (!p.eof()) {
        auto a = p.get_next<int, double, std::string>();
        i.emplace_back(ss::to_object<X>(a));
    }

    CHECK(std::equal(i.begin(), i.end(), data.begin()));
}

717
test/test_splitter.cpp Normal file
View File

@ -0,0 +1,717 @@
#include "test_helpers.hpp"
#include <algorithm>
#include <cstring>
#include <iostream>
#include <ss/splitter.hpp>
namespace {
// Upper bound (exclusive) for the loop in make_combinations that builds test
// lines; it can be changed for the duration of a test with the guard below.
constexpr static auto combinations_size_default = 4;
size_t combinations_size = combinations_size_default;

// Scope guard: sets combinations_size on construction and restores the
// default value on destruction.
struct set_combinations_size {
    // explicit: a bare integer must not silently convert into a guard
    explicit set_combinations_size(size_t size) {
        combinations_size = size;
    }

    // Non-copyable: the destructor of a copy would reset the global while the
    // original guard is still in scope.
    set_combinations_size(const set_combinations_size&) = delete;
    set_combinations_size& operator=(const set_combinations_size&) = delete;

    ~set_combinations_size() {
        combinations_size = combinations_size_default;
    }
};
// Copies every [first, last) character range of a split result into its own
// std::string, preserving the order of the ranges.
std::vector<std::string> words(const ss::split_input& input) {
    std::vector<std::string> result;
    for (const auto& [first, last] : input) {
        result.emplace_back(first, last);
    }
    return result;
}
// Formats a list of strings as "[a,b,c]" (useful for printing while
// debugging). An empty list is formatted as "[]".
[[maybe_unused]] std::string concat(const std::vector<std::string>& v) {
    if (v.empty()) {
        // without this guard the opening bracket itself would be overwritten
        // below and the result would be "]"
        return "[]";
    }

    std::string ret = "[";
    for (const auto& i : v) {
        ret.append(i).append(",");
    }
    // the trailing separator becomes the closing bracket
    ret.back() = ']';
    return ret;
}
// Returns the sum of the sizes of all given strings.
template <typename... Ts>
size_t strings_size(const std::string& s, const Ts&... ss) {
    if constexpr (sizeof...(Ts) == 0) {
        return s.size();
    } else {
        return s.size() + strings_size(ss...);
    }
}
// Appends all given strings to `dst`, in argument order.
template <typename... Ts>
void concat_to(std::string& dst, const std::string& s, const Ts&... ss) {
    dst += s;
    if constexpr (sizeof...(Ts) != 0) {
        concat_to(dst, ss...);
    }
}
// Joins all given strings into one, reserving the total size up front so the
// result is built without intermediate reallocations.
template <typename... Ts>
std::string concat(const Ts&... ss) {
    const size_t total = strings_size(ss...);
    std::string joined;
    joined.reserve(total);
    concat_to(joined, ss...);
    return joined;
}
using case_type = std::vector<std::string>;
// Returns `input` followed by six padded variants of each of its elements,
// where the padding `s` is placed before and/or after the element, once or
// twice.
auto spaced(const case_type& input, const std::string& s) {
    case_type ret = input;
    for (const auto& word : input) {
        ret.insert(ret.end(), {
                                  concat(s, word, s),
                                  concat(word, s),
                                  concat(s, word),
                                  concat(s, s, word),
                                  concat(s, s, word, s, s),
                                  concat(word, s, s),
                              });
    }
    return ret;
}
// Returns `input` followed by six padded variants of each of its elements,
// built from different mixes of the two paddings `s1` and `s2` before and
// after the element.
auto spaced(const case_type& input, const std::string& s1,
            const std::string& s2) {
    case_type ret = input;
    for (const auto& word : input) {
        ret.insert(ret.end(), {
                                  concat(s1, word, s2),
                                  concat(s2, word, s1),
                                  concat(s2, s2, s1, s1, word),
                                  concat(word, s1, s2, s1, s2),
                                  concat(s1, s1, s1, word, s2, s2, s2),
                                  concat(s2, s2, s2, word, s1, s1, s1),
                              });
    }
    return ret;
}
// Builds every sequence of `n` elements of `v` joined by `delim`, ordered
// with the first element varying slowest. For n <= 1 the result is `v`
// itself.
std::vector<std::string> combinations(const std::vector<std::string>& v,
                                      const std::string& delim, size_t n) {
    if (n <= 1) {
        return v;
    }

    const auto tails = combinations(v, delim, n - 1);

    std::vector<std::string> joined;
    for (const auto& head : v) {
        for (const auto& tail : tails) {
            joined.push_back(concat(head, delim, tail));
        }
    }
    return joined;
}
// Builds every sequence of `n` elements of `v` as a vector of strings,
// ordered with the first element varying slowest (the same order that
// combinations() uses for the joined lines). For n <= 1 the result holds one
// single-element vector per element of `v`.
std::vector<std::vector<std::string>> vector_combinations(
    const std::vector<std::string>& v, size_t n) {
    std::vector<std::vector<std::string>> ret;
    if (n <= 1) {
        ret.reserve(v.size());
        for (const auto& i : v) {
            ret.push_back({i});
        }
        return ret;
    }

    const auto inner_combinations = vector_combinations(v, n - 1);
    ret.reserve(v.size() * inner_combinations.size());
    for (const auto& head : v) {
        for (const auto& tail : inner_combinations) {
            // build head + tail directly instead of copying the tail and
            // then shifting it to insert at the front
            std::vector<std::string> combination;
            combination.reserve(tail.size() + 1);
            combination.push_back(head);
            combination.insert(combination.end(), tail.begin(), tail.end());
            ret.push_back(std::move(combination));
        }
    }
    return ret;
}
// Builds the test lines and, in matching order, the values each line is
// expected to split into: for every count below combinations_size, all
// combinations of `input` joined by `delim` and the corresponding
// combinations of `output`.
std::pair<std::vector<std::string>, std::vector<std::vector<std::string>>>
make_combinations(const std::vector<std::string>& input,
                  const std::vector<std::string>& output,
                  const std::string& delim) {
    std::vector<std::string> lines;
    std::vector<std::vector<std::string>> expectations;

    for (size_t count = 0; count < combinations_size; ++count) {
        const auto new_lines = combinations(input, delim, count);
        lines.insert(lines.end(), new_lines.begin(), new_lines.end());

        const auto new_expectations = vector_combinations(output, count);
        expectations.insert(expectations.end(), new_expectations.begin(),
                            new_expectations.end());
    }

    return {std::move(lines), std::move(expectations)};
}
} /* namespace */
/* ********************************** */
/* ********************************** */
using matches_type = std::vector<std::pair<case_type, std::string>>;
// Splits every generated combination of the given cases with
// ss::splitter<Matchers...> and checks the result against the expected
// values.
//
// matches: pairs of (input spellings, value each spelling must split into)
// delims:  delimiters to generate and split the lines with
template <typename... Matchers>
void test_combinations(matches_type& matches, std::vector<std::string> delims) {
    ss::splitter<Matchers...> s;

    // flatten the matches into two parallel lists: spelling -> expected value
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
    for (const auto& match : matches) {
        const auto& expected = match.second;
        for (const auto& spelling : match.first) {
            inputs.emplace_back(spelling);
            outputs.emplace_back(expected);
        }
    }

    for (const auto& delim : delims) {
        auto [lines, expectations] = make_combinations(inputs, outputs, delim);

        REQUIRE(lines.size() == expectations.size());

        size_t index = 0;
        for (const auto& line : lines) {
            auto vec = s.split(buff(line.c_str()), delim);
            CHECK(s.valid());
            CHECK(words(vec) == expectations[index]);
            ++index;
        }
    }
}
// With no matchers configured, every input (including quotes, backslashes,
// spaces and newlines) is expected to come out of the splitter unchanged.
TEST_CASE("testing splitter no setup") {
{
matches_type p{{{"x"}, "x"}, {{"\""}, "\""},
{{""}, ""}, {{"\n"}, "\n"},
{{"\"\""}, "\"\""}, {{"\" \\ \""}, "\" \\ \""},
{{" "}, " "}};
test_combinations(p, {",", ";", "\t", "::"});
}
}
// Tests the quote matcher: each case_type lists alternative spellings that
// must all split into the single expected value paired with it below.
TEST_CASE("testing splitter quote") {
case_type case1 = {R"("""")"};
case_type case2 = {R"("x""x")", R"(x"x)"};
case_type case3 = {R"("")", R"()"};
case_type case4 = {R"("x")", R"(x)"};
case_type case5 = {R"("""""")"};
case_type case6 = {R"("\")", R"(\)"};
case_type case7 = {R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"};
std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};
{
// cases without a delimiter inside the quotes, run with every delimiter
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
{case4, "x"}, {case5, "\"\""}, {case6, "\\"},
{case7, "xxxxxxxxxx"}};
test_combinations<ss::quote<'"'>>(p, delims);
}
// quoted fields containing the "," delimiter
case_type case8 = {R"(",")"};
case_type case9 = {R"("x,")"};
case_type case10 = {R"(",x")"};
case_type case11 = {R"("x,x")"};
case_type case12 = {R"(",,")"};
{
matches_type p{{case1, "\""}, {case3, ""}, {case8, ","},
{case9, "x,"}, {case10, ",x"}, {case11, "x,x"},
{case12, ",,"}};
test_combinations<ss::quote<'"'>>(p, {","});
}
// quoted fields containing the multi-character "::" delimiter
case_type case13 = {R"("::")"};
case_type case14 = {R"("x::")"};
case_type case15 = {R"("::x")"};
case_type case16 = {R"("x::x")"};
case_type case17 = {R"("::::")"};
{
matches_type p{{case1, "\""}, {case3, ""}, {case13, "::"},
{case14, "x::"}, {case15, "::x"}, {case16, "x::x"},
{case17, "::::"}};
test_combinations<ss::quote<'"'>>(p, {"::"});
}
}
// Tests the trim matcher: spaced() generates padded variants of each value
// and every variant must split into the unpadded value.
TEST_CASE("testing splitter trim") {
// use a smaller combination count for this test; restored when guard is
// destroyed
auto guard = set_combinations_size(3);
case_type case1 = spaced({R"(x)"}, " ");
case_type case2 = spaced({R"(yy)"}, " ");
case_type case3 = spaced({R"(y y)"}, " ");
case_type case4 = spaced({R"()"}, " ");
std::vector<std::string> delims = {",", "::", "\t", "\n"};
{
// trimming spaces only
matches_type p{{case1, "x"},
{case2, "yy"},
{case3, "y y"},
{case4, ""}};
test_combinations<ss::trim<' '>>(p, delims);
}
case_type case5 = spaced({"z"}, "\t");
case_type case6 = spaced({"ab"}, " ", "\t");
case_type case7 = spaced({"a\tb"}, " ", "\t");
case_type case8 = spaced({"a \t b"}, " ", "\t");
{
// trimming both spaces and tabs; "\t" is therefore not used as a delimiter
matches_type p{{case1, "x"}, {case2, "yy"}, {case3, "y y"},
{case4, ""}, {case5, "z"}, {case6, "ab"},
{case7, "a\tb"}, {case8, "a \t b"}};
test_combinations<ss::trim<' ', '\t'>>(p, {",", "::", "\n"});
}
}
// Tests the escape matcher: each case_type lists alternative spellings that
// must all split into the single expected value paired with it below.
TEST_CASE("testing splitter escape") {
case_type case1 = {R"(x)", R"(\x)"};
case_type case2 = {R"(xx)", R"(\xx)", R"(x\x)", R"(\x\x)"};
case_type case3 = {R"(\\)"};
std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};
{
// escapes that do not involve a delimiter, run with every delimiter
matches_type p{{case1, "x"}, {case2, "xx"}, {case3, "\\"}};
test_combinations<ss::escape<'\\'>>(p, delims);
}
// escaped "," delimiter, using either of the two escape characters
case_type case4 = {R"(\,)"};
case_type case5 = {R"(x#,)"};
case_type case6 = {R"(#,x)"};
case_type case7 = {R"(x\,x)"};
{
matches_type p{{case1, "x"}, {case2, "xx"}, {case3, "\\"},
{case4, ","}, {case5, "x,"}, {case6, ",x"},
{case7, "x,x"}};
test_combinations<ss::escape<'\\', '#'>>(p, {","});
}
// escaped characters of the multi-character "::" delimiter
case_type case8 = {R"(\:\:)"};
case_type case9 = {R"(x\::x)"};
{
matches_type p{{case1, "x"},
{case2, "xx"},
{case3, "\\"},
{case8, "::"},
{case9, "x::x"}};
test_combinations<ss::escape<'\\'>>(p, {"::"});
}
}
// Tests the quote and trim matchers together: the quote cases are padded
// with spaced() and every variant must split into the unpadded, unquoted
// value.
TEST_CASE("testing splitter quote and trim") {
// use a smaller combination count for this test; restored when guard is
// destroyed
auto guard = set_combinations_size(3);
case_type case1 = spaced({R"("""")"}, " ");
case_type case2 = spaced({R"("x""x")", R"(x"x)"}, " ");
case_type case3 = spaced({R"("")", R"()"}, " ");
case_type case4 = spaced({R"("x")", R"(x)"}, " ");
case_type case5 = spaced({R"("""""")"}, " ");
case_type case6 = spaced({R"("\")", R"(\)"}, " ");
case_type case7 = spaced({R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"}, " ");
std::vector<std::string> delims = {",", "::", "\t", "\n"};
{
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
{case4, "x"}, {case5, "\"\""}, {case6, "\\"},
{case7, "xxxxxxxxxx"}};
test_combinations<ss::quote<'"'>, ss::trim<' '>>(p, delims);
}
// quoted fields containing the "," delimiter, padded with spaces and tabs
case_type case8 = spaced({R"(",")"}, " ", "\t");
case_type case9 = spaced({R"("x,")"}, " ", "\t");
case_type case10 = spaced({R"(",x")"}, " ", "\t");
case_type case11 = spaced({R"("x,x")"}, " ", "\t");
case_type case12 = spaced({R"(",,")"}, " ", "\t");
{
matches_type p{{case1, "\""}, {case3, ""}, {case8, ","},
{case9, "x,"}, {case10, ",x"}, {case11, "x,x"},
{case12, ",,"}};
test_combinations<ss::quote<'"'>, ss::trim<' ', '\t'>>(p, {","});
}
}
// Tests the quote and escape matchers together: each case_type lists
// alternative spellings (quoted, escaped, or both) that must all split into
// the single expected value paired with it below.
TEST_CASE("testing splitter quote and escape") {
    case_type case1 = {R"("\"")", R"(\")", R"("""")"};
    case_type case2 = {R"("x\"x")", R"(x\"x)", R"(x"x)", R"("x""x")"};
    case_type case3 = {R"("")", R"()"};
    case_type case4 = {R"("x")", R"(x)"};
    case_type case5 = {R"("\"\"")", R"("""""")", R"("\"""")", R"("""\"")"};
    case_type case6 = {R"("\\")", R"(\\)"};
    case_type case7 = {R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"};

    std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};

    {
        // double quote with backslash escape, run with every delimiter
        matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
                       {case4, "x"},  {case5, "\"\""}, {case6, "\\"},
                       {case7, "xxxxxxxxxx"}};
        test_combinations<ss::quote<'"'>, ss::escape<'\\'>>(p, delims);
    }

    case_type case8 = {R"('xxxxxxxxxx')", R"(xxxxxxxxxx)"};
    case_type case9 = {R"('')", R"()"};
    case_type case10 = {R"('#\')", R"(#\)"};
    case_type case11 = {R"('#'')", R"(#')", R"('''')"};
    case_type case12 = {R"('##')", R"(##)"};
    {
        // same idea with a single quote and '#' as the escape character
        matches_type p{{case8, "xxxxxxxxxx"},
                       {case9, ""},
                       {case10, "\\"},
                       {case11, "'"},
                       {case12, "#"}};
        test_combinations<ss::quote<'\''>, ss::escape<'#'>>(p, delims);
    }

    case_type case13 = {R"("x,x")",  R"(x\,x)",   R"(x#,x)",
                        R"("x\,x")", R"("x#,x")", R"("x#,x")"};
    case_type case14 = {R"("#\\#")", R"(#\\#)", R"(\\##)", R"("\\##")"};
    {
        // two escape characters at once, with the delimiter quoted or escaped
        matches_type p{{case1, "\""},
                       {case2, "x\"x"},
                       {case3, ""},
                       {case13, "x,x"},
                       {case14, "\\#"}};
        test_combinations<ss::quote<'"'>, ss::escape<'\\', '#'>>(p, {","});
    }
}
// Tests the escape and trim matchers together: the escape cases are padded
// with spaced() and every variant must split into the unpadded, unescaped
// value.
TEST_CASE("testing splitter escape and trim") {
// case0: escaped spaces are expected to survive trimming
case_type case0 = spaced({R"(\ x\ )", R"(\ \x\ )"}, " ");
case_type case1 = spaced({R"(x)", R"(\x)"}, " ");
case_type case3 = spaced({R"(\\)"}, " ");
std::vector<std::string> delims = {",", "::", "\t", "\n"};
{
matches_type p{{case0, " x "}, {case1, "x"}, {case3, "\\"}};
test_combinations<ss::escape<'\\'>, ss::trim<' '>>(p, delims);
}
// escaped "," delimiter, using either of the two escape characters
case_type case4 = spaced({R"(\,)"}, " ");
case_type case6 = spaced({R"(#,x)"}, " ");
case_type case7 = spaced({R"(x\,x)"}, " ");
{
matches_type p{{case1, "x"},
{case3, "\\"},
{case4, ","},
{case6, ",x"},
{case7, "x,x"}};
test_combinations<ss::escape<'\\', '#'>, ss::trim<' '>>(p, {","});
}
// escaped characters of the "::" delimiter, padded with spaces and tabs
case_type case8 = spaced({R"(\:\:)"}, " ", "\t");
case_type case9 = spaced({R"(x\::x)"}, " ", "\t");
{
matches_type p{{case1, "x"},
{case3, "\\"},
{case8, "::"},
{case9, "x::x"}};
test_combinations<ss::escape<'\\'>, ss::trim<' ', '\t'>>(p, {"::"});
}
}
// Tests the quote, escape and trim matchers together: the quote/escape cases
// are padded with spaced() and every variant must split into the unpadded,
// unquoted, unescaped value.
TEST_CASE("testing splitter quote and escape and trim") {
// use a smaller combination count for this test; restored when guard is
// destroyed
auto guard = set_combinations_size(3);
case_type case1 = spaced({R"("\"")", R"(\")", R"("""")"}, " ");
case_type case2 =
spaced({R"("x\"x")", R"(x\"x)", R"(x"x)", R"("x""x")"}, " ");
case_type case3 = spaced({R"("")", R"()"}, " ");
// NOTE(review): case4 is built but not added to any matches_type below —
// confirm whether the omission is intentional
case_type case4 = spaced({R"("x")", R"(x)"}, " ");
case_type case5 =
spaced({R"("\"\"")", R"("""""")", R"("\"""")", R"("""\"")"}, " ");
case_type case6 = spaced({R"("\\")", R"(\\)"}, " ");
case_type case7 = spaced({R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"}, " ");
std::vector<std::string> delims = {"::", "\n"};
{
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
{case5, "\"\""}, {case6, "\\"}, {case7, "xxxxxxxxxx"}};
test_combinations<ss::quote<'"'>, ss::escape<'\\'>,
ss::trim<' '>>(p, delims);
}
// single quote with '#' as the escape character, padded with spaces and tabs
case_type case8 = spaced({R"('xxxxxxxxxx')", R"(xxxxxxxxxx)"}, " ", "\t");
case_type case9 = spaced({R"('')", R"()"}, " ", "\t");
case_type case10 = spaced({R"('#\')", R"(#\)"}, " ", "\t");
case_type case11 = spaced({R"('#'')", R"(#')", R"('''')"}, " ", "\t");
case_type case12 = spaced({R"('##')", R"(##)"}, " ", "\t");
{
matches_type p{{case8, "xxxxxxxxxx"},
{case9, ""},
{case10, "\\"},
{case11, "'"},
{case12, "#"}};
test_combinations<ss::quote<'\''>, ss::escape<'#'>,
ss::trim<' ', '\t'>>(p, {","});
}
// two escape characters at once, with the delimiter quoted or escaped
case_type case13 = spaced({R"("x,x")", R"(x\,x)", R"(x#,x)", R"("x\,x")",
R"("x#,x")", R"("x#,x")"},
" ", "\t");
case_type case14 =
spaced({R"("#\\#")", R"(#\\#)", R"(\\##)", R"("\\##")"}, " ", "\t");
{
matches_type p{{case1, "\""},
{case2, "x\"x"},
{case3, ""},
{case13, "x,x"},
{case14, "\\#"}};
test_combinations<ss::quote<'"'>, ss::escape<'\\', '#'>,
ss::trim<' ', '\t'>>(p, {","});
}
}
// Compile-time check only: split() must accept a `const char*` line when
// neither quoting nor escaping is enabled.
TEST_CASE("testing splitter constnes if quoting and escaping are disabled") {
// to compile is enough
// (the early return keeps the code below from running; `line` is a null
// pointer, so the split calls are only meant to be compiled)
return;
const char* const line{};
ss::splitter s1;
ss::splitter<ss::trim<' '>> s2;
s1.split(line);
s2.split(line);
}
// Checks error reporting of the splitter: error_msg() is expected to be empty
// until the error mode is switched to error_string, while valid() and
// unterminated_quote() report the failure in both modes.
TEST_CASE("testing error mode") {
{
// empty delimiter
ss::splitter s;
s.split(buff("just,some,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(s.error_msg().empty());
// same input again, now with error strings enabled
s.set_error_mode(ss::error_mode::error_string);
s.split(buff("just,some,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(!s.error_msg().empty());
}
{
// unterminated quote
ss::splitter<ss::quote<'"'>> s;
s.split(buff("\"just"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(s.error_msg().empty());
// same input again, now with error strings enabled
s.set_error_mode(ss::error_mode::error_string);
s.split(buff("\"just"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(!s.error_msg().empty());
}
}
// Splits a writable copy of `line` with `s`, checks that the splitter reports
// an invalid split caused by an unterminated quote, and returns the split
// result for further inspection.
template <typename Splitter>
auto expect_unterminated_quote(Splitter& s, const std::string& line) {
    auto split_result = s.split(buff(line.c_str()));

    CHECK(!s.valid());
    CHECK(s.unterminated_quote());

    return split_result;
}
namespace ss {
// Used to test resplit since it is only accessible via friend class converter
//
// Test-only stand-in that exposes its splitter publicly and forwards
// resplit() to it.
// NOTE(review): this defines ss::converter itself; presumably this file must
// not be combined with the real converter header in one translation unit —
// confirm how the test sources are built and linked.
template <typename... Matchers>
class converter {
public:
ss::splitter<Matchers...> splitter;
// Forwards to the splitter's resplit with the extended line and its length.
auto resplit(char* new_line, size_t new_line_size) {
return splitter.resplit(new_line, new_line_size);
}
};
} /* ss */
// Tests resplit(): a line that ends inside a quoted field is split first
// (expected to fail with unterminated_quote), then the shared buffer is
// extended with buff.append() and the longer line is passed to resplit().
// Each scope depends on the buffer contents left by the preceding calls, so
// the order of the statements matters.
TEST_CASE("testing unterminated quote") {
{
// the only field is unterminated
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, "\"just");
CHECK(vec.size() == 1);
auto new_line = buff.append(R"(",strings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
std::vector<std::string> expected{"just", "strings"};
CHECK(words(vec) == expected);
}
{
// the unterminated field follows two unquoted fields
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, "just,some,\"random");
// the last element after the failed split is checked as-is; it is not a
// parsed field
std::vector<std::string> expected{"just", "some", "just,some,\""};
CHECK(words(vec) == expected);
auto new_line = buff.append(R"(",strings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "random", "strings"};
CHECK(words(vec) == expected);
}
{
// the first part ends with a doubled quote inside the open field
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just","some","ran"")");
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
CHECK(words(vec) == expected);
auto new_line = buff.append(R"(,dom","strings")");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ran\",dom", "strings"};
CHECK(words(vec) == expected);
}
{
// two consecutive resplits: the first extension still leaves the quote open
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just","some","ran)");
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
CHECK(words(vec) == expected);
{
auto new_line = buff.append(R"(,dom)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(words(vec) == expected);
}
{
auto new_line = buff.append(R"(",strings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ran,dom", "strings"};
CHECK(words(vec) == expected);
}
}
{
// quote and escape: an escaped quote appears in a completed field
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra)");
std::vector<std::string> expected{"just\"some"};
// only the completed fields are compared; the last element is dropped
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ran,dom", "str\"ings"};
CHECK(words(vec) == expected);
}
}
{
// quote and escape: the first part ends with a doubled quote
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra"")");
std::vector<std::string> expected{"just\"some"};
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
CHECK(words(vec) == expected);
}
}
{
// quote and escape: the first part ends with an escaped quote
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra\")");
std::vector<std::string> expected{"just\"some"};
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
CHECK(words(vec) == expected);
}
}
{
// quote and trim: spaces around the fields and inside the open field
ss::converter<ss::quote<'"'>, ss::trim<' '>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"( "just" ,some, "ra )");
std::vector<std::string> expected{"just", "some"};
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
auto new_line = buff.append(R"( n,dom" , strings )");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ra n,dom", "strings"};
CHECK(words(vec) == expected);
}
}
{
// quote, trim and escape combined
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"( "ju\"st" ,some, "ra \")");
std::vector<std::string> expected{"ju\"st", "some"};
auto w = words(vec);
w.pop_back();
CHECK(w == expected);
{
auto new_line = buff.append(R"( n,dom" , strings )");
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"ju\"st", "some", "ra \" n,dom", "strings"};
CHECK(words(vec) == expected);
}
}
}
// Checks that each kind of invalid input makes the splitter invalid, and that
// unterminated_quote() is reported only for the unterminated-quote input.
TEST_CASE("testing invalid splits") {
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
auto& s = c.splitter;
// empty delimiter
s.split(buff("some,random,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
// mismatched quote (the quote closes in the middle of a field)
s.split(buff(R"(some,"random,"strings")"));
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
// unterminated quote
s.split(buff("some,random,\"strings"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
// invalid resplit
// (the line passed in is a fresh, shorter string rather than an extension of
// the previously split buffer)
char new_line[] = "some";
auto a = c.resplit(new_line, strlen(new_line));
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
}