mirror of
https://github.com/red0124/ssp.git
synced 2025-02-02 16:51:12 +01:00
Merge pull request #5 from red0124/feature/quote_escape_trim
Feature/quote escape trim
This commit is contained in:
commit
5cd458e2bc
10
README.md
10
README.md
@ -48,6 +48,7 @@ Bill (Heath) Gates 65 3.3
|
||||
* Works on any type
|
||||
* Easy to use
|
||||
* No exceptions
|
||||
* Works with quotes, escapes and spacings
|
||||
* Columns and rows can be ignored
|
||||
* Works with any type of delimiter
|
||||
* Can return whole objects composed of converted values
|
||||
@ -216,6 +217,15 @@ inline bool ss::extract(const char* begin, const char* end, shape& dst) {
|
||||
```
|
||||
The shape enum will be used in an example below. The **inline** is there just to prevent multiple definition errors. The function returns **true** if the conversion was a success, and **false** otherwise. The function uses **const char\*** begin and end for performance reasons.
|
||||
|
||||
## Quoting
|
||||
Not yet documented.
|
||||
|
||||
## Escaping
|
||||
Not yet documented.
|
||||
|
||||
## Spacing
|
||||
Not yet documented.
|
||||
|
||||
## Error handling
|
||||
|
||||
Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method:
|
||||
|
@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "extract.hpp"
|
||||
#include "function_traits.hpp"
|
||||
#include "restrictions.hpp"
|
||||
#include "splitter.hpp"
|
||||
#include "type_traits.hpp"
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
@ -21,7 +21,7 @@ INIT_HAS_METHOD(error)
|
||||
// eg. no_validator_tup_t<int, ss::nx<char, 'A', 'B'>> <=> std::tuple<int, char>
|
||||
// where ss::nx<char, 'A', 'B'> is a validator '(n)one e(x)cept' which
|
||||
// checks if the returned character is either 'A' or 'B', returns error if not
|
||||
// additionaly if one element is left in the pack, it will be unwraped from
|
||||
// additionally if one element is left in the pack, it will be unwrapped from
|
||||
// the tuple eg. no_void_validator_tup_t<int> <=> int instead of std::tuple<int>
|
||||
template <typename T, typename U = void>
|
||||
struct no_validator;
|
||||
@ -103,34 +103,31 @@ struct tied_class {
|
||||
template <typename... Ts>
|
||||
constexpr bool tied_class_v = tied_class<Ts...>::value;
|
||||
|
||||
// the error can be set inside a string, or a bool
|
||||
enum class error_mode { error_string, error_bool };
|
||||
|
||||
////////////////
|
||||
// converter
|
||||
////////////////
|
||||
|
||||
template <typename... Matchers>
|
||||
class converter {
|
||||
using string_range = std::pair<const char*, const char*>;
|
||||
constexpr static auto default_delimiter = ',';
|
||||
constexpr static auto default_delimiter = ",";
|
||||
using line_ptr_type = typename splitter<Matchers...>::line_ptr_type;
|
||||
|
||||
public:
|
||||
using split_input = std::vector<string_range>;
|
||||
|
||||
// parses line with given delimiter, returns a 'T' object created with
|
||||
// extracted values of type 'Ts'
|
||||
template <typename T, typename... Ts>
|
||||
T convert_object(const char* const line, const std::string& delim = "") {
|
||||
T convert_object(line_ptr_type line,
|
||||
const std::string& delim = default_delimiter) {
|
||||
return to_object<T>(convert<Ts...>(line, delim));
|
||||
}
|
||||
|
||||
// parses line with given delimiter, returns tuple of objects with
|
||||
// extracted values of type 'Ts'
|
||||
template <typename... Ts>
|
||||
no_void_validator_tup_t<Ts...> convert(const char* const line,
|
||||
const std::string& delim = "") {
|
||||
input_ = split(line, delim);
|
||||
return convert<Ts...>(input_);
|
||||
no_void_validator_tup_t<Ts...> convert(
|
||||
line_ptr_type line, const std::string& delim = default_delimiter) {
|
||||
split(line, delim);
|
||||
return convert<Ts...>(splitter_.split_input_);
|
||||
}
|
||||
|
||||
// parses already split line, returns 'T' object with extracted values
|
||||
@ -139,6 +136,12 @@ public:
|
||||
return to_object<T>(convert<Ts...>(elems));
|
||||
}
|
||||
|
||||
// same as above, but uses cached split line
|
||||
template <typename T, typename... Ts>
|
||||
T convert_object() {
|
||||
return to_object<T>(convert<Ts...>());
|
||||
}
|
||||
|
||||
// parses already split line, returns either a tuple of objects with
|
||||
// parsed values (returns raw element (no tuple) if Ts is empty), or if
|
||||
// one argument is given which is a class which has a tied
|
||||
@ -163,35 +166,53 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// same as above, but uses cached split line
|
||||
template <typename T, typename... Ts>
|
||||
no_void_validator_tup_t<T, Ts...> convert() {
|
||||
return convert<T, Ts...>(splitter_.split_input_);
|
||||
}
|
||||
|
||||
bool valid() const {
|
||||
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
const std::string& error_msg() const { return string_error_; }
|
||||
bool unterminated_quote() const {
|
||||
return splitter_.unterminated_quote();
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) { error_mode_ = mode; }
|
||||
const std::string& error_msg() const {
|
||||
return string_error_;
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
splitter_.set_error_mode(mode);
|
||||
error_mode_ = mode;
|
||||
}
|
||||
|
||||
// 'splits' string by given delimiter, returns vector of pairs which
|
||||
// contain the beginings and the ends of each column of the string
|
||||
const split_input& split(const char* const line,
|
||||
const std::string& delim = "") {
|
||||
input_.clear();
|
||||
// contain the beginnings and the ends of each column of the string
|
||||
const split_input& split(line_ptr_type line,
|
||||
const std::string& delim = default_delimiter) {
|
||||
splitter_.split_input_.clear();
|
||||
if (line[0] == '\0') {
|
||||
return input_;
|
||||
return splitter_.split_input_;
|
||||
}
|
||||
|
||||
switch (delim.size()) {
|
||||
case 0:
|
||||
return split_impl(line, ',');
|
||||
case 1:
|
||||
return split_impl(line, delim[0]);
|
||||
default:
|
||||
return split_impl(line, delim, delim.size());
|
||||
};
|
||||
return splitter_.split(line, delim);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
////////////////
|
||||
// resplit
|
||||
////////////////
|
||||
|
||||
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
|
||||
const std::string& delim = default_delimiter) {
|
||||
return splitter_.resplit(new_line, new_size, delim);
|
||||
}
|
||||
|
||||
////////////////
|
||||
// error
|
||||
////////////////
|
||||
@ -212,6 +233,15 @@ private:
|
||||
return error;
|
||||
}
|
||||
|
||||
void set_error_unterminated_quote() {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append(splitter_.error_msg());
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_invalid_conversion(const string_range msg, size_t pos) {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
@ -252,11 +282,19 @@ private:
|
||||
template <typename... Ts>
|
||||
no_void_validator_tup_t<Ts...> convert_impl(const split_input& elems) {
|
||||
clear_error();
|
||||
no_void_validator_tup_t<Ts...> ret{};
|
||||
if (sizeof...(Ts) != elems.size()) {
|
||||
set_error_number_of_colums(sizeof...(Ts), elems.size());
|
||||
|
||||
if (!splitter_.valid()) {
|
||||
set_error_unterminated_quote();
|
||||
no_void_validator_tup_t<Ts...> ret{};
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (sizeof...(Ts) != elems.size()) {
|
||||
set_error_number_of_colums(sizeof...(Ts), elems.size());
|
||||
no_void_validator_tup_t<Ts...> ret{};
|
||||
return ret;
|
||||
}
|
||||
|
||||
return extract_tuple<Ts...>(elems);
|
||||
}
|
||||
|
||||
@ -267,37 +305,6 @@ private:
|
||||
return convert_impl<Ts...>(elems);
|
||||
}
|
||||
|
||||
////////////////
|
||||
// substring
|
||||
////////////////
|
||||
|
||||
template <typename Delim>
|
||||
const split_input& split_impl(const char* const line, Delim delim,
|
||||
size_t delim_size = 1) {
|
||||
auto range = substring(line, delim);
|
||||
input_.push_back(range);
|
||||
while (range.second[0] != '\0') {
|
||||
range = substring(range.second + delim_size, delim);
|
||||
input_.push_back(range);
|
||||
}
|
||||
return input_;
|
||||
}
|
||||
|
||||
bool no_match(const char* end, char delim) const { return *end != delim; }
|
||||
|
||||
bool no_match(const char* end, const std::string& delim) const {
|
||||
return strncmp(end, delim.c_str(), delim.size()) != 0;
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
string_range substring(const char* const begin, Delim delim) const {
|
||||
const char* end;
|
||||
for (end = begin; *end != '\0' && no_match(end, delim); ++end)
|
||||
;
|
||||
|
||||
return string_range{begin, end};
|
||||
}
|
||||
|
||||
////////////////
|
||||
// conversion
|
||||
////////////////
|
||||
@ -309,6 +316,11 @@ private:
|
||||
return;
|
||||
}
|
||||
|
||||
if constexpr (std::is_same_v<T, std::string>) {
|
||||
extract(msg.first, msg.second, dst);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!extract(msg.first, msg.second, dst)) {
|
||||
set_error_invalid_conversion(msg, pos);
|
||||
return;
|
||||
@ -353,7 +365,7 @@ private:
|
||||
no_void_validator_tup_t<Ts...> extract_tuple(const split_input& elems) {
|
||||
static_assert(!all_of<std::is_void, Ts...>::value,
|
||||
"at least one parameter must be non void");
|
||||
no_void_validator_tup_t<Ts...> ret;
|
||||
no_void_validator_tup_t<Ts...> ret{};
|
||||
extract_multiple<0, 0, Ts...>(ret, elems);
|
||||
return ret;
|
||||
}
|
||||
@ -362,21 +374,13 @@ private:
|
||||
// members
|
||||
////////////////
|
||||
|
||||
std::vector<string_range> input_;
|
||||
std::string string_error_;
|
||||
bool bool_error_;
|
||||
enum error_mode error_mode_ { error_mode::error_bool };
|
||||
splitter<Matchers...> splitter_;
|
||||
|
||||
template <typename ...>
|
||||
friend class parser;
|
||||
};
|
||||
|
||||
template <>
|
||||
inline void converter::extract_one<std::string>(std::string& dst,
|
||||
const string_range msg,
|
||||
size_t) {
|
||||
if (!valid()) {
|
||||
return;
|
||||
}
|
||||
|
||||
extract(msg.first, msg.second, dst);
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
|
@ -33,6 +33,7 @@ std::enable_if_t<std::is_floating_point_v<T>, T> pow10(int n) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
// TODO not working with large number of digits
|
||||
template <typename T>
|
||||
std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
|
||||
const char* begin, const char* const end) {
|
||||
|
@ -9,13 +9,14 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// TODO rule of 5-3-1
|
||||
// TODO threads
|
||||
namespace ss {
|
||||
|
||||
struct none {};
|
||||
template <typename...>
|
||||
class composite;
|
||||
|
||||
template <typename... Matchers>
|
||||
class parser {
|
||||
struct none {};
|
||||
|
||||
public:
|
||||
parser(const std::string& file_name, const std::string& delimiter)
|
||||
: file_name_{file_name}, delim_{delimiter},
|
||||
@ -41,7 +42,7 @@ public:
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
error_mode_ = mode;
|
||||
converter_.set_error_mode(mode);
|
||||
reader_.set_error_mode(mode);
|
||||
}
|
||||
|
||||
const std::string& error_msg() const {
|
||||
@ -53,7 +54,7 @@ public:
|
||||
}
|
||||
|
||||
bool ignore_next() {
|
||||
return buff_.read(file_);
|
||||
return reader_.read(file_);
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
@ -63,17 +64,16 @@ public:
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
no_void_validator_tup_t<T, Ts...> get_next() {
|
||||
buff_.update();
|
||||
reader_.update();
|
||||
clear_error();
|
||||
if (eof_) {
|
||||
set_error_eof_reached();
|
||||
return {};
|
||||
}
|
||||
|
||||
split_input_ = converter_.split(buff_.get(), delim_);
|
||||
auto value = converter_.convert<T, Ts...>(split_input_);
|
||||
auto value = reader_.get_converter().template convert<T, Ts...>();
|
||||
|
||||
if (!converter_.valid()) {
|
||||
if (!reader_.get_converter().valid()) {
|
||||
set_error_invalid_conversion();
|
||||
}
|
||||
|
||||
@ -162,8 +162,8 @@ public:
|
||||
no_void_validator_tup_t<U, Us...> try_same() {
|
||||
parser_.clear_error();
|
||||
auto value =
|
||||
parser_.converter_.convert<U, Us...>(parser_.split_input_);
|
||||
if (!parser_.converter_.valid()) {
|
||||
parser_.reader_.get_converter().template convert<U, Us...>();
|
||||
if (!parser_.reader_.get_converter().valid()) {
|
||||
parser_.set_error_invalid_conversion();
|
||||
}
|
||||
return value;
|
||||
@ -192,9 +192,6 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename...>
|
||||
friend class composite;
|
||||
|
||||
// tries to invoke the given function (see below), if the function
|
||||
// returns a value which can be used as a conditional, and it returns
|
||||
// false, the function sets an error, and allows the invoke of the
|
||||
@ -249,44 +246,146 @@ private:
|
||||
// line reading
|
||||
////////////////
|
||||
|
||||
class buffer {
|
||||
class reader {
|
||||
char* buffer_{nullptr};
|
||||
char* new_buffer_{nullptr};
|
||||
size_t size_{0};
|
||||
char* next_line_buffer_{nullptr};
|
||||
char* helper_buffer_{nullptr};
|
||||
|
||||
public:
|
||||
~buffer() {
|
||||
free(buffer_);
|
||||
free(new_buffer_);
|
||||
converter<Matchers...> converter_;
|
||||
converter<Matchers...> next_line_converter_;
|
||||
|
||||
size_t size_{0};
|
||||
size_t helper_size_{0};
|
||||
const std::string& delim_;
|
||||
|
||||
bool crlf;
|
||||
|
||||
bool escaped_eol(size_t size) {
|
||||
if constexpr (setup<Matchers...>::escape::enabled) {
|
||||
const char* curr;
|
||||
for (curr = next_line_buffer_ + size - 1;
|
||||
curr >= next_line_buffer_ &&
|
||||
setup<Matchers...>::escape::match(*curr);
|
||||
--curr) {
|
||||
}
|
||||
return (next_line_buffer_ - curr + size) % 2 == 0;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool read(FILE* file) {
|
||||
ssize_t size = getline(&new_buffer_, &size_, file);
|
||||
size_t string_end = size - 1;
|
||||
bool unterminated_quote() {
|
||||
if constexpr (ss::setup<Matchers...>::quote::enabled) {
|
||||
if (next_line_converter_.unterminated_quote()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size == -1) {
|
||||
void undo_remove_eol(size_t& string_end) {
|
||||
if (crlf) {
|
||||
std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end);
|
||||
string_end += 2;
|
||||
} else {
|
||||
std::copy_n("\n\0", 2, next_line_buffer_ + string_end);
|
||||
string_end += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// strips the end of line sequence from 'buffer', which holds 'size'
// characters as reported by getline, terminates the string at the new end
// and returns the new length; 'crlf' records whether the stripped sequence
// was "\r\n" so that undo_remove_eol can put the same sequence back
size_t remove_eol(char*& buffer, size_t size) {
    // getline keeps the '\n' only if one was actually read, the last line
    // of a file may come without it, in that case there is nothing to
    // strip and dropping a character would lose data
    if (size == 0 || buffer[size - 1] != '\n') {
        crlf = false;
        return size;
    }

    size_t new_size = size - 1;
    if (size >= 2 && buffer[size - 2] == '\r') {
        crlf = true;
        new_size--;
    } else {
        crlf = false;
    }

    buffer[new_size] = '\0';
    return new_size;
}
|
||||
|
||||
void realloc_concat(char*& first, size_t& first_size,
|
||||
const char* const second, size_t second_size) {
|
||||
first = static_cast<char*>(realloc(static_cast<void*>(first),
|
||||
first_size + second_size + 2));
|
||||
|
||||
std::copy_n(second, second_size + 1, first + first_size);
|
||||
first_size += second_size;
|
||||
}
|
||||
|
||||
bool append_line(FILE* file, char*& dst_buffer, size_t& dst_size) {
|
||||
undo_remove_eol(dst_size);
|
||||
|
||||
ssize_t ssize = getline(&helper_buffer_, &helper_size_, file);
|
||||
if (ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size >= 2 && new_buffer_[size - 2] == '\r') {
|
||||
string_end--;
|
||||
}
|
||||
|
||||
new_buffer_[string_end] = '\0';
|
||||
size_t size = remove_eol(helper_buffer_, ssize);
|
||||
realloc_concat(dst_buffer, dst_size, helper_buffer_, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
const char* get() const {
|
||||
public:
|
||||
reader(const std::string& delimiter) : delim_{delimiter} {
|
||||
}
|
||||
|
||||
~reader() {
|
||||
free(buffer_);
|
||||
free(next_line_buffer_);
|
||||
free(helper_buffer_);
|
||||
}
|
||||
|
||||
bool read(FILE* file) {
|
||||
ssize_t ssize = getline(&next_line_buffer_, &size_, file);
|
||||
|
||||
if (ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = remove_eol(next_line_buffer_, ssize);
|
||||
|
||||
while (escaped_eol(size)) {
|
||||
if (!append_line(file, next_line_buffer_, size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
next_line_converter_.split(next_line_buffer_, delim_);
|
||||
|
||||
while (unterminated_quote()) {
|
||||
if (!append_line(file, next_line_buffer_, size)) {
|
||||
return false;
|
||||
}
|
||||
next_line_converter_.resplit(next_line_buffer_, size);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
converter_.set_error_mode(mode);
|
||||
next_line_converter_.set_error_mode(mode);
|
||||
}
|
||||
|
||||
converter<Matchers...>& get_converter() {
|
||||
return converter_;
|
||||
}
|
||||
|
||||
const char* get_buffer() const {
|
||||
return buffer_;
|
||||
}
|
||||
|
||||
void update() {
|
||||
std::swap(buffer_, new_buffer_);
|
||||
std::swap(buffer_, next_line_buffer_);
|
||||
std::swap(converter_, next_line_converter_);
|
||||
}
|
||||
};
|
||||
|
||||
void read_line() {
|
||||
eof_ = !buff_.read(file_);
|
||||
eof_ = !reader_.read(file_);
|
||||
++line_number_;
|
||||
}
|
||||
|
||||
@ -326,9 +425,9 @@ private:
|
||||
.append(" ")
|
||||
.append(std::to_string(line_number_))
|
||||
.append(": ")
|
||||
.append(converter_.error_msg())
|
||||
.append(reader_.get_converter().error_msg())
|
||||
.append(": \"")
|
||||
.append(buff_.get())
|
||||
.append(reader_.get_buffer())
|
||||
.append("\"");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
@ -344,10 +443,8 @@ private:
|
||||
std::string string_error_;
|
||||
bool bool_error_{false};
|
||||
error_mode error_mode_{error_mode::error_bool};
|
||||
converter converter_;
|
||||
converter::split_input split_input_;
|
||||
FILE* file_{nullptr};
|
||||
buffer buff_;
|
||||
reader reader_{delim_};
|
||||
size_t line_number_{0};
|
||||
bool eof_{false};
|
||||
};
|
||||
|
111
include/ss/setup.hpp
Normal file
111
include/ss/setup.hpp
Normal file
@ -0,0 +1,111 @@
|
||||
#pragma once
|
||||
#include "type_traits.hpp"
|
||||
#include <array>
|
||||
|
||||
namespace ss {
|
||||
|
||||
// compile time set of characters; 'match' tells whether a character belongs
// to the set and 'matches' exposes the set itself, the string terminator is
// rejected as a member of the set at compile time
template <char... Cs>
struct matcher {
    // returns true if 'c' is equal to any of the characters in 'Cs'
    static bool match(char c) {
        return ((c == Cs) || ...);
    }

    constexpr static bool enabled = true;
    constexpr static std::array<char, sizeof...(Cs)> matches{Cs...};

    // every character of the set has to differ from '\0'
    static_assert(((Cs != '\0') && ...),
                  "string terminator cannot be used as a match character");
};
|
||||
|
||||
// compile time check which tells whether two matchers share a match
// character; the string terminator is skipped, so it is never reported as
// a shared character
template <typename FirstMatcher, typename SecondMatcher>
constexpr bool matches_intersect() {
    for (const auto lhs : FirstMatcher::matches) {
        if (lhs == '\0') {
            continue;
        }

        for (const auto rhs : SecondMatcher::matches) {
            if (lhs == rhs) {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
template <>
|
||||
class matcher<'\0'> {
|
||||
public:
|
||||
constexpr static bool enabled = false;
|
||||
constexpr static std::array<char, 1> matches{'\0'};
|
||||
static bool match(char c) = delete;
|
||||
};
|
||||
|
||||
// tag type which carries the single quote character 'C', picked up by
// setup through get_matcher_t
template <char C>
|
||||
struct quote : matcher<C> {};
|
||||
|
||||
// tag type which carries the set of characters 'Cs' used for trimming,
// picked up by setup through get_matcher_t
template <char... Cs>
|
||||
struct trim : matcher<Cs...> {};
|
||||
|
||||
// tag type which carries the set of escape characters 'Cs', picked up by
// setup through get_matcher_t
template <char... Cs>
|
||||
struct escape : matcher<Cs...> {};
|
||||
|
||||
// tells whether 'T' is an instantiation of 'Template', where 'Template'
// takes a pack of characters eg. is_instance_of_matcher<quote<'"'>, quote>
// general case: 'T' is not an instantiation of 'Template'
template <typename T, template <char...> class Template>
|
||||
struct is_instance_of_matcher {
|
||||
constexpr static bool value = false;
|
||||
};
|
||||
|
||||
// selected when the first argument has the form Template<Ts...>
template <char... Ts, template <char...> class Template>
|
||||
struct is_instance_of_matcher<Template<Ts...>, Template> {
|
||||
constexpr static bool value = true;
|
||||
};
|
||||
|
||||
template <template <char...> class Matcher, typename... Ts>
|
||||
struct get_matcher;
|
||||
|
||||
template <template <char...> class Matcher, typename T, typename... Ts>
|
||||
struct get_matcher<Matcher, T, Ts...> {
|
||||
using type =
|
||||
typename ternary<is_instance_of_matcher<T, Matcher>::value, T,
|
||||
typename get_matcher<Matcher, Ts...>::type>::type;
|
||||
};
|
||||
|
||||
template <template <char...> class Matcher>
|
||||
struct get_matcher<Matcher> {
|
||||
using type = Matcher<'\0'>;
|
||||
};
|
||||
|
||||
template <template <char...> class Matcher, typename... Ts>
|
||||
using get_matcher_t = typename get_matcher<Matcher, Ts...>::type;
|
||||
|
||||
template <typename... Ts>
|
||||
struct setup {
|
||||
using quote = get_matcher_t<quote, Ts...>;
|
||||
using trim = get_matcher_t<trim, Ts...>;
|
||||
using escape = get_matcher_t<escape, Ts...>;
|
||||
|
||||
#define ASSERT_MSG "cannot have the same match character in multiple matchers"
|
||||
static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG);
|
||||
static_assert(!matches_intersect<trim, escape>(), ASSERT_MSG);
|
||||
static_assert(!matches_intersect<escape, quote>(), ASSERT_MSG);
|
||||
#undef ASSERT_MSG
|
||||
};
|
||||
|
||||
template <typename... Ts>
|
||||
struct setup<setup<Ts...>> : setup<Ts...> {};
|
||||
|
||||
} /* ss */
|
388
include/ss/splitter.hpp
Normal file
388
include/ss/splitter.hpp
Normal file
@ -0,0 +1,388 @@
|
||||
#pragma once
|
||||
#include "setup.hpp"
|
||||
#include "type_traits.hpp"
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ss {
|
||||
|
||||
// pair of pointers which mark the begin and the end of a single column
using string_range = std::pair<const char*, const char*>;
|
||||
// all the columns of a split line
using split_input = std::vector<string_range>;
|
||||
|
||||
// the error can be set inside a string, or a bool
|
||||
enum class error_mode { error_string, error_bool };
|
||||
|
||||
template <typename... Ts>
|
||||
class splitter {
|
||||
private:
|
||||
constexpr static auto default_delimiter = ",";
|
||||
|
||||
using quote = typename setup<Ts...>::quote;
|
||||
using trim = typename setup<Ts...>::trim;
|
||||
using escape = typename setup<Ts...>::escape;
|
||||
|
||||
constexpr static auto is_const_line = !quote::enabled && !escape::enabled;
|
||||
|
||||
public:
|
||||
using line_ptr_type =
|
||||
typename ternary<is_const_line, const char*, char*>::type;
|
||||
|
||||
bool valid() const {
|
||||
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
bool unterminated_quote() const {
|
||||
return unterminated_quote_;
|
||||
}
|
||||
|
||||
const std::string& error_msg() const {
|
||||
return string_error_;
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
error_mode_ = mode;
|
||||
}
|
||||
|
||||
const split_input& split(line_ptr_type new_line,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
split_input_.clear();
|
||||
return resplit(new_line, -1, delimiter);
|
||||
}
|
||||
|
||||
void adjust_ranges(const char* old_line) {
|
||||
for (auto& [begin, end] : split_input_) {
|
||||
begin = begin - old_line + line_;
|
||||
end = end - old_line + line_;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
////////////////
|
||||
// resplit
|
||||
////////////////
|
||||
|
||||
const split_input& resplit(
|
||||
line_ptr_type new_line, ssize_t new_size,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
line_ = new_line;
|
||||
|
||||
// resplitting, continue from last slice
|
||||
if (!split_input_.empty() && unterminated_quote()) {
|
||||
const auto& last = std::prev(split_input_.end());
|
||||
const auto [old_line, old_begin] = *last;
|
||||
size_t begin = old_begin - old_line - 1;
|
||||
split_input_.pop_back();
|
||||
adjust_ranges(old_line);
|
||||
|
||||
// safety measure
|
||||
if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
|
||||
set_error_invalid_resplit();
|
||||
return split_input_;
|
||||
}
|
||||
|
||||
begin_ = line_ + begin;
|
||||
}
|
||||
|
||||
return split_impl_select_delim(delimiter);
|
||||
}
|
||||
|
||||
////////////////
|
||||
// error
|
||||
////////////////
|
||||
|
||||
void clear_error() {
|
||||
string_error_.clear();
|
||||
bool_error_ = false;
|
||||
unterminated_quote_ = false;
|
||||
}
|
||||
|
||||
void set_error_empty_delimiter() {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("empty delimiter");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_mismatched_quote(size_t n) {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("mismatched quote at position: " +
|
||||
std::to_string(n));
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_unterminated_quote() {
|
||||
unterminated_quote_ = true;
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("unterminated quote");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_invalid_resplit() {
|
||||
unterminated_quote_ = false;
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("invalid resplit, new line must be longer"
|
||||
"than the end of the last slice");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////
|
||||
// matching
|
||||
////////////////
|
||||
|
||||
bool match(const char* const curr, char delim) {
|
||||
return *curr == delim;
|
||||
};
|
||||
|
||||
bool match(const char* const curr, const std::string& delim) {
|
||||
return strncmp(curr, delim.c_str(), delim.size()) == 0;
|
||||
};
|
||||
|
||||
size_t delimiter_size(char) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t delimiter_size(const std::string& delim) {
|
||||
return delim.size();
|
||||
}
|
||||
|
||||
void trim_if_enabled(line_ptr_type& curr) {
|
||||
if constexpr (trim::enabled) {
|
||||
while (trim::match(*curr)) {
|
||||
++curr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void shift_if_escaped(line_ptr_type& curr) {
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*curr)) {
|
||||
shift_and_jump_escape();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
std::tuple<size_t, bool> match_delimiter(line_ptr_type begin,
|
||||
const Delim& delim) {
|
||||
line_ptr_type end = begin;
|
||||
|
||||
trim_if_enabled(end);
|
||||
|
||||
// just spacing
|
||||
if (*end == '\0') {
|
||||
return {0, false};
|
||||
}
|
||||
|
||||
// not a delimiter
|
||||
if (!match(end, delim)) {
|
||||
shift_if_escaped(end);
|
||||
return {1 + end - begin, false};
|
||||
}
|
||||
|
||||
end += delimiter_size(delim);
|
||||
trim_if_enabled(end);
|
||||
|
||||
// delimiter
|
||||
return {end - begin, true};
|
||||
}
|
||||
|
||||
////////////////
|
||||
// shifting
|
||||
////////////////
|
||||
|
||||
void shift_and_set_current() {
|
||||
if (escaped_ > 0) {
|
||||
if constexpr (!is_const_line) {
|
||||
std::copy_n(curr_ + escaped_, end_ - curr_, curr_);
|
||||
}
|
||||
}
|
||||
curr_ = end_ - escaped_;
|
||||
}
|
||||
|
||||
void shift_and_push() {
|
||||
shift_and_set_current();
|
||||
split_input_.emplace_back(begin_, curr_);
|
||||
}
|
||||
|
||||
void shift_and_jump_escape() {
|
||||
shift_and_set_current();
|
||||
++end_;
|
||||
++escaped_;
|
||||
}
|
||||
|
||||
void shift_push_and_start_next(size_t n) {
|
||||
shift_and_push();
|
||||
begin_ = end_ + n;
|
||||
}
|
||||
|
||||
////////////////
|
||||
// split impl
|
||||
////////////////
|
||||
|
||||
const split_input& split_impl_select_delim(
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
clear_error();
|
||||
switch (delimiter.size()) {
|
||||
case 0:
|
||||
set_error_empty_delimiter();
|
||||
return split_input_;
|
||||
case 1:
|
||||
return split_impl(delimiter[0]);
|
||||
default:
|
||||
return split_impl(delimiter);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
const split_input& split_impl(const Delim& delim) {
|
||||
|
||||
if (split_input_.empty()) {
|
||||
begin_ = line_;
|
||||
}
|
||||
|
||||
trim_if_enabled(begin_);
|
||||
|
||||
for (done_ = false; !done_; read(delim))
|
||||
;
|
||||
|
||||
return split_input_;
|
||||
}
|
||||
|
||||
////////////////
|
||||
// reading
|
||||
////////////////
|
||||
|
||||
template <typename Delim>
|
||||
void read(const Delim& delim) {
|
||||
escaped_ = 0;
|
||||
if constexpr (quote::enabled) {
|
||||
if (quote::match(*begin_)) {
|
||||
curr_ = end_ = ++begin_;
|
||||
read_quoted(delim);
|
||||
return;
|
||||
}
|
||||
}
|
||||
curr_ = end_ = begin_;
|
||||
read_normal(delim);
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
void read_normal(const Delim& delim) {
|
||||
while (true) {
|
||||
auto [width, valid] = match_delimiter(end_, delim);
|
||||
|
||||
if (!valid) {
|
||||
// not a delimiter
|
||||
if (width == 0) {
|
||||
// eol
|
||||
shift_and_push();
|
||||
done_ = true;
|
||||
break;
|
||||
} else {
|
||||
end_ += width;
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// found delimiter
|
||||
shift_push_and_start_next(width);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
void read_quoted(const Delim& delim) {
|
||||
if constexpr (quote::enabled) {
|
||||
while (true) {
|
||||
if (!quote::match(*end_)) {
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*end_)) {
|
||||
shift_and_jump_escape();
|
||||
++end_;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// unterminated quote error
|
||||
// eg: ..."hell\0 -> quote not terminated
|
||||
if (*end_ == '\0') {
|
||||
set_error_unterminated_quote();
|
||||
split_input_.emplace_back(line_, begin_);
|
||||
done_ = true;
|
||||
break;
|
||||
}
|
||||
++end_;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto [width, valid] = match_delimiter(end_ + 1, delim);
|
||||
|
||||
// delimiter
|
||||
if (valid) {
|
||||
shift_push_and_start_next(width + 1);
|
||||
break;
|
||||
}
|
||||
|
||||
// double quote
|
||||
// eg: ...,"hel""lo",... -> hel"lo
|
||||
if (quote::match(end_[1])) {
|
||||
shift_and_jump_escape();
|
||||
++end_;
|
||||
continue;
|
||||
}
|
||||
|
||||
// not a delimiter
|
||||
if (width == 0) {
|
||||
// eol
|
||||
// eg: ...,"hello" \0 -> hello
|
||||
// eg no trim: ...,"hello"\0 -> hello
|
||||
shift_and_push();
|
||||
} else {
|
||||
// mismatched quote
|
||||
// eg: ...,"hel"lo,... -> error
|
||||
set_error_mismatched_quote(end_ - line_);
|
||||
split_input_.emplace_back(line_, begin_);
|
||||
}
|
||||
done_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
////////////////
|
||||
// members
|
||||
////////////////
|
||||
|
||||
std::string string_error_;
|
||||
bool bool_error_{false};
|
||||
bool unterminated_quote_{false};
|
||||
enum error_mode error_mode_ { error_mode::error_bool };
|
||||
line_ptr_type begin_;
|
||||
line_ptr_type curr_;
|
||||
line_ptr_type end_;
|
||||
line_ptr_type line_;
|
||||
bool done_;
|
||||
size_t escaped_{0};
|
||||
split_input split_input_;
|
||||
|
||||
template <typename ...>
|
||||
friend class converter;
|
||||
};
|
||||
|
||||
} /* ss */
|
@ -314,6 +314,23 @@ struct is_instance_of<Template<Ts...>, Template> {
|
||||
constexpr static bool value = true;
|
||||
};
|
||||
|
||||
////////////////
|
||||
// ternary
|
||||
////////////////
|
||||
|
||||
// Compile-time type selection: ternary<B, T, U>::type names T when B is true
// and U when B is false.

// General case. Because `true` is specialized below, this definition is only
// ever instantiated for B == false.
template <bool B, typename T, typename U>
struct ternary {
    using type = U;
};

// B == true selects the first alternative.
template <typename T, typename U>
struct ternary<true, T, U> {
    using type = T;
};
|
||||
|
||||
////////////////
|
||||
// tuple to struct
|
||||
////////////////
|
||||
@ -323,6 +340,7 @@ S to_object(std::index_sequence<Is...>, Tup&& tup) {
|
||||
return {std::get<Is>(std::forward<Tup>(tup))...};
|
||||
}
|
||||
|
||||
// TODO Tup may not be a tuple ...
|
||||
template <class S, class Tup>
|
||||
S to_object(Tup&& tup) {
|
||||
using T = std::remove_reference_t<Tup>;
|
||||
|
@ -2,7 +2,7 @@ project('ssp', 'cpp',
|
||||
default_options :
|
||||
['warning_level=3',
|
||||
'cpp_std=c++17',
|
||||
'buildtype=debug'])
|
||||
'buildtype=debugoptimized'])
|
||||
|
||||
includes = include_directories('include')
|
||||
subdir('test')
|
||||
|
@ -28,7 +28,7 @@ include(doctest)
|
||||
|
||||
enable_testing()
|
||||
|
||||
foreach(name IN ITEMS test_parser test_converter test_extractions)
|
||||
foreach(name IN ITEMS test_splitter test_parser test_converter test_extractions)
|
||||
add_executable("${name}" "${name}.cpp")
|
||||
target_link_libraries("${name}" PRIVATE ssp::ssp doctest::doctest)
|
||||
target_compile_definitions("${name}" PRIVATE
|
||||
|
@ -1,5 +1,6 @@
|
||||
test_sources = files([
|
||||
'test_main.cpp',
|
||||
'test_splitter.cpp',
|
||||
'test_converter.cpp',
|
||||
'test_parser.cpp',
|
||||
'test_extractions.cpp',
|
||||
|
@ -1,23 +1,18 @@
|
||||
#include "test_helpers.hpp"
|
||||
#include <algorithm>
|
||||
#include <ss/converter.hpp>
|
||||
|
||||
#ifdef CMAKE_GITHUB_CI
|
||||
#include <doctest/doctest.h>
|
||||
#else
|
||||
#include <doctest.h>
|
||||
#endif
|
||||
|
||||
TEST_CASE("testing split") {
|
||||
ss::converter c;
|
||||
|
||||
for (const auto& [s, expected, delim] :
|
||||
{std::make_tuple("a,b,c,d", std::vector{"a", "b", "c", "d"}, ","),
|
||||
{"", {}, " "},
|
||||
{"a,b,c", {"a", "b", "c"}, ""},
|
||||
{" x x x x | x ", {" x x x x ", " x "}, "|"},
|
||||
{"a::b::c::d", {"a", "b", "c", "d"}, "::"},
|
||||
{"x\t-\ty", {"x", "y"}, "\t-\t"},
|
||||
{"x", {"x"}, ","}}) {
|
||||
// clang-format off
|
||||
{std::make_tuple("a,b,c,d", std::vector{"a", "b", "c", "d"}, ","),
|
||||
{"", {}, " "},
|
||||
{" x x x x | x ", {" x x x x ", " x "}, "|"},
|
||||
{"a::b::c::d", {"a", "b", "c", "d"}, "::"},
|
||||
{"x\t-\ty", {"x", "y"}, "\t-\t"},
|
||||
{"x", {"x"}, ","}} // clang-format on
|
||||
) {
|
||||
auto split = c.split(s, delim);
|
||||
CHECK(split.size() == expected.size());
|
||||
for (size_t i = 0; i < split.size(); ++i) {
|
||||
@ -120,6 +115,9 @@ TEST_CASE("testing invalid conversions") {
|
||||
c.convert<int>("");
|
||||
REQUIRE(!c.valid());
|
||||
|
||||
c.convert<int>("10", "");
|
||||
REQUIRE(!c.valid());
|
||||
|
||||
c.convert<int, void>("");
|
||||
REQUIRE(!c.valid());
|
||||
|
||||
@ -396,3 +394,71 @@ TEST_CASE("testing error mode") {
|
||||
CHECK(!c.valid());
|
||||
CHECK(!c.error_msg().empty());
|
||||
}
|
||||
|
||||
TEST_CASE("testing converter with quotes spacing and escaping") {
|
||||
{
|
||||
ss::converter c;
|
||||
|
||||
auto tup = c.convert<std::string, std::string, std::string>(
|
||||
R"("just","some","strings")");
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == std::make_tuple("\"just\"", "\"some\"", "\"strings\""));
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>> c;
|
||||
|
||||
auto tup = c.convert<std::string, std::string, double, char>(
|
||||
buff(R"("just",some,"12.3","a")"));
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == std::make_tuple("just", "some", 12.3, 'a'));
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::trim<' '>> c;
|
||||
|
||||
auto tup = c.convert<std::string, std::string, double, char>(
|
||||
buff(R"( just , some , 12.3 ,a )"));
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == std::make_tuple("just", "some", 12.3, 'a'));
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::escape<'\\'>> c;
|
||||
|
||||
auto tup =
|
||||
c.convert<std::string, std::string>(buff(R"(ju\,st,strings)"));
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == std::make_tuple("ju,st", "strings"));
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;
|
||||
|
||||
auto tup = c.convert<std::string, std::string, double, std::string>(
|
||||
buff(R"( ju\,st , "so,me" , 12.34 , "str""ings")"));
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == std::make_tuple("ju,st", "so,me", 12.34, "str\"ings"));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing invalid split conversions") {
|
||||
ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;
|
||||
c.set_error_mode(ss::error_mode::error_string);
|
||||
|
||||
{
|
||||
// mismatched quote
|
||||
auto tup = c.convert<std::string, std::string, double, char>(
|
||||
buff(R"( "just , some , "12.3","a" )"));
|
||||
CHECK(!c.valid());
|
||||
CHECK(!c.unterminated_quote());
|
||||
}
|
||||
|
||||
{
|
||||
// unterminated quote
|
||||
auto tup = c.convert<std::string, std::string, double, std::string>(
|
||||
buff(R"( ju\,st , "so,me" , 12.34 , "str""ings)"));
|
||||
CHECK(!c.valid());
|
||||
CHECK(c.unterminated_quote());
|
||||
}
|
||||
}
|
||||
|
@ -1,11 +1,6 @@
|
||||
#include <ss/extract.hpp>
|
||||
#include "test_helpers.hpp"
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef CMAKE_GITHUB_CI
|
||||
#include <doctest/doctest.h>
|
||||
#else
|
||||
#include <doctest.h>
|
||||
#endif
|
||||
#include <ss/extract.hpp>
|
||||
|
||||
constexpr auto eps = 0.000001;
|
||||
using ld = long double;
|
||||
|
44
test/test_helpers.hpp
Normal file
44
test/test_helpers.hpp
Normal file
@ -0,0 +1,44 @@
|
||||
#pragma once
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#ifdef CMAKE_GITHUB_CI
|
||||
#include <doctest/doctest.h>
|
||||
#else
|
||||
#include <doctest.h>
|
||||
#endif
|
||||
|
||||
// Owns a heap-allocated, null-terminated, writable copy of a C string.
// The tests use it to turn string literals into `char*` lines.
//
// Pointers returned by operator() and append() stay valid only until the next
// call to either of them or until the buffer is destroyed.
//
// The class manages a raw allocation, so copying is disabled: a memberwise
// copy would make two objects delete the same array.
class buffer {
    char* data_{nullptr};

    // Allocates a new null-terminated array holding `head` followed by
    // `tail`. The array is fully built before the old storage is touched, so
    // a throwing `new` leaves the buffer unchanged and either argument may
    // point into the current storage.
    static char* joined(const char* head, const char* tail) {
        const std::size_t head_size = std::strlen(head);
        const std::size_t tail_size = std::strlen(tail);
        char* fresh = new char[head_size + tail_size + 1];
        std::memcpy(fresh, head, head_size);
        // + 1 copies the terminating '\0' of `tail` as well
        std::memcpy(fresh + head_size, tail, tail_size + 1);
        return fresh;
    }

    // Releases the current storage and takes ownership of `fresh`.
    char* adopt(char* fresh) {
        delete[] data_; // deleting nullptr is a no-op
        data_ = fresh;
        return data_;
    }

public:
    buffer() = default;
    buffer(const buffer&) = delete;
    buffer& operator=(const buffer&) = delete;

    // Replaces the content with a copy of `data` and returns the new storage.
    char* operator()(const char* data) {
        return adopt(joined("", data));
    }

    // Appends `data` to the current content (or stores a copy of `data` if
    // the buffer is still empty) and returns the new storage.
    char* append(const char* data) {
        return adopt(joined(data_ ? data_ : "", data));
    }

    ~buffer() {
        delete[] data_;
    }
};
|
||||
|
||||
[[maybe_unused]] inline buffer buff;
|
@ -1,20 +1,18 @@
|
||||
#include "test_helpers.hpp"
|
||||
#include <algorithm>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <ss/parser.hpp>
|
||||
|
||||
#ifdef CMAKE_GITHUB_CI
|
||||
#include <doctest/doctest.h>
|
||||
#else
|
||||
#include <doctest.h>
|
||||
#endif
|
||||
|
||||
struct unique_file_name {
|
||||
const std::string name;
|
||||
|
||||
unique_file_name() : name{std::tmpnam(nullptr)} {}
|
||||
unique_file_name() : name{std::tmpnam(nullptr)} {
|
||||
}
|
||||
|
||||
~unique_file_name() { std::filesystem::remove(name); }
|
||||
~unique_file_name() {
|
||||
std::filesystem::remove(name);
|
||||
}
|
||||
};
|
||||
|
||||
struct X {
|
||||
@ -30,7 +28,9 @@ struct X {
|
||||
.append(delim)
|
||||
.append(s);
|
||||
}
|
||||
auto tied() const { return std::tie(i, d, s); }
|
||||
auto tied() const {
|
||||
return std::tie(i, d, s);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
@ -56,6 +56,7 @@ TEST_CASE("testing parser") {
|
||||
make_and_write(f.name, data);
|
||||
{
|
||||
ss::parser p{f.name, ","};
|
||||
p.set_error_mode(ss::error_mode::error_string);
|
||||
std::vector<X> i;
|
||||
|
||||
while (!p.eof()) {
|
||||
@ -166,10 +167,13 @@ struct test_struct {
|
||||
int i;
|
||||
double d;
|
||||
char c;
|
||||
auto tied() { return std::tie(i, d, c); }
|
||||
auto tied() {
|
||||
return std::tie(i, d, c);
|
||||
}
|
||||
};
|
||||
|
||||
void expect_test_struct(const test_struct&) {}
|
||||
void expect_test_struct(const test_struct&) {
|
||||
}
|
||||
|
||||
// various scenarios
|
||||
TEST_CASE("testing composite conversion") {
|
||||
@ -391,7 +395,9 @@ struct my_string {
|
||||
|
||||
my_string() = default;
|
||||
|
||||
~my_string() { delete[] data; }
|
||||
~my_string() {
|
||||
delete[] data;
|
||||
}
|
||||
|
||||
// make sure no object is copied
|
||||
my_string(const my_string&) = delete;
|
||||
@ -422,7 +428,9 @@ struct xyz {
|
||||
my_string x;
|
||||
my_string y;
|
||||
my_string z;
|
||||
auto tied() { return std::tie(x, y, z); }
|
||||
auto tied() {
|
||||
return std::tie(x, y, z);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_CASE("testing the moving of parsed values") {
|
||||
@ -474,8 +482,8 @@ TEST_CASE("testing the moving of parsed values") {
|
||||
TEST_CASE("testing the moving of parsed composite values") {
|
||||
// to compile is enough
|
||||
return;
|
||||
ss::parser* p;
|
||||
p->try_next<my_string, my_string, my_string>()
|
||||
ss::parser p{"", ""};
|
||||
p.try_next<my_string, my_string, my_string>()
|
||||
.or_else<my_string, my_string, my_string, my_string>([](auto&&) {})
|
||||
.or_else<my_string>([](auto&) {})
|
||||
.or_else<xyz>([](auto&&) {})
|
||||
@ -506,3 +514,66 @@ TEST_CASE("testing error mode") {
|
||||
CHECK(!p.valid());
|
||||
CHECK(!p.error_msg().empty());
|
||||
}
|
||||
|
||||
// Returns `s` without its first and last character when `s` starts with a
// double quote; any other string is returned unchanged.
//
// A string shorter than two characters cannot hold both a leading and a
// trailing quote, so it is returned as is. The length guard keeps the
// extracted range valid for a lone `"`.
std::string no_quote(const std::string& s) {
    if (s.size() >= 2 && s.front() == '"') {
        return s.substr(1, s.size() - 2);
    }
    return s;
}
|
||||
|
||||
TEST_CASE("testing csv on multiple lines with quotes") {
|
||||
unique_file_name f;
|
||||
std::vector<X> data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""},
|
||||
{5, 6, "\"z\nz\""}, {7, 8, "\"u\"\"\""},
|
||||
{9, 10, "v"}, {11, 12, "\"w\n\""}};
|
||||
make_and_write(f.name, data);
|
||||
for (auto& [_, __, s] : data) {
|
||||
s = no_quote(s);
|
||||
if (s[0] == 'u') {
|
||||
s = "u\"";
|
||||
}
|
||||
}
|
||||
|
||||
ss::parser<ss::quote<'"'>> p{f.name, ","};
|
||||
p.set_error_mode(ss::error_mode::error_string);
|
||||
std::vector<X> i;
|
||||
|
||||
while (!p.eof()) {
|
||||
auto a = p.get_next<int, double, std::string>();
|
||||
i.emplace_back(ss::to_object<X>(a));
|
||||
}
|
||||
|
||||
CHECK(std::equal(i.begin(), i.end(), data.begin()));
|
||||
}
|
||||
|
||||
// Removes every backslash from `s` in place and returns a copy of the
// stripped string.
std::string no_escape(std::string& s) {
    const auto is_backslash = [](char c) { return c == '\\'; };
    const auto new_end = std::remove_if(s.begin(), s.end(), is_backslash);
    s.erase(new_end, s.end());
    return s;
}
|
||||
|
||||
TEST_CASE("testing csv on multiple lines with escapes") {
|
||||
unique_file_name f;
|
||||
std::vector<X> data = {{1, 2, "x\\\nx\\\nx"}, {3, 4, "y\\\ny\\\ny"},
|
||||
{5, 6, "z\\\nz"}, {7, 8, "u"},
|
||||
{9, 10, "v\\\\"}, {11, 12, "w\\\n"}};
|
||||
|
||||
make_and_write(f.name, data);
|
||||
for (auto& [_, __, s] : data) {
|
||||
s = no_escape(s);
|
||||
if (s == "v") {
|
||||
s = "v\\";
|
||||
}
|
||||
}
|
||||
|
||||
ss::parser<ss::escape<'\\'>> p{f.name, ","};
|
||||
p.set_error_mode(ss::error_mode::error_string);
|
||||
std::vector<X> i;
|
||||
|
||||
while (!p.eof()) {
|
||||
auto a = p.get_next<int, double, std::string>();
|
||||
i.emplace_back(ss::to_object<X>(a));
|
||||
}
|
||||
|
||||
CHECK(std::equal(i.begin(), i.end(), data.begin()));
|
||||
}
|
||||
|
717
test/test_splitter.cpp
Normal file
717
test/test_splitter.cpp
Normal file
@ -0,0 +1,717 @@
|
||||
#include "test_helpers.hpp"
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <ss/splitter.hpp>
|
||||
|
||||
namespace {
|
||||
// Bound used by make_combinations for its loop over the number of fields per
// generated line; tests lower it through set_combinations_size.
constexpr static auto combinations_size_default = 4;
size_t combinations_size = combinations_size_default;

// Scope guard: sets combinations_size on construction and puts it back to
// combinations_size_default on destruction.
//
// The constructor is explicit so a plain integer never converts into a guard,
// and the guard is non-copyable because destroying a copy would reset the
// global while the original is still alive. `auto guard =
// set_combinations_size(3);` keeps working under C++17, where the prvalue
// initializes `guard` directly without a copy.
struct set_combinations_size {
    explicit set_combinations_size(size_t size) {
        combinations_size = size;
    }

    set_combinations_size(const set_combinations_size&) = delete;
    set_combinations_size& operator=(const set_combinations_size&) = delete;

    ~set_combinations_size() {
        combinations_size = combinations_size_default;
    }
};
|
||||
|
||||
// Copies every [first, last) range of `input` into an owning std::string,
// preserving order, so results can be compared against expected words.
std::vector<std::string> words(const ss::split_input& input) {
    std::vector<std::string> collected;
    for (const auto& [first, last] : input) {
        collected.emplace_back(first, last);
    }
    return collected;
}
|
||||
|
||||
// Renders `v` as a bracketed, comma-separated list, e.g. {"a", "b"} becomes
// "[a,b]"; an empty vector becomes "[]".
// Not called by the visible tests (hence [[maybe_unused]]); presumably a
// debugging aid.
[[maybe_unused]] std::string concat(const std::vector<std::string>& v) {
    std::string ret = "[";
    // The separator is empty before the first word and "," afterwards, so
    // nothing has to be patched up once the loop is done.
    const char* separator = "";
    for (const auto& word : v) {
        ret.append(separator).append(word);
        separator = ",";
    }
    ret.push_back(']');
    return ret;
}
|
||||
|
||||
// Returns the summed length of all given strings. Each argument after the
// first is converted to std::string when it is consumed by the recursive
// call.
template <typename... Ts>
size_t strings_size(const std::string& s, const Ts&... ss) {
    size_t total = s.size();
    if constexpr (sizeof...(Ts) != 0) {
        total += strings_size(ss...);
    }
    return total;
}
|
||||
|
||||
// Appends `s` and then every remaining argument, in order, to `dst`.
template <typename... Ts>
void concat_to(std::string& dst, const std::string& s, const Ts&... ss) {
    dst += s;
    if constexpr (sizeof...(Ts) != 0) {
        concat_to(dst, ss...);
    }
}
|
||||
|
||||
// Joins all arguments, in order, into one string. The total length is
// computed up front so the result is allocated once.
template <typename... Ts>
std::string concat(const Ts&... ss) {
    const size_t total = strings_size(ss...);
    std::string joined;
    joined.reserve(total);
    concat_to(joined, ss...);
    return joined;
}
|
||||
|
||||
using case_type = std::vector<std::string>;
|
||||
auto spaced(const case_type& input, const std::string& s) {
|
||||
case_type ret = input;
|
||||
for (const auto& i : input) {
|
||||
ret.push_back(concat(s, i, s));
|
||||
ret.push_back(concat(i, s));
|
||||
ret.push_back(concat(s, i));
|
||||
ret.push_back(concat(s, s, i));
|
||||
ret.push_back(concat(s, s, i, s, s));
|
||||
ret.push_back(concat(i, s, s));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
auto spaced(const case_type& input, const std::string& s1,
|
||||
const std::string& s2) {
|
||||
case_type ret = input;
|
||||
for (const auto& i : input) {
|
||||
ret.push_back(concat(s1, i, s2));
|
||||
ret.push_back(concat(s2, i, s1));
|
||||
ret.push_back(concat(s2, s2, s1, s1, i));
|
||||
ret.push_back(concat(i, s1, s2, s1, s2));
|
||||
ret.push_back(concat(s1, s1, s1, i, s2, s2, s2));
|
||||
ret.push_back(concat(s2, s2, s2, i, s1, s1, s1));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::vector<std::string> combinations(const std::vector<std::string>& v,
|
||||
const std::string& delim, size_t n) {
|
||||
if (n <= 1) {
|
||||
return v;
|
||||
}
|
||||
std::vector<std::string> ret;
|
||||
auto inner_combinations = combinations(v, delim, n - 1);
|
||||
for (const auto& i : v) {
|
||||
for (const auto& j : inner_combinations) {
|
||||
ret.push_back(concat(i, delim, j));
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Builds every sequence of `n` words from `v`, with the leftmost word varying
// slowest. For n of 0 or 1 each word forms its own single-element sequence.
std::vector<std::vector<std::string>> vector_combinations(
    const std::vector<std::string>& v, size_t n) {
    std::vector<std::vector<std::string>> rows;

    if (n <= 1) {
        for (const auto& word : v) {
            rows.push_back(std::vector<std::string>(1, word));
        }
        return rows;
    }

    // all sequences that are one word shorter; each gets every word prepended
    const auto tails = vector_combinations(v, n - 1);
    for (const auto& head : v) {
        for (const auto& tail : tails) {
            std::vector<std::string> row;
            row.reserve(tail.size() + 1);
            row.push_back(head);
            row.insert(row.end(), tail.begin(), tail.end());
            rows.push_back(std::move(row));
        }
    }
    return rows;
}
|
||||
|
||||
std::pair<std::vector<std::string>, std::vector<std::vector<std::string>>>
|
||||
make_combinations(const std::vector<std::string>& input,
|
||||
const std::vector<std::string>& output,
|
||||
const std::string& delim) {
|
||||
std::vector<std::string> lines;
|
||||
std::vector<std::vector<std::string>> expectations;
|
||||
for (size_t i = 0; i < combinations_size; ++i) {
|
||||
auto l = combinations(input, delim, i);
|
||||
lines.reserve(lines.size() + l.size());
|
||||
lines.insert(lines.end(), l.begin(), l.end());
|
||||
|
||||
auto e = vector_combinations(output, i);
|
||||
expectations.reserve(expectations.size() + e.size());
|
||||
expectations.insert(expectations.end(), e.begin(), e.end());
|
||||
}
|
||||
|
||||
return {std::move(lines), std::move(expectations)};
|
||||
}
|
||||
} /* namespace */
|
||||
|
||||
/* ********************************** */
|
||||
/* ********************************** */
|
||||
|
||||
using matches_type = std::vector<std::pair<case_type, std::string>>;
|
||||
|
||||
template <typename... Matchers>
|
||||
void test_combinations(matches_type& matches, std::vector<std::string> delims) {
|
||||
|
||||
ss::splitter<Matchers...> s;
|
||||
std::vector<std::string> inputs;
|
||||
std::vector<std::string> outputs;
|
||||
for (const auto& [cases, e] : matches) {
|
||||
for (const auto& c : cases) {
|
||||
inputs.emplace_back(c);
|
||||
outputs.emplace_back(e);
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& delim : delims) {
|
||||
auto [lines, expectations] = make_combinations(inputs, outputs, delim);
|
||||
|
||||
REQUIRE(lines.size() == expectations.size());
|
||||
|
||||
for (size_t i = 0; i < lines.size(); ++i) {
|
||||
auto vec = s.split(buff(lines[i].c_str()), delim);
|
||||
CHECK(s.valid());
|
||||
CHECK(words(vec) == expectations[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter no setup") {
|
||||
{
|
||||
matches_type p{{{"x"}, "x"}, {{"\""}, "\""},
|
||||
{{""}, ""}, {{"\n"}, "\n"},
|
||||
{{"\"\""}, "\"\""}, {{"\" \\ \""}, "\" \\ \""},
|
||||
{{" "}, " "}};
|
||||
test_combinations(p, {",", ";", "\t", "::"});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter quote") {
|
||||
case_type case1 = {R"("""")"};
|
||||
case_type case2 = {R"("x""x")", R"(x"x)"};
|
||||
case_type case3 = {R"("")", R"()"};
|
||||
case_type case4 = {R"("x")", R"(x)"};
|
||||
case_type case5 = {R"("""""")"};
|
||||
case_type case6 = {R"("\")", R"(\)"};
|
||||
case_type case7 = {R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"};
|
||||
|
||||
std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
|
||||
{case4, "x"}, {case5, "\"\""}, {case6, "\\"},
|
||||
{case7, "xxxxxxxxxx"}};
|
||||
test_combinations<ss::quote<'"'>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case8 = {R"(",")"};
|
||||
case_type case9 = {R"("x,")"};
|
||||
case_type case10 = {R"(",x")"};
|
||||
case_type case11 = {R"("x,x")"};
|
||||
case_type case12 = {R"(",,")"};
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case3, ""}, {case8, ","},
|
||||
{case9, "x,"}, {case10, ",x"}, {case11, "x,x"},
|
||||
{case12, ",,"}};
|
||||
test_combinations<ss::quote<'"'>>(p, {","});
|
||||
}
|
||||
|
||||
case_type case13 = {R"("::")"};
|
||||
case_type case14 = {R"("x::")"};
|
||||
case_type case15 = {R"("::x")"};
|
||||
case_type case16 = {R"("x::x")"};
|
||||
case_type case17 = {R"("::::")"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case3, ""}, {case13, "::"},
|
||||
{case14, "x::"}, {case15, "::x"}, {case16, "x::x"},
|
||||
{case17, "::::"}};
|
||||
test_combinations<ss::quote<'"'>>(p, {"::"});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter trim") {
|
||||
auto guard = set_combinations_size(3);
|
||||
case_type case1 = spaced({R"(x)"}, " ");
|
||||
case_type case2 = spaced({R"(yy)"}, " ");
|
||||
case_type case3 = spaced({R"(y y)"}, " ");
|
||||
case_type case4 = spaced({R"()"}, " ");
|
||||
|
||||
std::vector<std::string> delims = {",", "::", "\t", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"},
|
||||
{case2, "yy"},
|
||||
{case3, "y y"},
|
||||
{case4, ""}};
|
||||
test_combinations<ss::trim<' '>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case5 = spaced({"z"}, "\t");
|
||||
case_type case6 = spaced({"ab"}, " ", "\t");
|
||||
case_type case7 = spaced({"a\tb"}, " ", "\t");
|
||||
case_type case8 = spaced({"a \t b"}, " ", "\t");
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"}, {case2, "yy"}, {case3, "y y"},
|
||||
{case4, ""}, {case5, "z"}, {case6, "ab"},
|
||||
{case7, "a\tb"}, {case8, "a \t b"}};
|
||||
test_combinations<ss::trim<' ', '\t'>>(p, {",", "::", "\n"});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter escape") {
|
||||
case_type case1 = {R"(x)", R"(\x)"};
|
||||
case_type case2 = {R"(xx)", R"(\xx)", R"(x\x)", R"(\x\x)"};
|
||||
case_type case3 = {R"(\\)"};
|
||||
|
||||
std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"}, {case2, "xx"}, {case3, "\\"}};
|
||||
test_combinations<ss::escape<'\\'>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case4 = {R"(\,)"};
|
||||
case_type case5 = {R"(x#,)"};
|
||||
case_type case6 = {R"(#,x)"};
|
||||
case_type case7 = {R"(x\,x)"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"}, {case2, "xx"}, {case3, "\\"},
|
||||
{case4, ","}, {case5, "x,"}, {case6, ",x"},
|
||||
{case7, "x,x"}};
|
||||
test_combinations<ss::escape<'\\', '#'>>(p, {","});
|
||||
}
|
||||
|
||||
case_type case8 = {R"(\:\:)"};
|
||||
case_type case9 = {R"(x\::x)"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"},
|
||||
{case2, "xx"},
|
||||
{case3, "\\"},
|
||||
{case8, "::"},
|
||||
{case9, "x::x"}};
|
||||
test_combinations<ss::escape<'\\'>>(p, {"::"});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter quote and trim") {
|
||||
auto guard = set_combinations_size(3);
|
||||
case_type case1 = spaced({R"("""")"}, " ");
|
||||
case_type case2 = spaced({R"("x""x")", R"(x"x)"}, " ");
|
||||
case_type case3 = spaced({R"("")", R"()"}, " ");
|
||||
case_type case4 = spaced({R"("x")", R"(x)"}, " ");
|
||||
case_type case5 = spaced({R"("""""")"}, " ");
|
||||
case_type case6 = spaced({R"("\")", R"(\)"}, " ");
|
||||
case_type case7 = spaced({R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"}, " ");
|
||||
|
||||
std::vector<std::string> delims = {",", "::", "\t", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
|
||||
{case4, "x"}, {case5, "\"\""}, {case6, "\\"},
|
||||
{case7, "xxxxxxxxxx"}};
|
||||
test_combinations<ss::quote<'"'>, ss::trim<' '>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case8 = spaced({R"(",")"}, " ", "\t");
|
||||
case_type case9 = spaced({R"("x,")"}, " ", "\t");
|
||||
case_type case10 = spaced({R"(",x")"}, " ", "\t");
|
||||
case_type case11 = spaced({R"("x,x")"}, " ", "\t");
|
||||
case_type case12 = spaced({R"(",,")"}, " ", "\t");
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case3, ""}, {case8, ","},
|
||||
{case9, "x,"}, {case10, ",x"}, {case11, "x,x"},
|
||||
{case12, ",,"}};
|
||||
test_combinations<ss::quote<'"'>, ss::trim<' ', '\t'>>(p, {","});
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the splitter with quoting and escaping enabled together.
// Each case_type lists several spellings of a field that must all split into
// the same word; test_combinations joins them with the delimiters and checks
// the result.
TEST_CASE("testing splitter quote and escape") {
    case_type case1 = {R"("\"")", R"(\")", R"("""")"};
    case_type case2 = {R"("x\"x")", R"(x\"x)", R"(x"x)", R"("x""x")"};
    case_type case3 = {R"("")", R"()"};
    case_type case4 = {R"("x")", R"(x)"};
    case_type case5 = {R"("\"\"")", R"("""""")", R"("\"""")", R"("""\"")"};
    case_type case6 = {R"("\\")", R"(\\)"};
    case_type case7 = {R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"};

    std::vector<std::string> delims = {",", "::", " ", "\t", "\n"};

    // double quote as quote character, backslash as escape character
    {
        matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
                       {case4, "x"},  {case5, "\"\""}, {case6, "\\"},
                       {case7, "xxxxxxxxxx"}};
        test_combinations<ss::quote<'"'>, ss::escape<'\\'>>(p, delims);
    }

    // single quote as quote character, '#' as escape character
    case_type case8 = {R"('xxxxxxxxxx')", R"(xxxxxxxxxx)"};
    case_type case9 = {R"('')", R"()"};
    case_type case10 = {R"('#\')", R"(#\)"};
    case_type case11 = {R"('#'')", R"(#')", R"('''')"};
    case_type case12 = {R"('##')", R"(##)"};
    {
        matches_type p{{case8, "xxxxxxxxxx"},
                       {case9, ""},
                       {case10, "\\"},
                       {case11, "'"},
                       {case12, "#"}};
        test_combinations<ss::quote<'\''>, ss::escape<'#'>>(p, delims);
    }

    // two escape characters at once; the fields contain the delimiter itself,
    // so only "," is used here
    // NOTE(review): the last two spellings of case13 are identical - confirm
    // whether one of them was meant to differ
    case_type case13 = {R"("x,x")",  R"(x\,x)",   R"(x#,x)",
                        R"("x\,x")", R"("x#,x")", R"("x#,x")"};
    case_type case14 = {R"("#\\#")", R"(#\\#)", R"(\\##)", R"("\\##")"};

    {
        matches_type p{{case1, "\""},
                       {case2, "x\"x"},
                       {case3, ""},
                       {case13, "x,x"},
                       {case14, "\\#"}};
        test_combinations<ss::quote<'"'>, ss::escape<'\\', '#'>>(p, {","});
    }
}
|
||||
|
||||
TEST_CASE("testing splitter escape and trim") {
|
||||
case_type case0 = spaced({R"(\ x\ )", R"(\ \x\ )"}, " ");
|
||||
case_type case1 = spaced({R"(x)", R"(\x)"}, " ");
|
||||
case_type case3 = spaced({R"(\\)"}, " ");
|
||||
|
||||
std::vector<std::string> delims = {",", "::", "\t", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case0, " x "}, {case1, "x"}, {case3, "\\"}};
|
||||
test_combinations<ss::escape<'\\'>, ss::trim<' '>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case4 = spaced({R"(\,)"}, " ");
|
||||
case_type case6 = spaced({R"(#,x)"}, " ");
|
||||
case_type case7 = spaced({R"(x\,x)"}, " ");
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"},
|
||||
{case3, "\\"},
|
||||
{case4, ","},
|
||||
{case6, ",x"},
|
||||
{case7, "x,x"}};
|
||||
test_combinations<ss::escape<'\\', '#'>, ss::trim<' '>>(p, {","});
|
||||
}
|
||||
|
||||
case_type case8 = spaced({R"(\:\:)"}, " ", "\t");
|
||||
case_type case9 = spaced({R"(x\::x)"}, " ", "\t");
|
||||
|
||||
{
|
||||
matches_type p{{case1, "x"},
|
||||
{case3, "\\"},
|
||||
{case8, "::"},
|
||||
{case9, "x::x"}};
|
||||
test_combinations<ss::escape<'\\'>, ss::trim<' ', '\t'>>(p, {"::"});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter quote and escape and trim") {
|
||||
auto guard = set_combinations_size(3);
|
||||
case_type case1 = spaced({R"("\"")", R"(\")", R"("""")"}, " ");
|
||||
case_type case2 =
|
||||
spaced({R"("x\"x")", R"(x\"x)", R"(x"x)", R"("x""x")"}, " ");
|
||||
case_type case3 = spaced({R"("")", R"()"}, " ");
|
||||
case_type case4 = spaced({R"("x")", R"(x)"}, " ");
|
||||
case_type case5 =
|
||||
spaced({R"("\"\"")", R"("""""")", R"("\"""")", R"("""\"")"}, " ");
|
||||
case_type case6 = spaced({R"("\\")", R"(\\)"}, " ");
|
||||
case_type case7 = spaced({R"("xxxxxxxxxx")", R"(xxxxxxxxxx)"}, " ");
|
||||
|
||||
std::vector<std::string> delims = {"::", "\n"};
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""}, {case2, "x\"x"}, {case3, ""},
|
||||
{case5, "\"\""}, {case6, "\\"}, {case7, "xxxxxxxxxx"}};
|
||||
test_combinations<ss::quote<'"'>, ss::escape<'\\'>,
|
||||
ss::trim<' '>>(p, delims);
|
||||
}
|
||||
|
||||
case_type case8 = spaced({R"('xxxxxxxxxx')", R"(xxxxxxxxxx)"}, " ", "\t");
|
||||
case_type case9 = spaced({R"('')", R"()"}, " ", "\t");
|
||||
case_type case10 = spaced({R"('#\')", R"(#\)"}, " ", "\t");
|
||||
case_type case11 = spaced({R"('#'')", R"(#')", R"('''')"}, " ", "\t");
|
||||
case_type case12 = spaced({R"('##')", R"(##)"}, " ", "\t");
|
||||
{
|
||||
matches_type p{{case8, "xxxxxxxxxx"},
|
||||
{case9, ""},
|
||||
{case10, "\\"},
|
||||
{case11, "'"},
|
||||
{case12, "#"}};
|
||||
test_combinations<ss::quote<'\''>, ss::escape<'#'>,
|
||||
ss::trim<' ', '\t'>>(p, {","});
|
||||
}
|
||||
|
||||
case_type case13 = spaced({R"("x,x")", R"(x\,x)", R"(x#,x)", R"("x\,x")",
|
||||
R"("x#,x")", R"("x#,x")"},
|
||||
" ", "\t");
|
||||
case_type case14 =
|
||||
spaced({R"("#\\#")", R"(#\\#)", R"(\\##)", R"("\\##")"}, " ", "\t");
|
||||
|
||||
{
|
||||
matches_type p{{case1, "\""},
|
||||
{case2, "x\"x"},
|
||||
{case3, ""},
|
||||
{case13, "x,x"},
|
||||
{case14, "\\#"}};
|
||||
test_combinations<ss::quote<'"'>, ss::escape<'\\', '#'>,
|
||||
ss::trim<' ', '\t'>>(p, {","});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter constnes if quoting and escaping are disabled") {
|
||||
// to compile is enough
|
||||
return;
|
||||
const char* const line{};
|
||||
ss::splitter s1;
|
||||
ss::splitter<ss::trim<' '>> s2;
|
||||
s1.split(line);
|
||||
s2.split(line);
|
||||
}
|
||||
|
||||
TEST_CASE("testing error mode") {
|
||||
|
||||
{
|
||||
// empty delimiter
|
||||
ss::splitter s;
|
||||
s.split(buff("just,some,strings"), "");
|
||||
CHECK(!s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
CHECK(s.error_msg().empty());
|
||||
|
||||
s.set_error_mode(ss::error_mode::error_string);
|
||||
s.split(buff("just,some,strings"), "");
|
||||
CHECK(!s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
CHECK(!s.error_msg().empty());
|
||||
}
|
||||
|
||||
{
|
||||
// unterminated quote
|
||||
ss::splitter<ss::quote<'"'>> s;
|
||||
s.split(buff("\"just"));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
CHECK(s.error_msg().empty());
|
||||
|
||||
s.set_error_mode(ss::error_mode::error_string);
|
||||
s.split(buff("\"just"));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
CHECK(!s.error_msg().empty());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Splitter>
|
||||
auto expect_unterminated_quote(Splitter& s, const std::string& line) {
|
||||
auto vec = s.split(buff(line.c_str()));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
return vec;
|
||||
}
|
||||
|
||||
namespace ss {
|
||||
// Used to test resplit since it is only accessible via friend class converter
|
||||
template <typename... Matchers>
|
||||
class converter {
|
||||
public:
|
||||
ss::splitter<Matchers...> splitter;
|
||||
auto resplit(char* new_line, size_t new_line_size) {
|
||||
return splitter.resplit(new_line, new_line_size);
|
||||
}
|
||||
};
|
||||
} /* ss */
|
||||
|
||||
TEST_CASE("testing unterminated quote") {
|
||||
{
|
||||
ss::converter<ss::quote<'"'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, "\"just");
|
||||
CHECK(vec.size() == 1);
|
||||
|
||||
auto new_line = buff.append(R"(",strings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
std::vector<std::string> expected{"just", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, "just,some,\"random");
|
||||
std::vector<std::string> expected{"just", "some", "just,some,\""};
|
||||
CHECK(words(vec) == expected);
|
||||
|
||||
auto new_line = buff.append(R"(",strings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "random", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"("just","some","ran"")");
|
||||
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
|
||||
CHECK(words(vec) == expected);
|
||||
|
||||
auto new_line = buff.append(R"(,dom","strings")");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "ran\",dom", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"("just","some","ran)");
|
||||
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
|
||||
CHECK(words(vec) == expected);
|
||||
|
||||
{
|
||||
auto new_line = buff.append(R"(,dom)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
auto new_line = buff.append(R"(",strings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "ran,dom", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"("just\"some","ra)");
|
||||
std::vector<std::string> expected{"just\"some"};
|
||||
auto w = words(vec);
|
||||
w.pop_back();
|
||||
CHECK(w == expected);
|
||||
{
|
||||
auto new_line = buff.append(R"(n,dom",str\"ings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just\"some", "ran,dom", "str\"ings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"("just\"some","ra"")");
|
||||
std::vector<std::string> expected{"just\"some"};
|
||||
auto w = words(vec);
|
||||
w.pop_back();
|
||||
CHECK(w == expected);
|
||||
{
|
||||
auto new_line = buff.append(R"(n,dom",str\"ings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"("just\"some","ra\")");
|
||||
std::vector<std::string> expected{"just\"some"};
|
||||
auto w = words(vec);
|
||||
w.pop_back();
|
||||
CHECK(w == expected);
|
||||
{
|
||||
auto new_line = buff.append(R"(n,dom",str\"ings)");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>, ss::trim<' '>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"( "just" ,some, "ra )");
|
||||
std::vector<std::string> expected{"just", "some"};
|
||||
auto w = words(vec);
|
||||
w.pop_back();
|
||||
CHECK(w == expected);
|
||||
{
|
||||
auto new_line = buff.append(R"( n,dom" , strings )");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "ra n,dom", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
|
||||
auto& s = c.splitter;
|
||||
auto vec = expect_unterminated_quote(s, R"( "ju\"st" ,some, "ra \")");
|
||||
std::vector<std::string> expected{"ju\"st", "some"};
|
||||
auto w = words(vec);
|
||||
w.pop_back();
|
||||
CHECK(w == expected);
|
||||
{
|
||||
auto new_line = buff.append(R"( n,dom" , strings )");
|
||||
vec = c.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"ju\"st", "some", "ra \" n,dom", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Exercises the failure paths of the splitter owned by a converter that has
// quoting ('"'), trimming (' ') and escaping ('\\') enabled. Every scenario
// must leave the splitter invalid; only the scenario with a quote that is
// never closed may additionally report an unterminated quote.
TEST_CASE("testing invalid splits") {
    ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> conv;
    auto& sp = conv.splitter;

    // Scenario 1: the delimiter passed to split is the empty string.
    sp.split(buff("some,random,strings"), "");
    CHECK(!sp.valid());
    CHECK(!sp.unterminated_quote());

    // Scenario 2: mismatched quote - the quote opened before `random` is
    // closed in the middle of the field (right before `strings`). The
    // delimiter argument is omitted, so the splitter's default is used.
    sp.split(buff(R"(some,"random,"strings")"));
    CHECK(!sp.valid());
    CHECK(!sp.unterminated_quote());

    // Scenario 3: the last field opens a quote that is never closed; this is
    // the only scenario expected to raise the unterminated-quote flag.
    sp.split(buff("some,random,\"strings"));
    CHECK(!sp.valid());
    CHECK(sp.unterminated_quote());

    // Scenario 4: resplit is handed a standalone line right after the
    // unterminated-quote split above.
    // NOTE(review): presumably rejected because `line` does not extend the
    // previously split buffer - confirm against the resplit implementation.
    char line[] = "some";
    auto resplit_result = conv.resplit(line, strlen(line));
    CHECK(!sp.valid());
    CHECK(!sp.unterminated_quote());
}
|
Loading…
Reference in New Issue
Block a user