mirror of
https://github.com/red0124/ssp.git
synced 2025-01-23 04:55:20 +01:00
refator splitter, add resplit functionality, write some unit tests
This commit is contained in:
parent
9302a25256
commit
a5c9216824
@ -103,9 +103,6 @@ struct tied_class {
|
||||
template <typename... Ts>
|
||||
constexpr bool tied_class_v = tied_class<Ts...>::value;
|
||||
|
||||
// the error can be set inside a string, or a bool
|
||||
enum class error_mode { error_string, error_bool };
|
||||
|
||||
////////////////
|
||||
// converter
|
||||
////////////////
|
||||
@ -128,6 +125,12 @@ public:
|
||||
no_void_validator_tup_t<Ts...> convert(
|
||||
char* line, const std::string& delim = default_delimiter) {
|
||||
input_ = split(line, delim);
|
||||
/* TODO
|
||||
if (!splitter_.valid()) {
|
||||
// set error
|
||||
return {};
|
||||
}
|
||||
*/
|
||||
return convert<Ts...>(input_);
|
||||
}
|
||||
|
||||
@ -178,9 +181,13 @@ public:
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
const std::string& error_msg() const { return string_error_; }
|
||||
const std::string& error_msg() const {
|
||||
return string_error_;
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) { error_mode_ = mode; }
|
||||
void set_error_mode(error_mode mode) {
|
||||
error_mode_ = mode;
|
||||
}
|
||||
|
||||
// 'splits' string by given delimiter, returns vector of pairs which
|
||||
// contain the beginnings and the ends of each column of the string
|
||||
|
@ -5,6 +5,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// TODO remove
|
||||
#include <iostream>
|
||||
|
||||
namespace ss {
|
||||
template <char... Cs>
|
||||
struct matcher {
|
||||
@ -31,8 +34,8 @@ public:
|
||||
static bool match(char c) = delete;
|
||||
};
|
||||
|
||||
template <char... Cs>
|
||||
struct quote : matcher<Cs...> {};
|
||||
template <char C>
|
||||
struct quote : matcher<C> {};
|
||||
|
||||
template <char... Cs>
|
||||
struct trim : matcher<Cs...> {};
|
||||
@ -79,24 +82,131 @@ struct setup {
|
||||
template <typename... Ts>
|
||||
struct setup<setup<Ts...>> : setup<Ts...> {};
|
||||
|
||||
enum class state { begin, reading, quoting, finished };
|
||||
using range = std::pair<const char*, const char*>;
|
||||
|
||||
using string_range = std::pair<const char*, const char*>;
|
||||
using split_input = std::vector<string_range>;
|
||||
|
||||
// the error can be set inside a string, or a bool
|
||||
enum class error_mode { error_string, error_bool };
|
||||
|
||||
template <typename... Ts>
|
||||
class splitter {
|
||||
private:
|
||||
enum class state { begin, reading, quoting, finished };
|
||||
constexpr static auto default_delimiter = ",";
|
||||
|
||||
using quote = typename setup<Ts...>::quote;
|
||||
using trim = typename setup<Ts...>::trim;
|
||||
using escape = typename setup<Ts...>::escape;
|
||||
|
||||
bool match(const char* end_i, char delim) {
|
||||
return *end_i == delim;
|
||||
constexpr static auto is_const_line = !quote::enabled && !escape::enabled;
|
||||
using line_ptr_type =
|
||||
typename ternary<is_const_line, const char*, char*>::type;
|
||||
|
||||
public:
|
||||
bool valid() const {
|
||||
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
bool unterminated_quote() {
|
||||
return unterminated_quote_;
|
||||
}
|
||||
|
||||
const std::string& error_msg() const {
|
||||
return string_error_;
|
||||
}
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
error_mode_ = mode;
|
||||
}
|
||||
|
||||
split_input& split(line_ptr_type new_line,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
output_.clear();
|
||||
return resplit(new_line, -1, delimiter);
|
||||
}
|
||||
|
||||
void adjust_ranges(const char* old_line) {
|
||||
for (auto& [begin, end] : output_) {
|
||||
begin = begin - old_line + line_;
|
||||
end = end - old_line + line_;
|
||||
}
|
||||
}
|
||||
|
||||
split_input& resplit(line_ptr_type new_line, ssize_t new_size,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
clear_error();
|
||||
line_ = new_line;
|
||||
|
||||
// resplitting, continue from last slice
|
||||
if (!output_.empty()) {
|
||||
const auto& last = std::prev(output_.end());
|
||||
const auto [old_line, old_begin] = *last;
|
||||
size_t begin = old_begin - old_line - 1;
|
||||
output_.pop_back();
|
||||
adjust_ranges(old_line);
|
||||
|
||||
// safety measure
|
||||
if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
|
||||
set_error_invalid_resplit();
|
||||
return output_;
|
||||
}
|
||||
|
||||
line_ += begin;
|
||||
}
|
||||
|
||||
return split_impl_select_delim(delimiter);
|
||||
}
|
||||
|
||||
private:
|
||||
////////////////
|
||||
// error
|
||||
////////////////
|
||||
|
||||
void clear_error() {
|
||||
string_error_.clear();
|
||||
bool_error_ = false;
|
||||
unterminated_quote_ = false;
|
||||
}
|
||||
|
||||
void set_error_empty_delimiter() {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("empty delimiter");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_unterminated_quote() {
|
||||
unterminated_quote_ = true;
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("unterminated quote");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
void set_error_invalid_resplit() {
|
||||
if (error_mode_ == error_mode::error_string) {
|
||||
string_error_.clear();
|
||||
string_error_.append("invalid_resplit");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////
|
||||
// matching
|
||||
////////////////
|
||||
|
||||
bool match(const char* const curr, char delim) {
|
||||
return *curr == delim;
|
||||
};
|
||||
|
||||
bool match(const char* end_i, const std::string& delim) {
|
||||
return strncmp(end_i, delim.c_str(), delim.size()) == 0;
|
||||
bool match(const char* const curr, const std::string& delim) {
|
||||
return strncmp(curr, delim.c_str(), delim.size()) == 0;
|
||||
};
|
||||
|
||||
size_t delimiter_size(char) {
|
||||
@ -107,7 +217,7 @@ class splitter {
|
||||
return delim.size();
|
||||
}
|
||||
|
||||
void trim_if_enabled(char*& curr) {
|
||||
void trim_if_enabled(line_ptr_type& curr) {
|
||||
if constexpr (trim::enabled) {
|
||||
while (trim::match(*curr)) {
|
||||
++curr;
|
||||
@ -115,89 +225,88 @@ class splitter {
|
||||
}
|
||||
}
|
||||
|
||||
void shift_if_escaped(char*& curr_i) {
|
||||
void shift_if_escaped(line_ptr_type& curr) {
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*curr_i)) {
|
||||
*curr = end[1];
|
||||
++end;
|
||||
if (escape::match(*curr)) {
|
||||
*curr_ = end_[1];
|
||||
++end_;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void shift() {
|
||||
if constexpr (escape::enabled || quote::enabled) {
|
||||
*curr = *end;
|
||||
}
|
||||
++end;
|
||||
++curr;
|
||||
}
|
||||
|
||||
void shift(size_t n) {
|
||||
if constexpr (escape::enabled || quote::enabled) {
|
||||
memcpy(curr, end, n);
|
||||
}
|
||||
end += n;
|
||||
curr += n;
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
std::tuple<size_t, bool> match_delimiter(char* begin, const Delim& delim) {
|
||||
char* end_i = begin;
|
||||
std::tuple<size_t, bool> match_delimiter(line_ptr_type begin,
|
||||
const Delim& delim) {
|
||||
line_ptr_type end = begin;
|
||||
|
||||
trim_if_enabled(end_i);
|
||||
trim_if_enabled(end);
|
||||
|
||||
// just spacing
|
||||
if (*end_i == '\0') {
|
||||
if (*end == '\0') {
|
||||
return {0, false};
|
||||
}
|
||||
|
||||
// not a delimiter
|
||||
if (!match(end_i, delim)) {
|
||||
shift_if_escaped(end_i);
|
||||
return {1 + end_i - begin, false};
|
||||
if (!match(end, delim)) {
|
||||
shift_if_escaped(end);
|
||||
return {1 + end - begin, false};
|
||||
}
|
||||
|
||||
end_i += delimiter_size(delim);
|
||||
trim_if_enabled(end_i);
|
||||
end += delimiter_size(delim);
|
||||
trim_if_enabled(end);
|
||||
|
||||
// delimiter
|
||||
return {end_i - begin, true};
|
||||
return {end - begin, true};
|
||||
}
|
||||
|
||||
////////////////
|
||||
// matching
|
||||
////////////////
|
||||
|
||||
void shift() {
|
||||
if constexpr (!is_const_line) {
|
||||
*curr_ = *end_;
|
||||
}
|
||||
++end_;
|
||||
++curr_;
|
||||
}
|
||||
|
||||
void shift(size_t n) {
|
||||
if constexpr (!is_const_line) {
|
||||
memcpy(curr_, end_, n);
|
||||
}
|
||||
end_ += n;
|
||||
curr_ += n;
|
||||
}
|
||||
|
||||
void push_and_start_next(size_t n) {
|
||||
output_.emplace_back(begin, curr);
|
||||
begin = end + n;
|
||||
output_.emplace_back(begin_, curr_);
|
||||
begin_ = end_ + n;
|
||||
state_ = state::begin;
|
||||
}
|
||||
|
||||
public:
|
||||
bool valid() {
|
||||
return error_.empty();
|
||||
}
|
||||
|
||||
split_input& split(char* new_line, const std::string& d = ",") {
|
||||
line = new_line;
|
||||
output_.clear();
|
||||
switch (d.size()) {
|
||||
split_input& split_impl_select_delim(
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
switch (delimiter.size()) {
|
||||
case 0:
|
||||
// set error
|
||||
set_error_empty_delimiter();
|
||||
return output_;
|
||||
case 1:
|
||||
return split_impl(d[0]);
|
||||
return split_impl(delimiter[0]);
|
||||
default:
|
||||
return split_impl(d);
|
||||
return split_impl(delimiter);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
std::vector<range>& split_impl(const Delim& delim) {
|
||||
split_input& split_impl(const Delim& delim) {
|
||||
state_ = state::begin;
|
||||
begin = line;
|
||||
begin_ = line_;
|
||||
|
||||
trim_if_enabled(begin);
|
||||
trim_if_enabled(begin_);
|
||||
|
||||
while (state_ != state::finished) {
|
||||
curr = end = begin;
|
||||
curr_ = end_ = begin_;
|
||||
switch (state_) {
|
||||
case (state::begin):
|
||||
state_begin();
|
||||
@ -216,10 +325,14 @@ public:
|
||||
return output_;
|
||||
}
|
||||
|
||||
////////////////
|
||||
// states
|
||||
////////////////
|
||||
|
||||
void state_begin() {
|
||||
if constexpr (quote::enabled) {
|
||||
if (quote::match(*begin)) {
|
||||
++begin;
|
||||
if (quote::match(*begin_)) {
|
||||
++begin_;
|
||||
state_ = state::quoting;
|
||||
return;
|
||||
}
|
||||
@ -230,13 +343,13 @@ public:
|
||||
template <typename Delim>
|
||||
void state_reading(const Delim& delim) {
|
||||
while (true) {
|
||||
auto [width, valid] = match_delimiter(end, delim);
|
||||
auto [width, valid] = match_delimiter(end_, delim);
|
||||
|
||||
// not a delimiter
|
||||
if (!valid) {
|
||||
if (width == 0) {
|
||||
// eol
|
||||
output_.emplace_back(begin, curr);
|
||||
output_.emplace_back(begin_, curr_);
|
||||
state_ = state::finished;
|
||||
break;
|
||||
} else {
|
||||
@ -255,16 +368,16 @@ public:
|
||||
void state_quoting(const Delim& delim) {
|
||||
if constexpr (quote::enabled) {
|
||||
while (true) {
|
||||
if (quote::match(*end)) {
|
||||
if (quote::match(*end_)) {
|
||||
// double quote
|
||||
// eg: ...,"hel""lo,... -> hel"lo
|
||||
if (quote::match(end[1])) {
|
||||
++end;
|
||||
if (quote::match(end_[1])) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto [width, valid] = match_delimiter(end + 1, delim);
|
||||
auto [width, valid] = match_delimiter(end_ + 1, delim);
|
||||
|
||||
// not a delimiter
|
||||
if (!valid) {
|
||||
@ -272,10 +385,11 @@ public:
|
||||
// eol
|
||||
// eg: ...,"hello" \0 -> hello
|
||||
// eg no trim: ...,"hello"\0 -> hello
|
||||
output_.emplace_back(begin, curr);
|
||||
output_.emplace_back(begin_, curr_);
|
||||
} else {
|
||||
// missmatched quote
|
||||
// eg: ...,"hel"lo,... -> error
|
||||
// or not
|
||||
}
|
||||
state_ = state::finished;
|
||||
break;
|
||||
@ -287,8 +401,8 @@ public:
|
||||
}
|
||||
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*end)) {
|
||||
++end;
|
||||
if (escape::match(*end_)) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
@ -296,27 +410,31 @@ public:
|
||||
|
||||
// unterminated error
|
||||
// eg: ..."hell\0 -> quote not terminated
|
||||
if (*end == '\0') {
|
||||
*curr = '\0';
|
||||
if (*end_ == '\0') {
|
||||
set_error_unterminated_quote();
|
||||
output_.emplace_back(line_, begin_);
|
||||
state_ = state::finished;
|
||||
break;
|
||||
}
|
||||
shift();
|
||||
}
|
||||
} else {
|
||||
// set error impossible scenario
|
||||
state_ = state::finished;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<range> output_;
|
||||
std::string error_ = "";
|
||||
////////////////
|
||||
// members
|
||||
////////////////
|
||||
|
||||
std::vector<string_range> output_;
|
||||
std::string string_error_;
|
||||
bool bool_error_;
|
||||
bool unterminated_quote_;
|
||||
enum error_mode error_mode_ { error_mode::error_bool };
|
||||
line_ptr_type begin_;
|
||||
line_ptr_type curr_;
|
||||
line_ptr_type end_;
|
||||
line_ptr_type line_;
|
||||
state state_;
|
||||
char* curr;
|
||||
char* end;
|
||||
char* begin;
|
||||
char* line;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
|
@ -416,7 +416,7 @@ TEST_CASE("testing splitter escape and trim") {
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter quote and escape and trim") {
|
||||
auto guard = set_combinations_size(4);
|
||||
auto guard = set_combinations_size(3);
|
||||
case_type case1 = spaced({R"("\"")", R"(\")", R"("""")"}, " ");
|
||||
case_type case2 =
|
||||
spaced({R"("x\"x")", R"(x\"x)", R"(x"x)", R"("x""x")"}, " ");
|
||||
@ -467,3 +467,97 @@ TEST_CASE("testing splitter quote and escape and trim") {
|
||||
ss::trim<' ', '\t'>>(p, {","});
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE("testing splitter constnes if quoting and escaping are disabled") {
|
||||
// to compile is enough
|
||||
return;
|
||||
const char* const line{};
|
||||
ss::splitter s1;
|
||||
ss::splitter<ss::trim<' '>> s2;
|
||||
s1.split(line);
|
||||
s2.split(line);
|
||||
}
|
||||
|
||||
TEST_CASE("testing error mode") {
|
||||
|
||||
{
|
||||
// empty delimiter
|
||||
ss::splitter s;
|
||||
s.split(buff("just,some,strings"), "");
|
||||
CHECK(!s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
CHECK(s.error_msg().empty());
|
||||
|
||||
s.set_error_mode(ss::error_mode::error_string);
|
||||
s.split(buff("just,some,strings"), "");
|
||||
CHECK(!s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
CHECK(!s.error_msg().empty());
|
||||
}
|
||||
|
||||
{
|
||||
// unterminated quote
|
||||
ss::splitter<ss::quote<'"'>> s;
|
||||
s.split(buff("\"just"));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
CHECK(s.error_msg().empty());
|
||||
|
||||
s.set_error_mode(ss::error_mode::error_string);
|
||||
s.split(buff("\"just"));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
CHECK(!s.error_msg().empty());
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Splitter>
|
||||
auto expect_unterminated_quote(Splitter& s, const std::string& line) {
|
||||
auto vec = s.split(buff(line.c_str()));
|
||||
CHECK(!s.valid());
|
||||
CHECK(s.unterminated_quote());
|
||||
return vec;
|
||||
}
|
||||
|
||||
TEST_CASE("testing unterminated quote") {
|
||||
{
|
||||
ss::splitter<ss::quote<'"'>> s;
|
||||
auto vec = expect_unterminated_quote(s, "\"just");
|
||||
CHECK(vec.size() == 1);
|
||||
|
||||
char new_line[] = R"("just",strings)";
|
||||
vec = s.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
std::vector<std::string> expected{"just", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
ss::splitter<ss::quote<'"'>> s;
|
||||
auto vec = expect_unterminated_quote(s, "just,some,\"random");
|
||||
std::vector<std::string> expected{"just", "some", "just,some,\""};
|
||||
CHECK(words(vec) == expected);
|
||||
|
||||
char new_line[] = R"(just,some,"random",strings)";
|
||||
vec = s.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "random", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
|
||||
{
|
||||
ss::splitter<ss::quote<'"'>> s;
|
||||
auto vec = expect_unterminated_quote(s, R"("just","some","ran)");
|
||||
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
|
||||
CHECK(words(vec) == expected);
|
||||
|
||||
char new_line[] = R"("just","some","ran,dom","strings")";
|
||||
vec = s.resplit(new_line, strlen(new_line));
|
||||
CHECK(s.valid());
|
||||
CHECK(!s.unterminated_quote());
|
||||
expected = {"just", "some", "ran,dom", "strings"};
|
||||
CHECK(words(vec) == expected);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user