refactor code, increase performance

This commit is contained in:
ado 2021-02-05 18:28:10 +01:00
parent 7640c038f3
commit f973f404be
3 changed files with 89 additions and 95 deletions

View File

@ -126,8 +126,8 @@ public:
template <typename... Ts> template <typename... Ts>
no_void_validator_tup_t<Ts...> convert( no_void_validator_tup_t<Ts...> convert(
line_ptr_type line, const std::string& delim = default_delimiter) { line_ptr_type line, const std::string& delim = default_delimiter) {
input_ = split(line, delim); split(line, delim);
return convert<Ts...>(input_); return convert<Ts...>(splitter_.input_);
} }
// parses already split line, returns 'T' object with extracted values // parses already split line, returns 'T' object with extracted values
@ -169,7 +169,7 @@ public:
// same as above, but uses cached split line // same as above, but uses cached split line
template <typename T, typename... Ts> template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> convert() { no_void_validator_tup_t<T, Ts...> convert() {
return convert<T, Ts...>(input_); return convert<T, Ts...>(splitter_.input_);
} }
bool valid() const { bool valid() const {
@ -194,19 +194,17 @@ public:
// contain the beginnings and the ends of each column of the string // contain the beginnings and the ends of each column of the string
const split_input& split(line_ptr_type line, const split_input& split(line_ptr_type line,
const std::string& delim = default_delimiter) { const std::string& delim = default_delimiter) {
input_.clear(); splitter_.input_.clear();
if (line[0] == '\0') { if (line[0] == '\0') {
return input_; return splitter_.input_;
} }
input_ = splitter_.split(line, delim); return splitter_.split(line, delim);
return input_;
} }
const split_input& resplit(line_ptr_type new_line, ssize_t new_size, const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
const std::string& delim = default_delimiter) { const std::string& delim = default_delimiter) {
input_ = splitter_.resplit(new_line, new_size, delim); return splitter_.resplit(new_line, new_size, delim);
return input_;
} }
private: private:
@ -371,7 +369,6 @@ private:
// members // members
//////////////// ////////////////
std::vector<string_range> input_;
std::string string_error_; std::string string_error_;
bool bool_error_; bool bool_error_;
enum error_mode error_mode_ { error_mode::error_bool }; enum error_mode error_mode_ { error_mode::error_bool };

View File

@ -98,7 +98,7 @@ struct setup {
using trim = get_matcher_t<trim, Ts...>; using trim = get_matcher_t<trim, Ts...>;
using escape = get_matcher_t<escape, Ts...>; using escape = get_matcher_t<escape, Ts...>;
#define ASSERT_MSG "cannot have the same character in multiple matchers" #define ASSERT_MSG "cannot have the same match character in multiple matchers"
static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG); static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG);
static_assert(!matches_intersect<trim, escape>(), ASSERT_MSG); static_assert(!matches_intersect<trim, escape>(), ASSERT_MSG);
static_assert(!matches_intersect<escape, quote>(), ASSERT_MSG); static_assert(!matches_intersect<escape, quote>(), ASSERT_MSG);

View File

@ -3,6 +3,7 @@
#include "type_traits.hpp" #include "type_traits.hpp"
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
@ -17,7 +18,6 @@ enum class error_mode { error_string, error_bool };
template <typename... Ts> template <typename... Ts>
class splitter { class splitter {
private: private:
enum class state { begin, reading, quoting, finished };
constexpr static auto default_delimiter = ","; constexpr static auto default_delimiter = ",";
using quote = typename setup<Ts...>::quote; using quote = typename setup<Ts...>::quote;
@ -49,12 +49,12 @@ public:
const split_input& split(line_ptr_type new_line, const split_input& split(line_ptr_type new_line,
const std::string& delimiter = default_delimiter) { const std::string& delimiter = default_delimiter) {
output_.clear(); input_.clear();
return resplit(new_line, -1, delimiter); return resplit(new_line, -1, delimiter);
} }
void adjust_ranges(const char* old_line) { void adjust_ranges(const char* old_line) {
for (auto& [begin, end] : output_) { for (auto& [begin, end] : input_) {
begin = begin - old_line + line_; begin = begin - old_line + line_;
end = end - old_line + line_; end = end - old_line + line_;
} }
@ -66,17 +66,17 @@ public:
line_ = new_line; line_ = new_line;
// resplitting, continue from last slice // resplitting, continue from last slice
if (!output_.empty() && unterminated_quote()) { if (!input_.empty() && unterminated_quote()) {
const auto& last = std::prev(output_.end()); const auto& last = std::prev(input_.end());
const auto [old_line, old_begin] = *last; const auto [old_line, old_begin] = *last;
size_t begin = old_begin - old_line - 1; size_t begin = old_begin - old_line - 1;
output_.pop_back(); input_.pop_back();
adjust_ranges(old_line); adjust_ranges(old_line);
// safety measure // safety measure
if (new_size != -1 && static_cast<size_t>(new_size) < begin) { if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
set_error_invalid_resplit(); set_error_invalid_resplit();
return output_; return input_;
} }
begin_ = line_ + begin; begin_ = line_ + begin;
@ -205,32 +205,31 @@ private:
void shift() { void shift() {
if constexpr (!is_const_line) { if constexpr (!is_const_line) {
*curr_ = *end_; *curr_ = *end_;
++curr_;
} }
++end_; ++end_;
++curr_;
} }
void shift(size_t n) { void shift(size_t n) {
if constexpr (!is_const_line) { if constexpr (!is_const_line) {
memcpy(curr_, end_, n); memcpy(curr_, end_, n);
curr_ += n;
} }
end_ += n; end_ += n;
curr_ += n;
} }
void push_and_start_next(size_t n) { void push_and_start_next(size_t n) {
output_.emplace_back(begin_, curr_); push_range();
begin_ = end_ + n; begin_ = end_ + n;
state_ = state::begin;
} }
split_input& split_impl_select_delim( const split_input& split_impl_select_delim(
const std::string& delimiter = default_delimiter) { const std::string& delimiter = default_delimiter) {
clear_error(); clear_error();
switch (delimiter.size()) { switch (delimiter.size()) {
case 0: case 0:
set_error_empty_delimiter(); set_error_empty_delimiter();
return output_; return input_;
case 1: case 1:
return split_impl(delimiter[0]); return split_impl(delimiter[0]);
default: default:
@ -239,48 +238,43 @@ private:
} }
template <typename Delim> template <typename Delim>
split_input& split_impl(const Delim& delim) { const split_input& split_impl(const Delim& delim) {
state_ = state::begin;
if (output_.empty()) { if (input_.empty()) {
begin_ = line_; begin_ = line_;
} }
trim_if_enabled(begin_); trim_if_enabled(begin_);
while (state_ != state::finished) { for (done_ = false; !done_; state_begin(delim))
curr_ = end_ = begin_; ;
switch (state_) {
case (state::begin):
state_begin();
break;
case (state::reading):
state_reading(delim);
break;
case (state::quoting):
state_quoting(delim);
break;
default:
break;
};
}
return output_; return input_;
} }
//////////////// ////////////////
// states // states
//////////////// ////////////////
void state_begin() { void push_range() {
if constexpr (is_const_line) {
input_.emplace_back(begin_, end_);
} else {
input_.emplace_back(begin_, curr_);
}
}
template <typename Delim>
void state_begin(const Delim& delim) {
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
if (quote::match(*begin_)) { if (quote::match(*begin_)) {
++begin_; curr_ = end_ = ++begin_;
state_ = state::quoting; state_quoting(delim);
return; return;
} }
} }
state_ = state::reading; curr_ = end_ = begin_;
state_reading(delim);
} }
template <typename Delim> template <typename Delim>
@ -288,62 +282,30 @@ private:
while (true) { while (true) {
auto [width, valid] = match_delimiter(end_, delim); auto [width, valid] = match_delimiter(end_, delim);
// not a delimiter
if (!valid) { if (!valid) {
// not a delimiter
if (width == 0) { if (width == 0) {
// eol // eol
output_.emplace_back(begin_, curr_); push_range();
state_ = state::finished; done_ = true;
break; break;
} else { } else {
shift(width); shift(width);
continue; continue;
} }
} } else {
// found delimiter // found delimiter
push_and_start_next(width); push_and_start_next(width);
break; break;
} }
} }
}
template <typename Delim> template <typename Delim>
void state_quoting(const Delim& delim) { void state_quoting(const Delim& delim) {
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
while (true) { while (true) {
if (quote::match(*end_)) { if (!quote::match(*end_)) {
// double quote
// eg: ...,"hel""lo,... -> hel"lo
if (quote::match(end_[1])) {
++end_;
shift();
continue;
}
auto [width, valid] = match_delimiter(end_ + 1, delim);
// not a delimiter
if (!valid) {
if (width == 0) {
// eol
// eg: ...,"hello" \0 -> hello
// eg no trim: ...,"hello"\0 -> hello
output_.emplace_back(begin_, curr_);
} else {
// mismatched quote
// eg: ...,"hel"lo,... -> error
set_error_mismatched_quote(end_ - line_);
output_.emplace_back(line_, begin_);
}
state_ = state::finished;
break;
}
// delimiter
push_and_start_next(width + 1);
break;
}
if constexpr (escape::enabled) { if constexpr (escape::enabled) {
if (escape::match(*end_)) { if (escape::match(*end_)) {
++end_; ++end_;
@ -352,15 +314,48 @@ private:
} }
} }
// unterminated error // unterminated quote error
// eg: ..."hell\0 -> quote not terminated // eg: ..."hell\0 -> quote not terminated
if (*end_ == '\0') { if (*end_ == '\0') {
set_error_unterminated_quote(); set_error_unterminated_quote();
output_.emplace_back(line_, begin_); input_.emplace_back(line_, begin_);
state_ = state::finished; done_ = true;
break; break;
} }
shift(); shift();
continue;
}
auto [width, valid] = match_delimiter(end_ + 1, delim);
// delimiter
if (valid) {
push_and_start_next(width + 1);
break;
}
// double quote
// eg: ...,"hel""lo",... -> hel"lo
if (quote::match(end_[1])) {
++end_;
shift();
continue;
}
// not a delimiter
if (width == 0) {
// eol
// eg: ...,"hello" \0 -> hello
// eg no trim: ...,"hello"\0 -> hello
push_range();
} else {
// mismatched quote
// eg: ...,"hel"lo,... -> error
set_error_mismatched_quote(end_ - line_);
input_.emplace_back(line_, begin_);
}
done_ = true;
break;
} }
} }
} }
@ -369,7 +364,6 @@ private:
// members // members
//////////////// ////////////////
std::vector<string_range> output_;
std::string string_error_; std::string string_error_;
bool bool_error_{false}; bool bool_error_{false};
bool unterminated_quote_{false}; bool unterminated_quote_{false};
@ -378,7 +372,10 @@ private:
line_ptr_type curr_; line_ptr_type curr_;
line_ptr_type end_; line_ptr_type end_;
line_ptr_type line_; line_ptr_type line_;
state state_; bool done_;
public:
split_input input_;
}; };
} /* ss */ } /* ss */