mirror of
https://github.com/red0124/ssp.git
synced 2025-02-02 16:51:12 +01:00
refactor code, increase performance
This commit is contained in:
parent
7640c038f3
commit
f973f404be
@ -126,8 +126,8 @@ public:
|
||||
template <typename... Ts>
|
||||
no_void_validator_tup_t<Ts...> convert(
|
||||
line_ptr_type line, const std::string& delim = default_delimiter) {
|
||||
input_ = split(line, delim);
|
||||
return convert<Ts...>(input_);
|
||||
split(line, delim);
|
||||
return convert<Ts...>(splitter_.input_);
|
||||
}
|
||||
|
||||
// parses already split line, returns 'T' object with extracted values
|
||||
@ -169,7 +169,7 @@ public:
|
||||
// same as above, but uses cached split line
|
||||
template <typename T, typename... Ts>
|
||||
no_void_validator_tup_t<T, Ts...> convert() {
|
||||
return convert<T, Ts...>(input_);
|
||||
return convert<T, Ts...>(splitter_.input_);
|
||||
}
|
||||
|
||||
bool valid() const {
|
||||
@ -194,19 +194,17 @@ public:
|
||||
// contain the beginnings and the ends of each column of the string
|
||||
const split_input& split(line_ptr_type line,
|
||||
const std::string& delim = default_delimiter) {
|
||||
input_.clear();
|
||||
splitter_.input_.clear();
|
||||
if (line[0] == '\0') {
|
||||
return input_;
|
||||
return splitter_.input_;
|
||||
}
|
||||
|
||||
input_ = splitter_.split(line, delim);
|
||||
return input_;
|
||||
return splitter_.split(line, delim);
|
||||
}
|
||||
|
||||
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
|
||||
const std::string& delim = default_delimiter) {
|
||||
input_ = splitter_.resplit(new_line, new_size, delim);
|
||||
return input_;
|
||||
return splitter_.resplit(new_line, new_size, delim);
|
||||
}
|
||||
|
||||
private:
|
||||
@ -371,7 +369,6 @@ private:
|
||||
// members
|
||||
////////////////
|
||||
|
||||
std::vector<string_range> input_;
|
||||
std::string string_error_;
|
||||
bool bool_error_;
|
||||
enum error_mode error_mode_ { error_mode::error_bool };
|
||||
|
@ -98,7 +98,7 @@ struct setup {
|
||||
using trim = get_matcher_t<trim, Ts...>;
|
||||
using escape = get_matcher_t<escape, Ts...>;
|
||||
|
||||
#define ASSERT_MSG "cannot have the same character in multiple matchers"
|
||||
#define ASSERT_MSG "cannot have the same match character in multiple matchers"
|
||||
static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG);
|
||||
static_assert(!matches_intersect<trim, escape>(), ASSERT_MSG);
|
||||
static_assert(!matches_intersect<escape, quote>(), ASSERT_MSG);
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include "type_traits.hpp"
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -17,7 +18,6 @@ enum class error_mode { error_string, error_bool };
|
||||
template <typename... Ts>
|
||||
class splitter {
|
||||
private:
|
||||
enum class state { begin, reading, quoting, finished };
|
||||
constexpr static auto default_delimiter = ",";
|
||||
|
||||
using quote = typename setup<Ts...>::quote;
|
||||
@ -49,12 +49,12 @@ public:
|
||||
|
||||
const split_input& split(line_ptr_type new_line,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
output_.clear();
|
||||
input_.clear();
|
||||
return resplit(new_line, -1, delimiter);
|
||||
}
|
||||
|
||||
void adjust_ranges(const char* old_line) {
|
||||
for (auto& [begin, end] : output_) {
|
||||
for (auto& [begin, end] : input_) {
|
||||
begin = begin - old_line + line_;
|
||||
end = end - old_line + line_;
|
||||
}
|
||||
@ -66,17 +66,17 @@ public:
|
||||
line_ = new_line;
|
||||
|
||||
// resplitting, continue from last slice
|
||||
if (!output_.empty() && unterminated_quote()) {
|
||||
const auto& last = std::prev(output_.end());
|
||||
if (!input_.empty() && unterminated_quote()) {
|
||||
const auto& last = std::prev(input_.end());
|
||||
const auto [old_line, old_begin] = *last;
|
||||
size_t begin = old_begin - old_line - 1;
|
||||
output_.pop_back();
|
||||
input_.pop_back();
|
||||
adjust_ranges(old_line);
|
||||
|
||||
// safety measure
|
||||
if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
|
||||
set_error_invalid_resplit();
|
||||
return output_;
|
||||
return input_;
|
||||
}
|
||||
|
||||
begin_ = line_ + begin;
|
||||
@ -205,32 +205,31 @@ private:
|
||||
void shift() {
|
||||
if constexpr (!is_const_line) {
|
||||
*curr_ = *end_;
|
||||
++curr_;
|
||||
}
|
||||
++end_;
|
||||
++curr_;
|
||||
}
|
||||
|
||||
void shift(size_t n) {
|
||||
if constexpr (!is_const_line) {
|
||||
memcpy(curr_, end_, n);
|
||||
curr_ += n;
|
||||
}
|
||||
end_ += n;
|
||||
curr_ += n;
|
||||
}
|
||||
|
||||
void push_and_start_next(size_t n) {
|
||||
output_.emplace_back(begin_, curr_);
|
||||
push_range();
|
||||
begin_ = end_ + n;
|
||||
state_ = state::begin;
|
||||
}
|
||||
|
||||
split_input& split_impl_select_delim(
|
||||
const split_input& split_impl_select_delim(
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
clear_error();
|
||||
switch (delimiter.size()) {
|
||||
case 0:
|
||||
set_error_empty_delimiter();
|
||||
return output_;
|
||||
return input_;
|
||||
case 1:
|
||||
return split_impl(delimiter[0]);
|
||||
default:
|
||||
@ -239,48 +238,43 @@ private:
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
split_input& split_impl(const Delim& delim) {
|
||||
state_ = state::begin;
|
||||
const split_input& split_impl(const Delim& delim) {
|
||||
|
||||
if (output_.empty()) {
|
||||
if (input_.empty()) {
|
||||
begin_ = line_;
|
||||
}
|
||||
|
||||
trim_if_enabled(begin_);
|
||||
|
||||
while (state_ != state::finished) {
|
||||
curr_ = end_ = begin_;
|
||||
switch (state_) {
|
||||
case (state::begin):
|
||||
state_begin();
|
||||
break;
|
||||
case (state::reading):
|
||||
state_reading(delim);
|
||||
break;
|
||||
case (state::quoting):
|
||||
state_quoting(delim);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
};
|
||||
}
|
||||
for (done_ = false; !done_; state_begin(delim))
|
||||
;
|
||||
|
||||
return output_;
|
||||
return input_;
|
||||
}
|
||||
|
||||
////////////////
|
||||
// states
|
||||
////////////////
|
||||
|
||||
void state_begin() {
|
||||
void push_range() {
|
||||
if constexpr (is_const_line) {
|
||||
input_.emplace_back(begin_, end_);
|
||||
} else {
|
||||
input_.emplace_back(begin_, curr_);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
void state_begin(const Delim& delim) {
|
||||
if constexpr (quote::enabled) {
|
||||
if (quote::match(*begin_)) {
|
||||
++begin_;
|
||||
state_ = state::quoting;
|
||||
curr_ = end_ = ++begin_;
|
||||
state_quoting(delim);
|
||||
return;
|
||||
}
|
||||
}
|
||||
state_ = state::reading;
|
||||
curr_ = end_ = begin_;
|
||||
state_reading(delim);
|
||||
}
|
||||
|
||||
template <typename Delim>
|
||||
@ -288,22 +282,22 @@ private:
|
||||
while (true) {
|
||||
auto [width, valid] = match_delimiter(end_, delim);
|
||||
|
||||
// not a delimiter
|
||||
if (!valid) {
|
||||
// not a delimiter
|
||||
if (width == 0) {
|
||||
// eol
|
||||
output_.emplace_back(begin_, curr_);
|
||||
state_ = state::finished;
|
||||
push_range();
|
||||
done_ = true;
|
||||
break;
|
||||
} else {
|
||||
shift(width);
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
// found delimiter
|
||||
push_and_start_next(width);
|
||||
break;
|
||||
}
|
||||
|
||||
// found delimiter
|
||||
push_and_start_next(width);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -311,56 +305,57 @@ private:
|
||||
void state_quoting(const Delim& delim) {
|
||||
if constexpr (quote::enabled) {
|
||||
while (true) {
|
||||
if (quote::match(*end_)) {
|
||||
// double quote
|
||||
// eg: ...,"hel""lo,... -> hel"lo
|
||||
if (quote::match(end_[1])) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
if (!quote::match(*end_)) {
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*end_)) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
auto [width, valid] = match_delimiter(end_ + 1, delim);
|
||||
|
||||
// not a delimiter
|
||||
if (!valid) {
|
||||
if (width == 0) {
|
||||
// eol
|
||||
// eg: ...,"hello" \0 -> hello
|
||||
// eg no trim: ...,"hello"\0 -> hello
|
||||
output_.emplace_back(begin_, curr_);
|
||||
} else {
|
||||
// mismatched quote
|
||||
// eg: ...,"hel"lo,... -> error
|
||||
set_error_mismatched_quote(end_ - line_);
|
||||
output_.emplace_back(line_, begin_);
|
||||
}
|
||||
state_ = state::finished;
|
||||
// unterminated quote error
|
||||
// eg: ..."hell\0 -> quote not terminated
|
||||
if (*end_ == '\0') {
|
||||
set_error_unterminated_quote();
|
||||
input_.emplace_back(line_, begin_);
|
||||
done_ = true;
|
||||
break;
|
||||
}
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
|
||||
// delimiter
|
||||
auto [width, valid] = match_delimiter(end_ + 1, delim);
|
||||
|
||||
// delimiter
|
||||
if (valid) {
|
||||
push_and_start_next(width + 1);
|
||||
break;
|
||||
}
|
||||
|
||||
if constexpr (escape::enabled) {
|
||||
if (escape::match(*end_)) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
// double quote
|
||||
// eg: ...,"hel""lo",... -> hel"lo
|
||||
if (quote::match(end_[1])) {
|
||||
++end_;
|
||||
shift();
|
||||
continue;
|
||||
}
|
||||
|
||||
// unterminated error
|
||||
// eg: ..."hell\0 -> quote not terminated
|
||||
if (*end_ == '\0') {
|
||||
set_error_unterminated_quote();
|
||||
output_.emplace_back(line_, begin_);
|
||||
state_ = state::finished;
|
||||
break;
|
||||
// not a delimiter
|
||||
if (width == 0) {
|
||||
// eol
|
||||
// eg: ...,"hello" \0 -> hello
|
||||
// eg no trim: ...,"hello"\0 -> hello
|
||||
push_range();
|
||||
} else {
|
||||
// mismatched quote
|
||||
// eg: ...,"hel"lo,... -> error
|
||||
set_error_mismatched_quote(end_ - line_);
|
||||
input_.emplace_back(line_, begin_);
|
||||
}
|
||||
shift();
|
||||
done_ = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -369,7 +364,6 @@ private:
|
||||
// members
|
||||
////////////////
|
||||
|
||||
std::vector<string_range> output_;
|
||||
std::string string_error_;
|
||||
bool bool_error_{false};
|
||||
bool unterminated_quote_{false};
|
||||
@ -378,7 +372,10 @@ private:
|
||||
line_ptr_type curr_;
|
||||
line_ptr_type end_;
|
||||
line_ptr_type line_;
|
||||
state state_;
|
||||
bool done_;
|
||||
|
||||
public:
|
||||
split_input input_;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
|
Loading…
Reference in New Issue
Block a user