diff --git a/include/ss/converter.hpp b/include/ss/converter.hpp index fff5fbd..e5b3de3 100644 --- a/include/ss/converter.hpp +++ b/include/ss/converter.hpp @@ -126,8 +126,8 @@ public: template no_void_validator_tup_t convert( line_ptr_type line, const std::string& delim = default_delimiter) { - input_ = split(line, delim); - return convert(input_); + split(line, delim); + return convert(splitter_.input_); } // parses already split line, returns 'T' object with extracted values @@ -169,7 +169,7 @@ public: // same as above, but uses cached split line template no_void_validator_tup_t convert() { - return convert(input_); + return convert(splitter_.input_); } bool valid() const { @@ -194,19 +194,17 @@ public: // contain the beginnings and the ends of each column of the string const split_input& split(line_ptr_type line, const std::string& delim = default_delimiter) { - input_.clear(); + splitter_.input_.clear(); if (line[0] == '\0') { - return input_; + return splitter_.input_; } - input_ = splitter_.split(line, delim); - return input_; + return splitter_.split(line, delim); } const split_input& resplit(line_ptr_type new_line, ssize_t new_size, const std::string& delim = default_delimiter) { - input_ = splitter_.resplit(new_line, new_size, delim); - return input_; + return splitter_.resplit(new_line, new_size, delim); } private: @@ -371,7 +369,6 @@ private: // members //////////////// - std::vector input_; std::string string_error_; bool bool_error_; enum error_mode error_mode_ { error_mode::error_bool }; diff --git a/include/ss/setup.hpp b/include/ss/setup.hpp index 4474d96..9d1c02b 100644 --- a/include/ss/setup.hpp +++ b/include/ss/setup.hpp @@ -98,7 +98,7 @@ struct setup { using trim = get_matcher_t; using escape = get_matcher_t; -#define ASSERT_MSG "cannot have the same character in multiple matchers" +#define ASSERT_MSG "cannot have the same match character in multiple matchers" static_assert(!matches_intersect(), ASSERT_MSG); static_assert(!matches_intersect(), ASSERT_MSG); static_assert(!matches_intersect(), ASSERT_MSG); diff --git a/include/ss/splitter.hpp b/include/ss/splitter.hpp index aa35109..ac24984 100644 --- a/include/ss/splitter.hpp +++ b/include/ss/splitter.hpp @@ -3,6 +3,7 @@ #include "type_traits.hpp" #include #include +#include #include #include @@ -17,7 +18,6 @@ enum class error_mode { error_string, error_bool }; template class splitter { private: - enum class state { begin, reading, quoting, finished }; constexpr static auto default_delimiter = ","; using quote = typename setup::quote; @@ -49,12 +49,12 @@ public: const split_input& split(line_ptr_type new_line, const std::string& delimiter = default_delimiter) { - output_.clear(); + input_.clear(); return resplit(new_line, -1, delimiter); } void adjust_ranges(const char* old_line) { - for (auto& [begin, end] : output_) { + for (auto& [begin, end] : input_) { begin = begin - old_line + line_; end = end - old_line + line_; } @@ -66,17 +66,17 @@ public: line_ = new_line; // resplitting, continue from last slice - if (!output_.empty() && unterminated_quote()) { - const auto& last = std::prev(output_.end()); + if (!input_.empty() && unterminated_quote()) { + const auto& last = std::prev(input_.end()); const auto [old_line, old_begin] = *last; size_t begin = old_begin - old_line - 1; - output_.pop_back(); + input_.pop_back(); adjust_ranges(old_line); // safety measure if (new_size != -1 && static_cast(new_size) < begin) { set_error_invalid_resplit(); - return output_; + return input_; } begin_ = line_ + begin; @@ -205,32 +205,31 @@ private: void shift() { if constexpr (!is_const_line) { *curr_ = *end_; + ++curr_; } ++end_; - ++curr_; } void shift(size_t n) { if constexpr (!is_const_line) { memcpy(curr_, end_, n); + curr_ += n; } end_ += n; - curr_ += n; } void push_and_start_next(size_t n) { - output_.emplace_back(begin_, curr_); + push_range(); begin_ = end_ + n; - state_ = state::begin; } - split_input& split_impl_select_delim( + const split_input& split_impl_select_delim( const std::string& delimiter = default_delimiter) { clear_error(); switch (delimiter.size()) { case 0: set_error_empty_delimiter(); - return output_; + return input_; case 1: return split_impl(delimiter[0]); default: @@ -239,48 +238,43 @@ private: } template - split_input& split_impl(const Delim& delim) { - state_ = state::begin; + const split_input& split_impl(const Delim& delim) { - if (output_.empty()) { + if (input_.empty()) { begin_ = line_; } trim_if_enabled(begin_); - while (state_ != state::finished) { - curr_ = end_ = begin_; - switch (state_) { - case (state::begin): - state_begin(); - break; - case (state::reading): - state_reading(delim); - break; - case (state::quoting): - state_quoting(delim); - break; - default: - break; - }; - } + for (done_ = false; !done_; state_begin(delim)) + ; - return output_; + return input_; } //////////////// // states //////////////// - void state_begin() { + void push_range() { + if constexpr (is_const_line) { + input_.emplace_back(begin_, end_); + } else { + input_.emplace_back(begin_, curr_); + } + } + + template + void state_begin(const Delim& delim) { if constexpr (quote::enabled) { if (quote::match(*begin_)) { - ++begin_; - state_ = state::quoting; + curr_ = end_ = ++begin_; + state_quoting(delim); return; } } - state_ = state::reading; + curr_ = end_ = begin_; + state_reading(delim); } template @@ -288,22 +282,22 @@ private: while (true) { auto [width, valid] = match_delimiter(end_, delim); - // not a delimiter if (!valid) { + // not a delimiter if (width == 0) { // eol - output_.emplace_back(begin_, curr_); - state_ = state::finished; + push_range(); + done_ = true; break; } else { shift(width); continue; } + } else { + // found delimiter + push_and_start_next(width); + break; } - - // found delimiter - push_and_start_next(width); - break; } } @@ -311,56 +305,57 @@ private: void state_quoting(const Delim& delim) { if constexpr (quote::enabled) { while (true) { - if (quote::match(*end_)) { - // double quote - // eg: ...,"hel""lo,... -> hel"lo - if (quote::match(end_[1])) { - ++end_; - shift(); - continue; + if (!quote::match(*end_)) { + if constexpr (escape::enabled) { + if (escape::match(*end_)) { + ++end_; + shift(); + continue; + } } - auto [width, valid] = match_delimiter(end_ + 1, delim); - - // not a delimiter - if (!valid) { - if (width == 0) { - // eol - // eg: ...,"hello" \0 -> hello - // eg no trim: ...,"hello"\0 -> hello - output_.emplace_back(begin_, curr_); - } else { - // mismatched quote - // eg: ...,"hel"lo,... -> error - set_error_mismatched_quote(end_ - line_); - output_.emplace_back(line_, begin_); - } - state_ = state::finished; + // unterminated quote error + // eg: ..."hell\0 -> quote not terminated + if (*end_ == '\0') { + set_error_unterminated_quote(); + input_.emplace_back(line_, begin_); + done_ = true; break; } + shift(); + continue; + } - // delimiter + auto [width, valid] = match_delimiter(end_ + 1, delim); + + // delimiter + if (valid) { push_and_start_next(width + 1); break; } - if constexpr (escape::enabled) { - if (escape::match(*end_)) { - ++end_; - shift(); - continue; - } + // double quote + // eg: ...,"hel""lo",... -> hel"lo + if (quote::match(end_[1])) { + ++end_; + shift(); + continue; } - // unterminated error - // eg: ..."hell\0 -> quote not terminated - if (*end_ == '\0') { - set_error_unterminated_quote(); - output_.emplace_back(line_, begin_); - state_ = state::finished; - break; + // not a delimiter + if (width == 0) { + // eol + // eg: ...,"hello" \0 -> hello + // eg no trim: ...,"hello"\0 -> hello + push_range(); + } else { + // mismatched quote + // eg: ...,"hel"lo,... -> error + set_error_mismatched_quote(end_ - line_); + input_.emplace_back(line_, begin_); } - shift(); + done_ = true; + break; } } } @@ -369,7 +364,6 @@ private: // members //////////////// - std::vector output_; std::string string_error_; bool bool_error_{false}; bool unterminated_quote_{false}; @@ -378,7 +372,10 @@ private: line_ptr_type curr_; line_ptr_type end_; line_ptr_type line_; - state state_; + bool done_; + +public: + split_input input_; }; } /* ss */