diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 4806b06..d8b19b5 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -285,10 +285,10 @@ private: void undo_remove_eol(size_t& string_end) { if (crlf) { - memcpy(next_line_buffer_ + string_end, "\r\n\0", 3); + std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end); string_end += 2; } else { - memcpy(next_line_buffer_ + string_end, "\n\0", 2); + std::copy_n("\n\0", 2, next_line_buffer_ + string_end); string_end += 1; } } @@ -311,7 +311,7 @@ private: first = static_cast(realloc(static_cast(first), first_size + second_size + 2)); - memcpy(first + first_size, second, second_size + 1); + std::copy_n(second, second_size + 1, first + first_size); first_size += second_size; } diff --git a/include/ss/splitter.hpp b/include/ss/splitter.hpp index ac24984..5c32367 100644 --- a/include/ss/splitter.hpp +++ b/include/ss/splitter.hpp @@ -1,6 +1,7 @@ #pragma once #include "setup.hpp" #include "type_traits.hpp" +#include #include #include #include @@ -167,8 +168,7 @@ private: void shift_if_escaped(line_ptr_type& curr) { if constexpr (escape::enabled) { if (escape::match(*curr)) { - *curr_ = end_[1]; - ++end_; + shift_and_jump_escape(); } } } @@ -199,30 +199,38 @@ private: } //////////////// - // matching + // shifting //////////////// - void shift() { - if constexpr (!is_const_line) { - *curr_ = *end_; - ++curr_; + void shift_and_set_current() { + if (escaped_ > 0) { + if constexpr (!is_const_line) { + std::copy_n(curr_ + escaped_, end_ - curr_, curr_); + } } + curr_ = end_ - escaped_; + } + + void shift_and_push() { + shift_and_set_current(); + input_.emplace_back(begin_, curr_); + } + + void shift_and_jump_escape() { + shift_and_set_current(); ++end_; + ++escaped_; } - void shift(size_t n) { - if constexpr (!is_const_line) { - memcpy(curr_, end_, n); - curr_ += n; - } - end_ += n; - } - - void push_and_start_next(size_t n) { - push_range(); + void shift_push_and_start_next(size_t n) { + shift_and_push(); begin_ = end_ + n; } + //////////////// + // split impl + //////////////// + const split_input& split_impl_select_delim( const std::string& delimiter = default_delimiter) { clear_error(); @@ -246,39 +254,32 @@ private: trim_if_enabled(begin_); - for (done_ = false; !done_; state_begin(delim)) + for (done_ = false; !done_; read(delim)) ; return input_; } //////////////// - // states + // reading //////////////// - void push_range() { - if constexpr (is_const_line) { - input_.emplace_back(begin_, end_); - } else { - input_.emplace_back(begin_, curr_); - } - } - template - void state_begin(const Delim& delim) { + void read(const Delim& delim) { + escaped_ = 0; if constexpr (quote::enabled) { if (quote::match(*begin_)) { curr_ = end_ = ++begin_; - state_quoting(delim); + read_quoted(delim); return; } } curr_ = end_ = begin_; - state_reading(delim); + read_normal(delim); } template - void state_reading(const Delim& delim) { + void read_normal(const Delim& delim) { while (true) { auto [width, valid] = match_delimiter(end_, delim); @@ -286,30 +287,30 @@ private: // not a delimiter if (width == 0) { // eol - push_range(); + shift_and_push(); done_ = true; break; } else { - shift(width); + end_ += width; continue; } } else { // found delimiter - push_and_start_next(width); + shift_push_and_start_next(width); break; } } } template - void state_quoting(const Delim& delim) { + void read_quoted(const Delim& delim) { if constexpr (quote::enabled) { while (true) { if (!quote::match(*end_)) { if constexpr (escape::enabled) { if (escape::match(*end_)) { + shift_and_jump_escape(); ++end_; - shift(); continue; } } @@ -322,7 +323,7 @@ private: done_ = true; break; } - shift(); + ++end_; continue; } @@ -330,15 +331,15 @@ private: // delimiter if (valid) { - push_and_start_next(width + 1); + shift_push_and_start_next(width + 1); break; } // double quote // eg: ...,"hel""lo",... -> hel"lo if (quote::match(end_[1])) { + shift_and_jump_escape(); ++end_; - shift(); continue; } @@ -347,7 +348,7 @@ private: // eol // eg: ...,"hello" \0 -> hello // eg no trim: ...,"hello"\0 -> hello - push_range(); + shift_and_push(); } else { // mismatched quote // eg: ...,"hel"lo,... -> error @@ -373,6 +374,7 @@ private: line_ptr_type end_; line_ptr_type line_; bool done_; + size_t escaped_{0}; public: split_input input_;