refactor a bit more, increase escape shifting performance

This commit is contained in:
ado 2021-02-06 00:55:05 +01:00
parent f973f404be
commit 42629c39c4
2 changed files with 45 additions and 43 deletions

View File

@ -285,10 +285,10 @@ private:
void undo_remove_eol(size_t& string_end) { void undo_remove_eol(size_t& string_end) {
if (crlf) { if (crlf) {
memcpy(next_line_buffer_ + string_end, "\r\n\0", 3); std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end);
string_end += 2; string_end += 2;
} else { } else {
memcpy(next_line_buffer_ + string_end, "\n\0", 2); std::copy_n("\n\0", 2, next_line_buffer_ + string_end);
string_end += 1; string_end += 1;
} }
} }
@ -311,7 +311,7 @@ private:
first = static_cast<char*>(realloc(static_cast<void*>(first), first = static_cast<char*>(realloc(static_cast<void*>(first),
first_size + second_size + 2)); first_size + second_size + 2));
memcpy(first + first_size, second, second_size + 1); std::copy_n(second, second_size + 1, first + first_size);
first_size += second_size; first_size += second_size;
} }

View File

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "setup.hpp" #include "setup.hpp"
#include "type_traits.hpp" #include "type_traits.hpp"
#include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <memory> #include <memory>
@ -167,8 +168,7 @@ private:
void shift_if_escaped(line_ptr_type& curr) { void shift_if_escaped(line_ptr_type& curr) {
if constexpr (escape::enabled) { if constexpr (escape::enabled) {
if (escape::match(*curr)) { if (escape::match(*curr)) {
*curr_ = end_[1]; shift_and_jump_escape();
++end_;
} }
} }
} }
@ -199,30 +199,38 @@ private:
} }
//////////////// ////////////////
// matching // shifting
//////////////// ////////////////
void shift() { void shift_and_set_current() {
if constexpr (!is_const_line) { if (escaped_ > 0) {
*curr_ = *end_; if constexpr (!is_const_line) {
++curr_; std::copy_n(curr_ + escaped_, end_ - curr_, curr_);
}
} }
curr_ = end_ - escaped_;
}
void shift_and_push() {
shift_and_set_current();
input_.emplace_back(begin_, curr_);
}
void shift_and_jump_escape() {
shift_and_set_current();
++end_; ++end_;
++escaped_;
} }
void shift(size_t n) { void shift_push_and_start_next(size_t n) {
if constexpr (!is_const_line) { shift_and_push();
memcpy(curr_, end_, n);
curr_ += n;
}
end_ += n;
}
void push_and_start_next(size_t n) {
push_range();
begin_ = end_ + n; begin_ = end_ + n;
} }
////////////////
// split impl
////////////////
const split_input& split_impl_select_delim( const split_input& split_impl_select_delim(
const std::string& delimiter = default_delimiter) { const std::string& delimiter = default_delimiter) {
clear_error(); clear_error();
@ -246,39 +254,32 @@ private:
trim_if_enabled(begin_); trim_if_enabled(begin_);
for (done_ = false; !done_; state_begin(delim)) for (done_ = false; !done_; read(delim))
; ;
return input_; return input_;
} }
//////////////// ////////////////
// states // reading
//////////////// ////////////////
void push_range() {
if constexpr (is_const_line) {
input_.emplace_back(begin_, end_);
} else {
input_.emplace_back(begin_, curr_);
}
}
template <typename Delim> template <typename Delim>
void state_begin(const Delim& delim) { void read(const Delim& delim) {
escaped_ = 0;
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
if (quote::match(*begin_)) { if (quote::match(*begin_)) {
curr_ = end_ = ++begin_; curr_ = end_ = ++begin_;
state_quoting(delim); read_quoted(delim);
return; return;
} }
} }
curr_ = end_ = begin_; curr_ = end_ = begin_;
state_reading(delim); read_normal(delim);
} }
template <typename Delim> template <typename Delim>
void state_reading(const Delim& delim) { void read_normal(const Delim& delim) {
while (true) { while (true) {
auto [width, valid] = match_delimiter(end_, delim); auto [width, valid] = match_delimiter(end_, delim);
@ -286,30 +287,30 @@ private:
// not a delimiter // not a delimiter
if (width == 0) { if (width == 0) {
// eol // eol
push_range(); shift_and_push();
done_ = true; done_ = true;
break; break;
} else { } else {
shift(width); end_ += width;
continue; continue;
} }
} else { } else {
// found delimiter // found delimiter
push_and_start_next(width); shift_push_and_start_next(width);
break; break;
} }
} }
} }
template <typename Delim> template <typename Delim>
void state_quoting(const Delim& delim) { void read_quoted(const Delim& delim) {
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
while (true) { while (true) {
if (!quote::match(*end_)) { if (!quote::match(*end_)) {
if constexpr (escape::enabled) { if constexpr (escape::enabled) {
if (escape::match(*end_)) { if (escape::match(*end_)) {
shift_and_jump_escape();
++end_; ++end_;
shift();
continue; continue;
} }
} }
@ -322,7 +323,7 @@ private:
done_ = true; done_ = true;
break; break;
} }
shift(); ++end_;
continue; continue;
} }
@ -330,15 +331,15 @@ private:
// delimiter // delimiter
if (valid) { if (valid) {
push_and_start_next(width + 1); shift_push_and_start_next(width + 1);
break; break;
} }
// double quote // double quote
// eg: ...,"hel""lo",... -> hel"lo // eg: ...,"hel""lo",... -> hel"lo
if (quote::match(end_[1])) { if (quote::match(end_[1])) {
shift_and_jump_escape();
++end_; ++end_;
shift();
continue; continue;
} }
@ -347,7 +348,7 @@ private:
// eol // eol
// eg: ...,"hello" \0 -> hello // eg: ...,"hello" \0 -> hello
// eg no trim: ...,"hello"\0 -> hello // eg no trim: ...,"hello"\0 -> hello
push_range(); shift_and_push();
} else { } else {
// mismatched quote // mismatched quote
// eg: ...,"hel"lo,... -> error // eg: ...,"hel"lo,... -> error
@ -373,6 +374,7 @@ private:
line_ptr_type end_; line_ptr_type end_;
line_ptr_type line_; line_ptr_type line_;
bool done_; bool done_;
size_t escaped_{0};
public: public:
split_input input_; split_input input_;