[skip ci] Add escaping functionality to new parsing method

This commit is contained in:
ado 2023-08-13 16:51:31 +02:00
parent 4e4c3a6e02
commit b618384054
2 changed files with 69 additions and 51 deletions

View File

@ -2,7 +2,6 @@
#include <vector> #include <vector>
namespace ss { namespace ss {
struct none {}; struct none {};
using string_range = std::pair<const char*, const char*>; using string_range = std::pair<const char*, const char*>;
@ -21,5 +20,4 @@ inline void assert_throw_on_error_not_defined() {
static_assert(!ThrowOnError, "cannot handle errors manually if " static_assert(!ThrowOnError, "cannot handle errors manually if "
"'throw_on_error' is enabled"); "'throw_on_error' is enabled");
} }
} /* ss */ } /* ss */

View File

@ -792,7 +792,7 @@ private:
// just spacing // just spacing
// TODO handle \r\n // TODO handle \r\n
if (*curr == '\n' || (*curr == '\r' && *(curr + 1) == '\n')) { if (*curr == '\n' || *curr == '\r') {
return {0, false}; return {0, false};
} }
@ -812,6 +812,7 @@ private:
void shift_if_escaped(line_ptr_type& curr) { void shift_if_escaped(line_ptr_type& curr) {
if constexpr (escape::enabled) { if constexpr (escape::enabled) {
if (escape::match(*curr)) { if (escape::match(*curr)) {
// TODO handle differently
if (curr[1] == '\0') { if (curr[1] == '\0') {
if constexpr (!multiline::enabled) { if constexpr (!multiline::enabled) {
// TODO handle // TODO handle
@ -825,7 +826,7 @@ private:
} }
} }
void shift_and_set_current() { void shift_and_set_shifted_current() {
if constexpr (!is_const_line) { if constexpr (!is_const_line) {
if (escaped_ > 0) { if (escaped_ > 0) {
// shift by number of escapes // shift by number of escapes
@ -840,7 +841,18 @@ private:
} }
void shift_and_jump_escape() { void shift_and_jump_escape() {
shift_and_set_current(); if constexpr (!is_const_line && escape::enabled) {
if (curr_[1] == '\r' && curr_[2] == '\n') {
shift_and_set_shifted_current();
++escaped_;
++curr_;
shift_and_set_shifted_current();
++curr_;
return;
}
}
shift_and_set_shifted_current();
if constexpr (!is_const_line) { if constexpr (!is_const_line) {
++escaped_; ++escaped_;
} }
@ -853,36 +865,40 @@ private:
} }
void shift_and_push() { void shift_and_push() {
shift_and_set_current(); shift_and_set_shifted_current();
split_data_.emplace_back(begin_, shifted_curr_); split_data_.emplace_back(begin_, shifted_curr_);
} }
void parse_next_line() { // TODO check attribute
escaped_ = 0; __attribute__((always_inline)) void check_buff_end() {
if (curr_ == end_) {
auto old_buff = buff_;
auto check_buff_end = [&] { if (last_read_) {
if (curr_ == end_) { // TODO handle
auto old_buff = buff_; throw "no new line at eof";
if (last_read_) {
// TODO handle
throw "no new line at eof";
}
handle_buffer_end_reached();
end_ = buff_ + buff_filled_;
for (auto& [begin, end] : split_data_) {
begin = begin - old_buff + buff_;
end = end - old_buff + buff_;
}
begin_ = begin_ - old_buff + buff_;
curr_ = curr_ - old_buff + buff_;
} }
};
handle_buffer_end_reached();
end_ = buff_ + buff_filled_;
for (auto& [begin, end] : split_data_) {
begin = begin - old_buff + buff_;
end = end - old_buff + buff_;
}
begin_ = begin_ - old_buff + buff_;
curr_ = curr_ - old_buff + buff_;
shifted_curr_ = shifted_curr_ - old_buff + buff_;
}
}
void parse_next_line() {
while (true) { while (true) {
if constexpr (quote::enabled || escape::enabled) {
escaped_ = 0;
}
// quoted string // quoted string
if constexpr (quote::enabled) { if constexpr (quote::enabled) {
if (quote::match(*curr_)) { if (quote::match(*curr_)) {
@ -906,19 +922,17 @@ private:
continue; continue;
} }
auto [width, is_delim] =
auto [width, valid] =
match_delimiter(curr_ + 1, delim_char_); match_delimiter(curr_ + 1, delim_char_);
// delimiter // delimiter
if (valid) { if (is_delim) {
shift_push_and_start_next(width + 1); shift_push_and_start_next(width + 1);
curr_ += width + 1; curr_ += width + 1;
check_buff_end(); check_buff_end();
break; break;
} }
// double quote // double quote
// eg: ...,"hel""lo",... -> hel"lo // eg: ...,"hel""lo",... -> hel"lo
if (quote::match(*(curr_ + 1))) { if (quote::match(*(curr_ + 1))) {
@ -928,7 +942,6 @@ private:
continue; continue;
} }
if (width == 0) { if (width == 0) {
// eol // eol
// eg: ...,"hello" \n -> hello // eg: ...,"hello" \n -> hello
@ -953,30 +966,37 @@ private:
} }
// not quoted // not quoted
begin_ = shifted_curr_ = curr_;
while (true) { while (true) {
if (*curr_ == '\n') { // std::cout << "* " << *curr_ << std::endl;
split_data_.emplace_back(begin_, curr_);
return;
}
if (*curr_ == '\r' && *(curr_ + 1) == '\n') { auto [width, is_delim] =
split_data_.emplace_back(begin_, curr_); match_delimiter(curr_, delim_char_);
++curr_;
check_buff_end();
return;
}
if (*curr_ == delim_char_) { if (!is_delim) {
split_data_.emplace_back(begin_, curr_); // not a delimiter
begin_ = curr_ + 1;
++curr_; if (width == 0) {
// eol
shift_and_push();
// ++curr_;
// TODO handle differently
if (curr_[0] == '\r') {
++curr_;
}
return;
} else {
curr_ += width;
check_buff_end();
continue;
}
} else {
// found delimiter
shift_push_and_start_next(width);
curr_ += width;
check_buff_end(); check_buff_end();
break; break;
} }
++curr_;
check_buff_end();
} }
} }
} }