WIP, added multiline restriction, fixed a few bugs for multiline, updated unit tests

This commit is contained in:
ado
2021-02-20 15:53:18 +01:00
parent 2985027505
commit 2dbc21780f
8 changed files with 288 additions and 98 deletions

View File

@@ -230,6 +230,15 @@ private:
}
}
// Records the "multiline limit reached" error.
// With string errors enabled the message text is stored in error_;
// otherwise error_ is just raised as a flag.
void set_error_multiline_limit_reached() {
    if constexpr (!string_error) {
        error_ = true;
    } else {
        // assign() replaces any previous message in one step.
        error_.assign("multiline limit reached.");
    }
}
void set_error_invalid_conversion(const string_range msg, size_t pos) {
if constexpr (string_error) {
error_.clear();

View File

@@ -9,13 +9,16 @@
#include <string>
#include <vector>
// TODO remove
#include <iostream>
namespace ss {
template <typename... Matchers>
class parser {
constexpr static auto string_error = setup<Matchers...>::string_error;
constexpr static auto multiline = setup<Matchers...>::multiline;
using multiline = typename setup<Matchers...>::multiline;
using error_type = ss::ternary_t<string_error, std::string, bool>;
public:
@@ -324,7 +327,7 @@ private:
helper_buffer_{other.helper_buffer_}, converter_{std::move(
other.converter_)},
next_line_converter_{std::move(other.next_line_converter_)},
size_{other.size_},
size_{other.size_}, next_line_size_{other.size_},
helper_size_{other.helper_size_}, delim_{std::move(other.delim_)},
file_{other.file_}, crlf_{other.crlf_} {
other.buffer_ = nullptr;
@@ -341,6 +344,7 @@ private:
converter_ = std::move(other.converter_);
next_line_converter_ = std::move(other.next_line_converter_);
size_ = other.size_;
next_line_size_ = other.next_line_size_;
helper_size_ = other.helper_size_;
delim_ = std::move(other.delim_);
file_ = other.file_;
@@ -370,16 +374,23 @@ private:
reader& operator=(const reader& other) = delete;
bool read_next() {
ssize_t ssize = getline(&next_line_buffer_, &size_, file_);
memset(next_line_buffer_, '\0', next_line_size_);
ssize_t ssize =
getline(&next_line_buffer_, &next_line_size_, file_);
if (ssize == -1) {
return false;
}
size_t size = remove_eol(next_line_buffer_, ssize);
size_t limit = 0;
if constexpr (multiline && setup<Matchers...>::escape::enabled) {
if constexpr (multiline::enabled &&
setup<Matchers...>::escape::enabled) {
while (escaped_eol(size)) {
if (multiline_limit_reached(limit)) {
return true;
}
if (!append_line(next_line_buffer_, size)) {
return false;
}
@@ -388,8 +399,12 @@ private:
next_line_converter_.split(next_line_buffer_, delim_);
if constexpr (multiline && setup<Matchers...>::quote::enabled) {
if constexpr (multiline::enabled &&
setup<Matchers...>::quote::enabled) {
while (unterminated_quote()) {
if (multiline_limit_reached(limit)) {
return true;
}
if (!append_line(next_line_buffer_, size)) {
return false;
}
@@ -402,9 +417,20 @@ private:
// Exchanges the current-line state (buffer, its size, converter) with the
// next-line state. The three swaps touch distinct members, so their order
// does not matter.
void update() {
    std::swap(converter_, next_line_converter_);
    std::swap(size_, next_line_size_);
    std::swap(buffer_, next_line_buffer_);
}
// Checks the running continuation counter against multiline::size.
//
// limit: counter owned by the caller; it is incremented on every call
//        while a non-zero multiline::size is configured and is left
//        untouched otherwise.
// Returns true (after flagging the error on next_line_converter_) when the
// counter had already reached multiline::size before this call; returns
// false otherwise, and always false when multiline::size is 0.
bool multiline_limit_reached(size_t& limit) {
    if constexpr (multiline::size > 0) {
        const bool exhausted = limit >= multiline::size;
        ++limit;
        if (exhausted) {
            next_line_converter_.set_error_multiline_limit_reached();
        }
        return exhausted;
    } else {
        return false;
    }
}
bool escaped_eol(size_t size) {
const char* curr;
for (curr = next_line_buffer_ + size - 1;
@@ -422,12 +448,15 @@ private:
return false;
}
void undo_remove_eol(size_t& string_end) {
void undo_remove_eol(char* buffer, size_t& string_end) {
if (next_line_converter_.unterminated_quote()) {
string_end -= next_line_converter_.splitter_.escaped_;
}
if (crlf_) {
std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end);
std::copy_n("\r\n\0", 3, buffer + string_end);
string_end += 2;
} else {
std::copy_n("\n\0", 2, next_line_buffer_ + string_end);
std::copy_n("\n\0", 2, buffer + string_end);
string_end += 1;
}
}
@@ -447,15 +476,15 @@ private:
void realloc_concat(char*& first, size_t& first_size,
const char* const second, size_t second_size) {
first = static_cast<char*>(realloc(static_cast<void*>(first),
first_size + second_size + 2));
next_line_size_ = first_size + second_size + 2;
first = static_cast<char*>(
realloc(static_cast<void*>(first), next_line_size_));
std::copy_n(second, second_size + 1, first + first_size);
first_size += second_size;
}
bool append_line(char*& dst_buffer, size_t& dst_size) {
undo_remove_eol(dst_size);
undo_remove_eol(dst_buffer, dst_size);
ssize_t ssize = getline(&helper_buffer_, &helper_size_, file_);
if (ssize == -1) {
@@ -478,6 +507,7 @@ private:
converter<Matchers...> next_line_converter_;
size_t size_{0};
size_t next_line_size_{0};
size_t helper_size_{0};
std::string delim_;

View File

@@ -67,7 +67,11 @@ public:
};
////////////////
// setup parameters
// setup
////////////////
////////////////
// matcher
////////////////
template <char C>
@@ -85,15 +89,6 @@ struct trim_right : matcher<Cs...> {};
template <char... Cs>
struct escape : matcher<Cs...> {};
// TODO add limit
class multiline;
class string_error;
////////////////
// setup implementation
////////////////
template <typename T, template <char...> class Template>
struct is_instance_of_matcher : std::false_type {};
@@ -128,6 +123,54 @@ struct get_matcher<Matcher> {
template <template <char...> class Matcher, typename... Ts>
using get_matcher_t = typename get_matcher<Matcher, Ts...>::type;
////////////////
// multiline
////////////////
// Compile-time multiline setting.
//   Limit   - exposed as `size`; the reader only applies its limit check
//             when this value is greater than 0.
//   Enabled - exposed as `enabled`; defaults to true.
template <size_t Limit, bool Enabled = true>
struct multiline_restricted {
    static constexpr size_t size = Limit;
    static constexpr bool enabled = Enabled;
};
// Plain `multiline`: enabled, with size 0.
using multiline = multiline_restricted<0>;
// Type trait: true only for specializations of multiline_restricted.
// General case: any other type is not a multiline setting.
template <class T>
struct is_instance_of_multiline : std::bool_constant<false> {};
// Matches every multiline_restricted<S, B>, whatever its arguments.
template <size_t S, bool B>
struct is_instance_of_multiline<multiline_restricted<S, B>>
    : std::bool_constant<true> {};
// Shorthand for the trait's resulting integral-constant type.
template <class T>
using is_instance_of_multiline_t = typename is_instance_of_multiline<T>::type;
// Picks the first multiline_restricted specialization out of a type list.
// Primary template: only selected for an empty list, where it yields the
// disabled setting multiline_restricted<0, false>.
template <typename... Ts>
struct get_multiline {
    using type = multiline_restricted<0, false>;
};
// Non-empty list: take Head if it is a multiline setting, otherwise use
// whatever the remaining types resolve to.
template <typename Head, typename... Tail>
struct get_multiline<Head, Tail...> {
    using type = ternary_t<is_instance_of_multiline<Head>::value, Head,
                           typename get_multiline<Tail...>::type>;
};
// Shorthand for get_multiline<Ts...>::type.
template <typename... Ts>
using get_multiline_t = typename get_multiline<Ts...>::type;
////////////////
// string_error
////////////////
class string_error;
////////////////
// setup implementation
////////////////
template <typename... Ts>
struct setup {
private:
@@ -139,14 +182,12 @@ private:
is_instance_of_matcher_t<T, trim_left>,
is_instance_of_matcher_t<T, trim_right>> {};
template <typename T>
struct is_multiline : std::is_same<T, multiline> {};
template <typename T>
struct is_string_error : std::is_same<T, string_error> {};
constexpr static auto count_matcher = count_v<is_matcher, Ts...>;
constexpr static auto count_multiline = count_v<is_multiline, Ts...>;
constexpr static auto count_multiline =
count_v<is_instance_of_multiline, Ts...>;
constexpr static auto count_string_error = count_v<is_string_error, Ts...>;
constexpr static auto number_of_valid_setup_types =
@@ -163,7 +204,7 @@ public:
using trim_left = ternary_t<trim_all::enabled, trim_all, trim_left_only>;
using trim_right = ternary_t<trim_all::enabled, trim_all, trim_right_only>;
constexpr static bool multiline = (count_multiline == 1);
using multiline = get_multiline_t<Ts...>;
constexpr static bool string_error = (count_string_error == 1);
private:
@@ -181,7 +222,8 @@ private:
#undef ASSERT_MSG
static_assert(
!multiline || (multiline && (quote::enabled || escape::enabled)),
!multiline::enabled ||
(multiline::enabled && (quote::enabled || escape::enabled)),
"to enable multiline either quote or escape need to be enabled");
static_assert(!(trim_all::enabled && trim_left_only::enabled) &&

View File

@@ -9,6 +9,9 @@
#include <string>
#include <vector>
// TODO remove
#include <iostream>
namespace ss {
template <typename... Ts>
@@ -82,7 +85,13 @@ private:
return split_data_;
}
std::cout << "======================" << std::endl;
std::cout << "resplitting" << std::endl;
resplitting_ = true;
begin_ = line_ + begin;
size_t end = end_ - old_line - escaped_;
end_ = line_ + end;
curr_ = end_;
}
}
@@ -284,6 +293,12 @@ private:
void read(const Delim& delim) {
escaped_ = 0;
if constexpr (quote::enabled) {
if (resplitting_) {
resplitting_ = false;
++begin_;
read_quoted(delim);
return;
}
if (quote::match(*begin_)) {
curr_ = end_ = ++begin_;
read_quoted(delim);
@@ -321,7 +336,9 @@ private:
template <typename Delim>
void read_quoted(const Delim& delim) {
if constexpr (quote::enabled) {
std::cout << "start loop: " << std::endl;
while (true) {
std::cout << "- " << *end_ << std::endl;
if (!quote::match(*end_)) {
if constexpr (escape::enabled) {
if (escape::match(*end_)) {
@@ -334,6 +351,7 @@ private:
// unterminated quote error
// eg: ..."hell\0 -> quote not terminated
if (*end_ == '\0') {
shift_and_set_current();
set_error_unterminated_quote();
split_data_.emplace_back(line_, begin_);
done_ = true;
@@ -381,9 +399,11 @@ private:
// members
////////////////
public:
error_type error_{};
bool unterminated_quote_{false};
bool done_;
bool done_{true};
bool resplitting_{false};
size_t escaped_{0};
split_data split_data_;