diff --git a/README.md b/README.md index e8f9be6..1044574 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Bill (Heath) Gates 65 3.3 * Works on any type * Easy to use * No exceptions + * Works with quotes, escapes and spacings * Columns and rows can be ignored * Works with any type of delimiter * Can return whole objects composed of converted values @@ -216,6 +217,15 @@ inline bool ss::extract(const char* begin, const char* end, shape& dst) { ``` The shape enum will be used in an example below. The **inline** is there just to prevent multiple definition errors. The function returns **true** if the conversion was a success, and **false** otherwise. The function uses **const char*** begin and end for performance reasons. +## Quoting +Not yet documented. + +## Escaping +Not yet documented. + +## Spacing +Not yet documented. + ## Error handling Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method: diff --git a/include/ss/converter.hpp b/include/ss/converter.hpp index 25be27c..e73c5db 100644 --- a/include/ss/converter.hpp +++ b/include/ss/converter.hpp @@ -1,8 +1,8 @@ #pragma once - #include "extract.hpp" #include "function_traits.hpp" #include "restrictions.hpp" +#include "splitter.hpp" #include "type_traits.hpp" #include #include @@ -21,7 +21,7 @@ INIT_HAS_METHOD(error) // eg. no_validator_tup_t> <=> std::tuple // where ss::nx is a validator '(n)one e(x)cept' which // checks if the returned character is either 'A' or 'B', returns error if not -// additionaly if one element is left in the pack, it will be unwraped from +// additionally if one element is left in the pack, it will be unwrapped from // the tuple eg. no_void_validator_tup_t <=> int instead of std::tuple template struct no_validator; @@ -103,34 +103,31 @@ struct tied_class { template constexpr bool tied_class_v = tied_class::value; -// the error can be set inside a string, or a bool -enum class error_mode { error_string, error_bool }; - //////////////// // converter //////////////// +template class converter { - using string_range = std::pair; - constexpr static auto default_delimiter = ','; + constexpr static auto default_delimiter = ","; + using line_ptr_type = typename splitter::line_ptr_type; public: - using split_input = std::vector; - // parses line with given delimiter, returns a 'T' object created with // extracted values of type 'Ts' template - T convert_object(const char* const line, const std::string& delim = "") { + T convert_object(line_ptr_type line, + const std::string& delim = default_delimiter) { return to_object(convert(line, delim)); } // parses line with given delimiter, returns tuple of objects with // extracted values of type 'Ts' template - no_void_validator_tup_t convert(const char* const line, - const std::string& delim = "") { - input_ = split(line, delim); - return convert(input_); + no_void_validator_tup_t convert( + line_ptr_type line, const std::string& delim = default_delimiter) { + split(line, delim); + return convert(splitter_.split_input_); } // parses already split line, returns 'T' object with extracted values @@ -139,6 +136,12 @@ public: return to_object(convert(elems)); } + // same as above, but uses cached split line + template + T convert_object() { + return to_object(convert()); + } + // parses already split line, returns either a tuple of objects with // parsed values (returns raw element (no tuple) if Ts is empty), or if // one argument is given which is a class which has a tied @@ -163,35 +166,53 @@ public: } } + // same as above, but uses cached split line + template + no_void_validator_tup_t convert() { + return convert(splitter_.split_input_); + } + bool valid() const { return (error_mode_ == error_mode::error_string) ? string_error_.empty() : bool_error_ == false; } - const std::string& error_msg() const { return string_error_; } + bool unterminated_quote() const { + return splitter_.unterminated_quote(); + } - void set_error_mode(error_mode mode) { error_mode_ = mode; } + const std::string& error_msg() const { + return string_error_; + } + + void set_error_mode(error_mode mode) { + splitter_.set_error_mode(mode); + error_mode_ = mode; + } // 'splits' string by given delimiter, returns vector of pairs which - // contain the beginings and the ends of each column of the string - const split_input& split(const char* const line, - const std::string& delim = "") { - input_.clear(); + // contain the beginnings and the ends of each column of the string + const split_input& split(line_ptr_type line, + const std::string& delim = default_delimiter) { + splitter_.split_input_.clear(); if (line[0] == '\0') { - return input_; + return splitter_.split_input_; } - switch (delim.size()) { - case 0: - return split_impl(line, ','); - case 1: - return split_impl(line, delim[0]); - default: - return split_impl(line, delim, delim.size()); - }; + return splitter_.split(line, delim); } private: + + //////////////// + // resplit + //////////////// + + const split_input& resplit(line_ptr_type new_line, ssize_t new_size, + const std::string& delim = default_delimiter) { + return splitter_.resplit(new_line, new_size, delim); + } + //////////////// // error //////////////// @@ -212,6 +233,15 @@ private: return error; } + void set_error_unterminated_quote() { + if (error_mode_ == error_mode::error_string) { + string_error_.clear(); + string_error_.append(splitter_.error_msg()); + } else { + bool_error_ = true; + } + } + void set_error_invalid_conversion(const string_range msg, size_t pos) { if (error_mode_ == error_mode::error_string) { string_error_.clear(); @@ -252,11 +282,19 @@ private: template no_void_validator_tup_t convert_impl(const split_input& elems) { clear_error(); - no_void_validator_tup_t ret{}; - if (sizeof...(Ts) != elems.size()) { - set_error_number_of_colums(sizeof...(Ts), elems.size()); + + if (!splitter_.valid()) { + set_error_unterminated_quote(); + no_void_validator_tup_t ret{}; return ret; } + + if (sizeof...(Ts) != elems.size()) { + set_error_number_of_colums(sizeof...(Ts), elems.size()); + no_void_validator_tup_t ret{}; + return ret; + } + return extract_tuple(elems); } @@ -267,37 +305,6 @@ private: return convert_impl(elems); } - //////////////// - // substring - //////////////// - - template - const split_input& split_impl(const char* const line, Delim delim, - size_t delim_size = 1) { - auto range = substring(line, delim); - input_.push_back(range); - while (range.second[0] != '\0') { - range = substring(range.second + delim_size, delim); - input_.push_back(range); - } - return input_; - } - - bool no_match(const char* end, char delim) const { return *end != delim; } - - bool no_match(const char* end, const std::string& delim) const { - return strncmp(end, delim.c_str(), delim.size()) != 0; - } - - template - string_range substring(const char* const begin, Delim delim) const { - const char* end; - for (end = begin; *end != '\0' && no_match(end, delim); ++end) - ; - - return string_range{begin, end}; - } - //////////////// // conversion //////////////// @@ -309,6 +316,11 @@ private: return; } + if constexpr (std::is_same_v) { + extract(msg.first, msg.second, dst); + return; + } + if (!extract(msg.first, msg.second, dst)) { set_error_invalid_conversion(msg, pos); return; @@ -353,7 +365,7 @@ private: no_void_validator_tup_t extract_tuple(const split_input& elems) { static_assert(!all_of::value, "at least one parameter must be non void"); - no_void_validator_tup_t ret; + no_void_validator_tup_t ret{}; extract_multiple<0, 0, Ts...>(ret, elems); return ret; } @@ -362,21 +374,13 @@ private: // members //////////////// - std::vector input_; std::string string_error_; bool bool_error_; enum error_mode error_mode_ { error_mode::error_bool }; + splitter splitter_; + + template + friend class parser; }; -template <> -inline void converter::extract_one(std::string& dst, - const string_range msg, - size_t) { - if (!valid()) { - return; - } - - extract(msg.first, msg.second, dst); -} - } /* ss */ diff --git a/include/ss/extract.hpp b/include/ss/extract.hpp index f994430..b10759d 100644 --- a/include/ss/extract.hpp +++ b/include/ss/extract.hpp @@ -33,6 +33,7 @@ std::enable_if_t, T> pow10(int n) { return ret; } +// TODO not working with large number of digits template std::enable_if_t, std::optional> to_num( const char* begin, const char* const end) { diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index a0ed111..d8b19b5 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -9,13 +9,14 @@ #include #include +// TODO rule of 5-3-1 +// TODO threads namespace ss { -struct none {}; -template -class composite; - +template class parser { + struct none {}; + public: parser(const std::string& file_name, const std::string& delimiter) : file_name_{file_name}, delim_{delimiter}, @@ -41,7 +42,7 @@ public: void set_error_mode(error_mode mode) { error_mode_ = mode; - converter_.set_error_mode(mode); + reader_.set_error_mode(mode); } const std::string& error_msg() const { @@ -53,7 +54,7 @@ public: } bool ignore_next() { - return buff_.read(file_); + return reader_.read(file_); } template @@ -63,17 +64,16 @@ public: template no_void_validator_tup_t get_next() { - buff_.update(); + reader_.update(); clear_error(); if (eof_) { set_error_eof_reached(); return {}; } - split_input_ = converter_.split(buff_.get(), delim_); - auto value = converter_.convert(split_input_); + auto value = reader_.get_converter().template convert(); - if (!converter_.valid()) { + if (!reader_.get_converter().valid()) { set_error_invalid_conversion(); } @@ -162,8 +162,8 @@ public: no_void_validator_tup_t try_same() { parser_.clear_error(); auto value = - parser_.converter_.convert(parser_.split_input_); - if (!parser_.converter_.valid()) { + parser_.reader_.get_converter().template convert(); + if (!parser_.reader_.get_converter().valid()) { parser_.set_error_invalid_conversion(); } return value; @@ -192,9 +192,6 @@ public: } private: - template - friend class composite; - // tries to invoke the given function (see below), if the function // returns a value which can be used as a conditional, and it returns // false, the function sets an error, and allows the invoke of the @@ -249,44 +246,146 @@ private: // line reading //////////////// - class buffer { + class reader { char* buffer_{nullptr}; - char* new_buffer_{nullptr}; - size_t size_{0}; + char* next_line_buffer_{nullptr}; + char* helper_buffer_{nullptr}; - public: - ~buffer() { - free(buffer_); - free(new_buffer_); + converter converter_; + converter next_line_converter_; + + size_t size_{0}; + size_t helper_size_{0}; + const std::string& delim_; + + bool crlf; + + bool escaped_eol(size_t size) { + if constexpr (setup::escape::enabled) { + const char* curr; + for (curr = next_line_buffer_ + size - 1; + curr >= next_line_buffer_ && + setup::escape::match(*curr); + --curr) { + } + return (next_line_buffer_ - curr + size) % 2 == 0; + } + + return false; } - bool read(FILE* file) { - ssize_t size = getline(&new_buffer_, &size_, file); - size_t string_end = size - 1; + bool unterminated_quote() { + if constexpr (ss::setup::quote::enabled) { + if (next_line_converter_.unterminated_quote()) { + return true; + } + } + return false; + } - if (size == -1) { + void undo_remove_eol(size_t& string_end) { + if (crlf) { + std::copy_n("\r\n\0", 3, next_line_buffer_ + string_end); + string_end += 2; + } else { + std::copy_n("\n\0", 2, next_line_buffer_ + string_end); + string_end += 1; + } + } + + size_t remove_eol(char*& buffer, size_t size) { + size_t new_size = size - 1; + if (size >= 2 && buffer[size - 2] == '\r') { + crlf = true; + new_size--; + } else { + crlf = false; + } + + buffer[new_size] = '\0'; + return new_size; + } + + void realloc_concat(char*& first, size_t& first_size, + const char* const second, size_t second_size) { + first = static_cast(realloc(static_cast(first), + first_size + second_size + 2)); + + std::copy_n(second, second_size + 1, first + first_size); + first_size += second_size; + } + + bool append_line(FILE* file, char*& dst_buffer, size_t& dst_size) { + undo_remove_eol(dst_size); + + ssize_t ssize = getline(&helper_buffer_, &helper_size_, file); + if (ssize == -1) { return false; } - if (size >= 2 && new_buffer_[size - 2] == '\r') { - string_end--; - } - - new_buffer_[string_end] = '\0'; + size_t size = remove_eol(helper_buffer_, ssize); + realloc_concat(dst_buffer, dst_size, helper_buffer_, size); return true; } - const char* get() const { + public: + reader(const std::string& delimiter) : delim_{delimiter} { + } + + ~reader() { + free(buffer_); + free(next_line_buffer_); + free(helper_buffer_); + } + + bool read(FILE* file) { + ssize_t ssize = getline(&next_line_buffer_, &size_, file); + + if (ssize == -1) { + return false; + } + + size_t size = remove_eol(next_line_buffer_, ssize); + + while (escaped_eol(size)) { + if (!append_line(file, next_line_buffer_, size)) { + return false; + } + } + + next_line_converter_.split(next_line_buffer_, delim_); + + while (unterminated_quote()) { + if (!append_line(file, next_line_buffer_, size)) { + return false; + } + next_line_converter_.resplit(next_line_buffer_, size); + } + + return true; + } + + void set_error_mode(error_mode mode) { + converter_.set_error_mode(mode); + next_line_converter_.set_error_mode(mode); + } + + converter& get_converter() { + return converter_; + } + + const char* get_buffer() const { return buffer_; } void update() { - std::swap(buffer_, new_buffer_); + std::swap(buffer_, next_line_buffer_); + std::swap(converter_, next_line_converter_); } }; void read_line() { - eof_ = !buff_.read(file_); + eof_ = !reader_.read(file_); ++line_number_; } @@ -326,9 +425,9 @@ private: .append(" ") .append(std::to_string(line_number_)) .append(": ") - .append(converter_.error_msg()) + .append(reader_.get_converter().error_msg()) .append(": \"") - .append(buff_.get()) + .append(reader_.get_buffer()) .append("\""); } else { bool_error_ = true; @@ -344,10 +443,8 @@ private: std::string string_error_; bool bool_error_{false}; error_mode error_mode_{error_mode::error_bool}; - converter converter_; - converter::split_input split_input_; FILE* file_{nullptr}; - buffer buff_; + reader reader_{delim_}; size_t line_number_{0}; bool eof_{false}; }; diff --git a/include/ss/setup.hpp b/include/ss/setup.hpp new file mode 100644 index 0000000..9d1c02b --- /dev/null +++ b/include/ss/setup.hpp @@ -0,0 +1,111 @@ +#pragma once +#include "type_traits.hpp" +#include + +namespace ss { + +template +struct matcher { +private: + template + static bool match_impl(char c) { + if constexpr (sizeof...(Xs) != 0) { + return (c == X) || match_impl(c); + } + return (c == X); + } + + constexpr static bool contains_string_terminator() { + for (const auto& match : matches) { + if (match == '\0') { + return false; + } + } + return true; + } + +public: + static bool match(char c) { + return match_impl(c); + } + + constexpr static bool enabled = true; + constexpr static std::array matches{Cs...}; + static_assert(contains_string_terminator(), + "string terminator cannot be used as a match character"); +}; + +template +constexpr bool matches_intersect() { + for (const auto& first_match : FirstMatcher::matches) { + for (const auto& second_match : SecondMatcher::matches) { + if (first_match != '\0' && first_match == second_match) { + return true; + } + } + } + return false; +} + +template <> +class matcher<'\0'> { +public: + constexpr static bool enabled = false; + constexpr static std::array matches{'\0'}; + static bool match(char c) = delete; +}; + +template +struct quote : matcher {}; + +template +struct trim : matcher {}; + +template +struct escape : matcher {}; + +template class Template> +struct is_instance_of_matcher { + constexpr static bool value = false; +}; + +template class Template> +struct is_instance_of_matcher, Template> { + constexpr static bool value = true; +}; + +template