diff --git a/include/ss/converter.hpp b/include/ss/converter.hpp index 4c6c0c9..9adc536 100644 --- a/include/ss/converter.hpp +++ b/include/ss/converter.hpp @@ -1,5 +1,17 @@ #pragma once +// TODO remove +#include +#ifndef DBG +void log(const std::string& log) { + std::cout << log << std::endl; +} +#else +void log(const std::string&) { +} +#endif +// +// #include "extract.hpp" #include "function_traits.hpp" #include "restrictions.hpp" @@ -8,6 +20,10 @@ #include #include +constexpr auto space = '_'; +constexpr auto escaping = true; +constexpr auto quote = '"'; + namespace ss { INIT_HAS_METHOD(tied); INIT_HAS_METHOD(ss_valid); @@ -21,7 +37,7 @@ INIT_HAS_METHOD(error); // eg. no_validator_tup_t> <=> std::tuple // where ss::nx is a validator '(n)one e(x)cept' which // checks if the returned character is either 'A' or 'B', returns error if not -// additionaly if one element is left in the pack, it will be unwraped from +// additionally if one element is left in the pack, it will be unwrapped from // the tuple eg. no_void_validator_tup_t <=> int instead of std::tuple template struct no_validator; @@ -139,6 +155,12 @@ public: return to_object(convert(elems)); } + // same as above, but uses cached split line + template + T convert_object() { + return to_object(convert()); + } + // parses already split line, returns either a tuple of objects with // parsed values (returns raw element (no tuple) if Ts is empty), or if // one argument is given which is a class which has a tied @@ -162,6 +184,12 @@ public: } } + // same as above, but uses cached split line + template + no_void_validator_tup_t convert() { + return convert(input_); + } + bool valid() const { return (error_mode_ == error_mode::error_string) ? string_error_.empty() : bool_error_ == false; @@ -176,7 +204,7 @@ public: } // 'splits' string by given delimiter, returns vector of pairs which - // contain the beginings and the ends of each column of the string + // contain the beginnings and the ends of each column of the string const split_input& split(const char* const line, const std::string& delim = "") { input_.clear(); @@ -215,6 +243,24 @@ private: return error; } + void set_error_invalid_quotation() { + if (error_mode_ == error_mode::error_string) { + string_error_.clear(); + string_error_.append("invalid quotation"); + } else { + bool_error_ = true; + } + } + + void set_error_unterminated_quote() { + if (error_mode_ == error_mode::error_string) { + string_error_.clear(); + string_error_.append("unterminated quote"); + } else { + bool_error_ = true; + } + } + void set_error_invalid_conversion(const string_range msg, size_t pos) { if (error_mode_ == error_mode::error_string) { string_error_.clear(); @@ -277,30 +323,107 @@ private: template const split_input& split_impl(const char* const line, Delim delim, size_t delim_size = 1) { - auto range = substring(line, delim); + auto [range, begin] = substring(line, delim); input_.push_back(range); while (range.second[0] != '\0') { - range = substring(range.second + delim_size, delim); + if constexpr (quote != '\0') { + if (*begin == quote) { + ++begin; + } + if (*begin == '\0') { + break; + } + } + + std::tie(range, begin) = substring(begin + delim_size, delim); + log("-> " + std::string{range.first, range.second}); input_.push_back(range); } return input_; } - bool no_match(const char* end, char delim) const { - return *end != delim; + size_t match(const char* begin, char delim) const { + const char* p = begin; + if constexpr (space == '\0') { + if (*p == delim) { + return 1; + } + } else { + while (*p == space) { + ++p; + } + if (*p == '\0') { + return p - begin; + } + if (*p != delim) { + return 0; + } + do + ++p; + while (*p == space); + return p - begin; + } } - bool no_match(const char* end, const std::string& delim) const { + size_t match(const char* end, const std::string& delim) const { + // TODO + log("ahamm"); return strncmp(end, delim.c_str(), delim.size()) != 0; } template - string_range substring(const char* const begin, Delim delim) const { + std::tuple substring(const char* begin, + Delim delim) { const char* end; - for (end = begin; *end != '\0' && no_match(end, delim); ++end) + const char* i; + for (i = begin; *i != '\0'; ++i) ; + log(">> " + std::string{begin, i}); + if constexpr (quote != '\0') { + if (*begin == quote) { + ++begin; - return string_range{begin, end}; + for (end = begin; true; ++end) { + + if (*end == '\0') { + log("error"); + set_error_unterminated_quote(); + return {string_range{begin, end}, end}; + } + + if constexpr (escaping) { + if (end[-1] == '\\') { + continue; + } + } + + if (*end == quote) { + break; + } + } + + // end is not \0 + size_t to_ignore = match(end + 1, delim); + log(std::to_string(to_ignore)); + if (to_ignore != 0) { + return {string_range{begin, end}, end + to_ignore}; + } + + log("error"); + set_error_invalid_quotation(); + return {string_range{begin, end}, end}; + } + } + + for (end = begin; *end != '\0'; ++end) { + size_t to_ignore = match(end, delim); + log(std::to_string(to_ignore)); + if (to_ignore != 0) { + return {string_range{begin, end}, end + to_ignore}; + } + } + + return {string_range{begin, end}, end}; } //////////////// diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 20385d5..adb35c0 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -3,9 +3,9 @@ #include "converter.hpp" #include "extract.hpp" #include "restrictions.hpp" +#include #include #include -#include #include #include @@ -39,7 +39,7 @@ public: void set_error_mode(error_mode mode) { error_mode_ = mode; - converter_.set_error_mode(mode); + buff_.set_error_mode(mode); } const std::string& error_msg() const { @@ -68,10 +68,9 @@ public: return {}; } - split_input_ = converter_.split(buff_.get(), delim_); - auto value = converter_.convert(split_input_); + auto value = buff_.get_converter().convert(); - if (!converter_.valid()) { + if (!buff_.get_converter().valid()) { set_error_invalid_conversion(); } @@ -134,7 +133,7 @@ public: composite composite_with(T&& new_value) { auto merged_values = std::tuple_cat(std::move(values_), - std::tuple{parser_.valid() + std::tuple{parser_.valid() ? std::forward(new_value) : std::nullopt}); return {std::move(merged_values), parser_}; @@ -160,8 +159,8 @@ public: no_void_validator_tup_t try_same() { parser_.clear_error(); auto value = - parser_.converter_.convert(parser_.split_input_); - if (!parser_.converter_.valid()) { + parser_.buff_.get_converter().template convert(); + if (!parser_.buff_.get_converter().valid()) { parser_.set_error_invalid_conversion(); } return value; @@ -249,37 +248,57 @@ private: class buffer { char* buffer_{nullptr}; - char* new_buffer_{nullptr}; + char* next_line_buffer_{nullptr}; + + converter converter_; + converter next_line_converter_; + size_t size_{0}; + const std::string& delim_; public: + buffer(const std::string& delimiter) : delim_{delimiter} { + } + ~buffer() { free(buffer_); - free(new_buffer_); + free(next_line_buffer_); } bool read(FILE* file) { - ssize_t size = getline(&new_buffer_, &size_, file); + ssize_t size = getline(&next_line_buffer_, &size_, file); size_t string_end = size - 1; if (size == -1) { return false; } - if (size >= 2 && new_buffer_[size - 2] == '\r') { + if (size >= 2 && next_line_buffer_[size - 2] == '\r') { string_end--; } - new_buffer_[string_end] = '\0'; + next_line_buffer_[string_end] = '\0'; + next_line_converter_.split(next_line_buffer_, delim_); + return true; } + void set_error_mode(error_mode mode) { + converter_.set_error_mode(mode); + next_line_converter_.set_error_mode(mode); + } + + converter& get_converter() { + return converter_; + } + const char* get() const { return buffer_; } void update() { - std::swap(buffer_, new_buffer_); + std::swap(buffer_, next_line_buffer_); + std::swap(converter_, next_line_converter_); } }; @@ -324,7 +343,7 @@ private: .append(" ") .append(std::to_string(line_number_)) .append(": ") - .append(converter_.error_msg()) + .append(buff_.get_converter().error_msg()) .append(": \"") .append(buff_.get()) .append("\""); @@ -342,10 +361,8 @@ private: std::string string_error_; bool bool_error_{false}; error_mode error_mode_{error_mode::error_bool}; - converter converter_; - converter::split_input split_input_; FILE* file_{nullptr}; - buffer buff_; + buffer buff_{delim_}; size_t line_number_{0}; bool eof_{false}; }; diff --git a/test/makefile b/test/makefile index 9bbe159..2652fca 100644 --- a/test/makefile +++ b/test/makefile @@ -1,6 +1,6 @@ -CXX=clang++ -CXXFLAGS=-Wall -Wextra -std=c++17 -lstdc++fs -TESTS=test_parser test_converter test_extractions +CXX=clang++-9 +CXXFLAGS=-Wall -Wextra -std=c++17 -O0 -lstdc++fs +TESTS=test_converter all: $(TESTS) diff --git a/test/test_converter b/test/test_converter new file mode 100755 index 0000000..a24c1b7 Binary files /dev/null and b/test/test_converter differ diff --git a/test/test_converter.cpp b/test/test_converter.cpp index 82dc020..cf469fb 100644 --- a/test/test_converter.cpp +++ b/test/test_converter.cpp @@ -1,8 +1,123 @@ +#include #define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN #include "../include/ss/converter.hpp" #include "doctest.h" #include +/* TODO +TEST_CASE("testing quoting with escaping") { + std::vector values{"10", "he\\\"llo", "\\\"", + "\\\"a,a\\\"", "3.33", "a\\\""}; + + // with quote + ss::converter c; + for (size_t i = 0; i < values.size() * values.size(); ++i) { + std::string input1; + std::string input2; + for (size_t j = 0; j < values.size(); ++j) { + if (i & (1 << j) && j != 2 && j != 3) { + input1.append(values[j]); + input2.append(values.at(values.size() - 1 - j)); + } else { + input1.append("\"" + values[j] + "\""); + input2.append("\"" + values.at(values.size() - 1 - j) + "\""); + } + input1.push_back(','); + input2.push_back(','); + } + input1.pop_back(); + input2.pop_back(); + input1.append("\0\""); + input2.append("\0\""); + + auto tup1 = c.convert(input1.c_str(), ","); + if (!c.valid()) { + FAIL("invalid: " + input1); + } else { + auto [a, b, c, d, e, f] = tup1; + CHECK(a == 10); + CHECK(b == "he\"llo"); + CHECK(c == "\""); + CHECK(d == "\"a,a\""); + CHECK(e == 3.33); + CHECK(f == "a\""); + std::cout << a << ' ' << b << ' ' << c << ' ' << d << ' ' << e + << ' ' << f << std::endl; + CHECK(tup1 == + std::make_tuple(10, "he\"llo", "\"", "\"a,a\"", 3.33, "a\"")); + } + + auto tup2 = c.convert(input2.c_str(), ","); + if (!c.valid()) { + FAIL("invalid: " + input2); + } else { + CHECK(tup2 == + std::make_tuple("a\"", 3.33, "\"a,a\"", "\"", "he\"llo", 10)); + } + } +} +*/ + +TEST_CASE("testing quoting without escaping") { + std::vector values{"10", "hello", ",", "a,a", "3.33", "a"}; + + // with quote + ss::converter c; + for (size_t i = 0; i < values.size() * values.size(); ++i) { + std::string input1; + std::string input2; + for (size_t j = 0; j < values.size(); ++j) { + if (i & (1 << j) && j != 2 && j != 3) { + input1.append(values[j]); + input2.append(values.at(values.size() - 1 - j)); + } else { + input1.append("\"" + values[j] + "\""); + input2.append("\"" + values.at(values.size() - 1 - j) + "\""); + } + input1.append("__"); + input1.push_back(','); + input1.append("__"); + input2.push_back(','); + } + input1.pop_back(); + input1.pop_back(); + input1.pop_back(); + input2.pop_back(); + input1.append("\0\""); + input2.append("\0\""); + + auto tup1 = c.convert(input1.c_str(), ","); + if (!c.valid()) { + FAIL("invalid: " + input1); + } else { + auto [a, b, c, d, e, f] = tup1; + CHECK(a == 10); + CHECK(b == "hello"); + CHECK(c == ","); + CHECK(d == "a,a"); + CHECK(e == 3.33); + CHECK(f == 'a'); + } + + auto tup2 = c.convert(input2.c_str(), ","); + if (!c.valid()) { + FAIL("invalid: " + input2); + } else { + auto [f, e, d, c, b, a] = tup2; + CHECK(a == 10); + CHECK(b == "hello"); + CHECK(c == ","); + CHECK(d == "a,a"); + CHECK(e == 3.33); + CHECK(f == 'a'); + } + } +} + TEST_CASE("testing split") { ss::converter c; for (const auto& [s, expected, delim] : @@ -48,7 +163,8 @@ TEST_CASE("testing valid conversions") { CHECK(tup == 5); } { - auto tup = c.convert("junk 5 junk", " "); + // TODO make \t -> ' ' + auto tup = c.convert("junk\t5\tjunk", "\t"); REQUIRE(c.valid()); CHECK(tup == 5); } diff --git a/test/test_parser.cpp b/test/test_parser.cpp index 4f87622..341a453 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -481,8 +481,8 @@ TEST_CASE("testing the moving of parsed values") { TEST_CASE("testing the moving of parsed composite values") { // to compile is enough return; - ss::parser* p; - p->try_next() + ss::parser p{"", ""}; + p.try_next() .or_else([](auto&&) {}) .or_else([](auto&) {}) .or_else([](auto&&) {})