From 9b90803f6f5a811c9e14edd572d0b6a0746a3e46 Mon Sep 17 00:00:00 2001 From: ado Date: Sat, 6 Feb 2021 01:44:46 +0100 Subject: [PATCH] make split_input private, make resplit private, update tests and the converter --- README.md | 10 +++++++ include/ss/converter.hpp | 18 +++++++---- include/ss/splitter.hpp | 35 ++++++++++++---------- test/test_splitter.cpp | 64 +++++++++++++++++++++++++++------------- 4 files changed, 86 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index e8f9be6..1044574 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ Bill (Heath) Gates 65 3.3 * Works on any type * Easy to use * No exceptions + * Works with quotes, escapes and spacings * Columns and rows can be ignored * Works with any type of delimiter * Can return whole objects composed of converted values @@ -216,6 +217,15 @@ inline bool ss::extract(const char* begin, const char* end, shape& dst) { ``` The shape enum will be used in an example below. The **inline** is there just to prevent multiple definition errors. The function returns **true** if the conversion was a success, and **false** otherwise. The function uses **const char*** begin and end for performance reasons. +## Quoting +Not yet documented. + +## Escaping +Not yet documented. + +## Spacing +Not yet documented. + ## Error handling Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method: diff --git a/include/ss/converter.hpp b/include/ss/converter.hpp index e5b3de3..e73c5db 100644 --- a/include/ss/converter.hpp +++ b/include/ss/converter.hpp @@ -127,7 +127,7 @@ public: no_void_validator_tup_t convert( line_ptr_type line, const std::string& delim = default_delimiter) { split(line, delim); - return convert(splitter_.input_); + return convert(splitter_.split_input_); } // parses already split line, returns 'T' object with extracted values @@ -169,7 +169,7 @@ public: // same as above, but uses cached split line template no_void_validator_tup_t convert() { - return convert(splitter_.input_); + return convert(splitter_.split_input_); } bool valid() const { @@ -194,20 +194,25 @@ public: // contain the beginnings and the ends of each column of the string const split_input& split(line_ptr_type line, const std::string& delim = default_delimiter) { - splitter_.input_.clear(); + splitter_.split_input_.clear(); if (line[0] == '\0') { - return splitter_.input_; + return splitter_.split_input_; } return splitter_.split(line, delim); } +private: + + //////////////// + // resplit + //////////////// + const split_input& resplit(line_ptr_type new_line, ssize_t new_size, const std::string& delim = default_delimiter) { return splitter_.resplit(new_line, new_size, delim); } -private: //////////////// // error //////////////// @@ -373,6 +378,9 @@ private: bool bool_error_; enum error_mode error_mode_ { error_mode::error_bool }; splitter splitter_; + + template + friend class parser; }; } /* ss */ diff --git a/include/ss/splitter.hpp b/include/ss/splitter.hpp index 5c32367..dd0c0ec 100644 --- a/include/ss/splitter.hpp +++ b/include/ss/splitter.hpp @@ -50,34 +50,39 @@ public: const split_input& split(line_ptr_type new_line, const std::string& delimiter = default_delimiter) { - input_.clear(); + split_input_.clear(); return resplit(new_line, -1, delimiter); } void adjust_ranges(const char* old_line) { - for (auto& [begin, end] : input_) { + for (auto& [begin, end] : split_input_) { begin = begin - old_line + line_; end = end - old_line + line_; } } +private: + //////////////// + // resplit + //////////////// + const split_input& resplit( line_ptr_type new_line, ssize_t new_size, const std::string& delimiter = default_delimiter) { line_ = new_line; // resplitting, continue from last slice - if (!input_.empty() && unterminated_quote()) { - const auto& last = std::prev(input_.end()); + if (!split_input_.empty() && unterminated_quote()) { + const auto& last = std::prev(split_input_.end()); const auto [old_line, old_begin] = *last; size_t begin = old_begin - old_line - 1; - input_.pop_back(); + split_input_.pop_back(); adjust_ranges(old_line); // safety measure if (new_size != -1 && static_cast(new_size) < begin) { set_error_invalid_resplit(); - return input_; + return split_input_; } begin_ = line_ + begin; @@ -86,7 +91,6 @@ public: return split_impl_select_delim(delimiter); } -private: //////////////// // error //////////////// @@ -213,7 +217,7 @@ private: void shift_and_push() { shift_and_set_current(); - input_.emplace_back(begin_, curr_); + split_input_.emplace_back(begin_, curr_); } void shift_and_jump_escape() { @@ -237,7 +241,7 @@ private: switch (delimiter.size()) { case 0: set_error_empty_delimiter(); - return input_; + return split_input_; case 1: return split_impl(delimiter[0]); default: @@ -248,7 +252,7 @@ private: template const split_input& split_impl(const Delim& delim) { - if (input_.empty()) { + if (split_input_.empty()) { begin_ = line_; } @@ -257,7 +261,7 @@ private: for (done_ = false; !done_; read(delim)) ; - return input_; + return split_input_; } //////////////// @@ -319,7 +323,7 @@ private: // eg: ..."hell\0 -> quote not terminated if (*end_ == '\0') { set_error_unterminated_quote(); - input_.emplace_back(line_, begin_); + split_input_.emplace_back(line_, begin_); done_ = true; break; } @@ -353,7 +357,7 @@ private: // mismatched quote // eg: ...,"hel"lo,... -> error set_error_mismatched_quote(end_ - line_); - input_.emplace_back(line_, begin_); + split_input_.emplace_back(line_, begin_); } done_ = true; break; @@ -375,9 +379,10 @@ private: line_ptr_type line_; bool done_; size_t escaped_{0}; + split_input split_input_; -public: - split_input input_; + template + friend class converter; }; } /* ss */ diff --git a/test/test_splitter.cpp b/test/test_splitter.cpp index 82b2b10..50ea6af 100644 --- a/test/test_splitter.cpp +++ b/test/test_splitter.cpp @@ -517,14 +517,27 @@ auto expect_unterminated_quote(Splitter& s, const std::string& line) { return vec; } +namespace ss { +// Used to test resplit since it is only accessible via friend class converter +template +class converter { +public: + ss::splitter splitter; + auto resplit(char* new_line, size_t new_line_size) { + return splitter.resplit(new_line, new_line_size); + } +}; +} /* ss */ + TEST_CASE("testing unterminated quote") { { - ss::splitter> s; + ss::converter> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, "\"just"); CHECK(vec.size() == 1); auto new_line = buff.append(R"(",strings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); std::vector expected{"just", "strings"}; @@ -532,13 +545,14 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter> s; + ss::converter> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, "just,some,\"random"); std::vector expected{"just", "some", "just,some,\""}; CHECK(words(vec) == expected); auto new_line = buff.append(R"(",strings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just", "some", "random", "strings"}; @@ -546,13 +560,14 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter> s; + ss::converter> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"("just","some","ran"")"); std::vector expected{"just", "some", R"("just","some",")"}; CHECK(words(vec) == expected); auto new_line = buff.append(R"(,dom","strings")"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just", "some", "ran\",dom", "strings"}; @@ -560,14 +575,15 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter> s; + ss::converter> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"("just","some","ran)"); std::vector expected{"just", "some", R"("just","some",")"}; CHECK(words(vec) == expected); { auto new_line = buff.append(R"(,dom)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(!s.valid()); CHECK(s.unterminated_quote()); CHECK(words(vec) == expected); @@ -575,7 +591,7 @@ TEST_CASE("testing unterminated quote") { { auto new_line = buff.append(R"(",strings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just", "some", "ran,dom", "strings"}; @@ -584,7 +600,8 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter, ss::escape<'\\'>> s; + ss::converter, ss::escape<'\\'>> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"("just\"some","ra)"); std::vector expected{"just\"some"}; auto w = words(vec); @@ -592,7 +609,7 @@ TEST_CASE("testing unterminated quote") { CHECK(w == expected); { auto new_line = buff.append(R"(n,dom",str\"ings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just\"some", "ran,dom", "str\"ings"}; @@ -601,7 +618,8 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter, ss::escape<'\\'>> s; + ss::converter, ss::escape<'\\'>> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"("just\"some","ra"")"); std::vector expected{"just\"some"}; auto w = words(vec); @@ -609,7 +627,7 @@ TEST_CASE("testing unterminated quote") { CHECK(w == expected); { auto new_line = buff.append(R"(n,dom",str\"ings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just\"some", "ra\"n,dom", "str\"ings"}; @@ -618,7 +636,8 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter, ss::escape<'\\'>> s; + ss::converter, ss::escape<'\\'>> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"("just\"some","ra\")"); std::vector expected{"just\"some"}; auto w = words(vec); @@ -626,7 +645,7 @@ TEST_CASE("testing unterminated quote") { CHECK(w == expected); { auto new_line = buff.append(R"(n,dom",str\"ings)"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just\"some", "ra\"n,dom", "str\"ings"}; @@ -635,7 +654,8 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter, ss::trim<' '>> s; + ss::converter, ss::trim<' '>> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"( "just" ,some, "ra )"); std::vector expected{"just", "some"}; auto w = words(vec); @@ -643,7 +663,7 @@ TEST_CASE("testing unterminated quote") { CHECK(w == expected); { auto new_line = buff.append(R"( n,dom" , strings )"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"just", "some", "ra n,dom", "strings"}; @@ -652,7 +672,8 @@ TEST_CASE("testing unterminated quote") { } { - ss::splitter, ss::trim<' '>, ss::escape<'\\'>> s; + ss::converter, ss::trim<' '>, ss::escape<'\\'>> c; + auto& s = c.splitter; auto vec = expect_unterminated_quote(s, R"( "ju\"st" ,some, "ra \")"); std::vector expected{"ju\"st", "some"}; auto w = words(vec); @@ -660,7 +681,7 @@ TEST_CASE("testing unterminated quote") { CHECK(w == expected); { auto new_line = buff.append(R"( n,dom" , strings )"); - vec = s.resplit(new_line, strlen(new_line)); + vec = c.resplit(new_line, strlen(new_line)); CHECK(s.valid()); CHECK(!s.unterminated_quote()); expected = {"ju\"st", "some", "ra \" n,dom", "strings"}; @@ -670,7 +691,8 @@ TEST_CASE("testing unterminated quote") { } TEST_CASE("testing invalid splits") { - ss::splitter, ss::trim<' '>, ss::escape<'\\'>> s; + ss::converter, ss::trim<' '>, ss::escape<'\\'>> c; + auto& s = c.splitter; // empty delimiter s.split(buff("some,random,strings"), ""); @@ -689,7 +711,7 @@ TEST_CASE("testing invalid splits") { // invalid resplit char new_line[] = "some"; - auto a = s.resplit(new_line, strlen(new_line)); + auto a = c.resplit(new_line, strlen(new_line)); CHECK(!s.valid()); CHECK(!s.unterminated_quote()); }