make split_input private, make resplit private, update tests and the converter

This commit is contained in:
ado 2021-02-06 01:44:46 +01:00
parent 42629c39c4
commit 9b90803f6f
4 changed files with 86 additions and 41 deletions

View File

@ -48,6 +48,7 @@ Bill (Heath) Gates 65 3.3
* Works on any type
* Easy to use
* No exceptions
* Works with quotes, escapes and spacing
* Columns and rows can be ignored
* Works with any type of delimiter
* Can return whole objects composed of converted values
@ -216,6 +217,15 @@ inline bool ss::extract(const char* begin, const char* end, shape& dst) {
```
The shape enum will be used in an example below. The **inline** keyword is there only to prevent multiple-definition errors. The function returns **true** if the conversion succeeded, and **false** otherwise. The function takes **const char\*** begin and end pointers for performance reasons.
## Quoting
Not yet documented.
## Escaping
Not yet documented.
## Spacing
Not yet documented.
## Error handling
Detailed error messages can be accessed via the **error_msg** method. To enable them, the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method:

View File

@ -127,7 +127,7 @@ public:
no_void_validator_tup_t<Ts...> convert(
line_ptr_type line, const std::string& delim = default_delimiter) {
split(line, delim);
return convert<Ts...>(splitter_.input_);
return convert<Ts...>(splitter_.split_input_);
}
// parses already split line, returns 'T' object with extracted values
@ -169,7 +169,7 @@ public:
// same as above, but uses cached split line
template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> convert() {
return convert<T, Ts...>(splitter_.input_);
return convert<T, Ts...>(splitter_.split_input_);
}
bool valid() const {
@ -194,20 +194,25 @@ public:
// contain the beginnings and the ends of each column of the string
const split_input& split(line_ptr_type line,
const std::string& delim = default_delimiter) {
splitter_.input_.clear();
splitter_.split_input_.clear();
if (line[0] == '\0') {
return splitter_.input_;
return splitter_.split_input_;
}
return splitter_.split(line, delim);
}
private:
////////////////
// resplit
////////////////
// Forwards the new line, its size and the delimiter to the owned splitter's
// resplit and returns the reference to the split input that call yields.
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
                           const std::string& delim = default_delimiter) {
    const split_input& columns = splitter_.resplit(new_line, new_size, delim);
    return columns;
}
private:
////////////////
// error
////////////////
@ -373,6 +378,9 @@ private:
bool bool_error_;
enum error_mode error_mode_ { error_mode::error_bool };
splitter<Matchers...> splitter_;
template <typename ...>
friend class parser;
};
} /* ss */

View File

@ -50,34 +50,39 @@ public:
const split_input& split(line_ptr_type new_line,
const std::string& delimiter = default_delimiter) {
input_.clear();
split_input_.clear();
return resplit(new_line, -1, delimiter);
}
void adjust_ranges(const char* old_line) {
for (auto& [begin, end] : input_) {
for (auto& [begin, end] : split_input_) {
begin = begin - old_line + line_;
end = end - old_line + line_;
}
}
private:
////////////////
// resplit
////////////////
const split_input& resplit(
line_ptr_type new_line, ssize_t new_size,
const std::string& delimiter = default_delimiter) {
line_ = new_line;
// resplitting, continue from last slice
if (!input_.empty() && unterminated_quote()) {
const auto& last = std::prev(input_.end());
if (!split_input_.empty() && unterminated_quote()) {
const auto& last = std::prev(split_input_.end());
const auto [old_line, old_begin] = *last;
size_t begin = old_begin - old_line - 1;
input_.pop_back();
split_input_.pop_back();
adjust_ranges(old_line);
// safety measure
if (new_size != -1 && static_cast<size_t>(new_size) < begin) {
set_error_invalid_resplit();
return input_;
return split_input_;
}
begin_ = line_ + begin;
@ -86,7 +91,6 @@ public:
return split_impl_select_delim(delimiter);
}
private:
////////////////
// error
////////////////
@ -213,7 +217,7 @@ private:
void shift_and_push() {
shift_and_set_current();
input_.emplace_back(begin_, curr_);
split_input_.emplace_back(begin_, curr_);
}
void shift_and_jump_escape() {
@ -237,7 +241,7 @@ private:
switch (delimiter.size()) {
case 0:
set_error_empty_delimiter();
return input_;
return split_input_;
case 1:
return split_impl(delimiter[0]);
default:
@ -248,7 +252,7 @@ private:
template <typename Delim>
const split_input& split_impl(const Delim& delim) {
if (input_.empty()) {
if (split_input_.empty()) {
begin_ = line_;
}
@ -257,7 +261,7 @@ private:
for (done_ = false; !done_; read(delim))
;
return input_;
return split_input_;
}
////////////////
@ -319,7 +323,7 @@ private:
// eg: ..."hell\0 -> quote not terminated
if (*end_ == '\0') {
set_error_unterminated_quote();
input_.emplace_back(line_, begin_);
split_input_.emplace_back(line_, begin_);
done_ = true;
break;
}
@ -353,7 +357,7 @@ private:
// mismatched quote
// eg: ...,"hel"lo,... -> error
set_error_mismatched_quote(end_ - line_);
input_.emplace_back(line_, begin_);
split_input_.emplace_back(line_, begin_);
}
done_ = true;
break;
@ -375,9 +379,10 @@ private:
line_ptr_type line_;
bool done_;
size_t escaped_{0};
split_input split_input_;
public:
split_input input_;
template <typename ...>
friend class converter;
};
} /* ss */

View File

@ -517,14 +517,27 @@ auto expect_unterminated_quote(Splitter& s, const std::string& line) {
return vec;
}
namespace ss {
// Test-only stand-in for ss::converter. splitter::resplit is private and is
// reachable only through the befriended converter template, so the tests
// drive it through this wrapper.
template <typename... Matchers>
class converter {
public:
    // Exposed directly so tests can call the splitter's public API as well.
    ss::splitter<Matchers...> splitter;

    // Forwards to splitter::resplit using its default delimiter and returns
    // a copy of the resulting split input.
    auto resplit(char* new_line, size_t new_line_size) {
        auto columns = splitter.resplit(new_line, new_line_size);
        return columns;
    }
};
} /* ss */
TEST_CASE("testing unterminated quote") {
{
ss::splitter<ss::quote<'"'>> s;
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, "\"just");
CHECK(vec.size() == 1);
auto new_line = buff.append(R"(",strings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
std::vector<std::string> expected{"just", "strings"};
@ -532,13 +545,14 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>> s;
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, "just,some,\"random");
std::vector<std::string> expected{"just", "some", "just,some,\""};
CHECK(words(vec) == expected);
auto new_line = buff.append(R"(",strings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "random", "strings"};
@ -546,13 +560,14 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>> s;
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just","some","ran"")");
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
CHECK(words(vec) == expected);
auto new_line = buff.append(R"(,dom","strings")");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ran\",dom", "strings"};
@ -560,14 +575,15 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>> s;
ss::converter<ss::quote<'"'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just","some","ran)");
std::vector<std::string> expected{"just", "some", R"("just","some",")"};
CHECK(words(vec) == expected);
{
auto new_line = buff.append(R"(,dom)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(words(vec) == expected);
@ -575,7 +591,7 @@ TEST_CASE("testing unterminated quote") {
{
auto new_line = buff.append(R"(",strings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ran,dom", "strings"};
@ -584,7 +600,8 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>, ss::escape<'\\'>> s;
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra)");
std::vector<std::string> expected{"just\"some"};
auto w = words(vec);
@ -592,7 +609,7 @@ TEST_CASE("testing unterminated quote") {
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ran,dom", "str\"ings"};
@ -601,7 +618,8 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>, ss::escape<'\\'>> s;
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra"")");
std::vector<std::string> expected{"just\"some"};
auto w = words(vec);
@ -609,7 +627,7 @@ TEST_CASE("testing unterminated quote") {
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
@ -618,7 +636,8 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>, ss::escape<'\\'>> s;
ss::converter<ss::quote<'"'>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"("just\"some","ra\")");
std::vector<std::string> expected{"just\"some"};
auto w = words(vec);
@ -626,7 +645,7 @@ TEST_CASE("testing unterminated quote") {
CHECK(w == expected);
{
auto new_line = buff.append(R"(n,dom",str\"ings)");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just\"some", "ra\"n,dom", "str\"ings"};
@ -635,7 +654,8 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>, ss::trim<' '>> s;
ss::converter<ss::quote<'"'>, ss::trim<' '>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"( "just" ,some, "ra )");
std::vector<std::string> expected{"just", "some"};
auto w = words(vec);
@ -643,7 +663,7 @@ TEST_CASE("testing unterminated quote") {
CHECK(w == expected);
{
auto new_line = buff.append(R"( n,dom" , strings )");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"just", "some", "ra n,dom", "strings"};
@ -652,7 +672,8 @@ TEST_CASE("testing unterminated quote") {
}
{
ss::splitter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> s;
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
auto& s = c.splitter;
auto vec = expect_unterminated_quote(s, R"( "ju\"st" ,some, "ra \")");
std::vector<std::string> expected{"ju\"st", "some"};
auto w = words(vec);
@ -660,7 +681,7 @@ TEST_CASE("testing unterminated quote") {
CHECK(w == expected);
{
auto new_line = buff.append(R"( n,dom" , strings )");
vec = s.resplit(new_line, strlen(new_line));
vec = c.resplit(new_line, strlen(new_line));
CHECK(s.valid());
CHECK(!s.unterminated_quote());
expected = {"ju\"st", "some", "ra \" n,dom", "strings"};
@ -670,7 +691,8 @@ TEST_CASE("testing unterminated quote") {
}
TEST_CASE("testing invalid splits") {
ss::splitter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> s;
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
auto& s = c.splitter;
// empty delimiter
s.split(buff("some,random,strings"), "");
@ -689,7 +711,7 @@ TEST_CASE("testing invalid splits") {
// invalid resplit
char new_line[] = "some";
auto a = s.resplit(new_line, strlen(new_line));
auto a = c.resplit(new_line, strlen(new_line));
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
}