create separate header for the splitter, move splitter tests to different cpp file

This commit is contained in:
ado 2021-01-17 02:15:06 +01:00
parent 69d6df12be
commit 263dba7626
5 changed files with 391 additions and 441 deletions

View File

@ -2,6 +2,7 @@
#include "extract.hpp"
#include "function_traits.hpp"
#include "restrictions.hpp"
#include "splitter.hpp"
#include "type_traits.hpp"
#include <string>
#include <type_traits>
@ -105,347 +106,6 @@ constexpr bool tied_class_v = tied_class<Ts...>::value;
// Where an error gets recorded: inside a string, or inside a bool.
enum class error_mode {
    error_string,
    error_bool
};
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Compile-time set of characters.
//
// `matcher<'a', 'b'>::match(c)` is true when `c` equals one of the listed
// characters. `enabled` tells callers whether the matcher may be used at
// all, so that calls to `match` can be guarded with `if constexpr`.
template <char... Cs>
struct matcher {
    // True when `c` equals any of `Cs...`; an empty list matches nothing.
    constexpr static bool match(char c) {
        return ((c == Cs) || ...);
    }

    constexpr static bool enabled = true;
};

// Disabled matcher: the single character '\0' stands for "nothing configured".
// `match` is deleted so that an unguarded call fails to compile.
template <>
struct matcher<'\0'> {
    constexpr static bool enabled = false;
    static bool match(char c) = delete;
};
////////////////
// is instance of
////////////////
// Trait answering "is `T` an instantiation of the char-pack template
// `Template`?". The primary template is the "no" answer for every type.
template <typename T, template <char...> class Template>
struct is_instance_of_char {
    static constexpr bool value{false};
};

// The "yes" answer: selected only when `T` has the form `Template<Cs...>`.
template <char... Cs, template <char...> class Template>
struct is_instance_of_char<Template<Cs...>, Template> {
    static constexpr bool value{true};
};
///////////////////////////////////////////////////
// Option tags. Each one is a `matcher` over its own character list; the
// distinct template names are what lets `get_matcher` pick the right list
// out of a mixed option pack.

// Characters that open and close a quoted field.
template <char... Chars>
struct quote : public matcher<Chars...> {};

// Characters skipped around fields and delimiters.
template <char... Chars>
struct trim : public matcher<Chars...> {};

// Characters that escape the character following them.
template <char... Chars>
struct escape : public matcher<Chars...> {};
/////////////////////////////////////////////////
// -> type traits
// Compile-time selection: `if_then_else<B, T, U>::type` is `T` when `B` is
// true and `U` when `B` is false.
//
// The primary template covers the `true` case ...
template <bool B, typename T, typename U>
struct if_then_else {
    using type = T;
};

// ... and this specialization covers the only other value, `false`.
template <typename T, typename U>
struct if_then_else<false, T, U> {
    using type = U;
};
//////////////////////////////////////////////
// Searches the option pack `Ts...` for the first type that is an
// instantiation of `Matcher` and exposes it as `type`. When no option
// qualifies, `type` is `Matcher<'\0'>`.
template <template <char...> class Matcher, typename... Ts>
struct get_matcher;

// Recursive case: take `Head` if it is a `Matcher<...>`, otherwise whatever
// the remaining options yield.
template <template <char...> class Matcher, typename Head, typename... Tail>
struct get_matcher<Matcher, Head, Tail...> {
private:
    using from_tail = typename get_matcher<Matcher, Tail...>::type;

public:
    using type =
        typename if_then_else<is_instance_of_char<Head, Matcher>::value,
                              Head, from_tail>::type;
};

// Base case: the pack is exhausted, fall back to `Matcher<'\0'>`.
template <template <char...> class Matcher>
struct get_matcher<Matcher> {
    using type = Matcher<'\0'>;
};
///////////////////////////////////////////////
// TODO add restriction
// Sorts the option pack `Ts...` into one matcher per role (quote, trim,
// escape). A role for which no option was given resolves to get_matcher's
// fallback, `Matcher<'\0'>`.
template <typename... Ts>
struct setup {
// Each alias reuses the name of the template it looks up. Inside the
// right-hand side the name is still found as the template; the alias only
// becomes visible after its own declaration. Keep one alias per declaration
// and do not refer to the templates by unqualified name further down.
using quote = typename get_matcher<quote, Ts...>::type;
using trim = typename get_matcher<trim, Ts...>::type;
using escape = typename get_matcher<escape, Ts...>::type;
};
// A setup wrapped in another setup behaves exactly like the inner one.
template <typename... Ts>
struct setup<setup<Ts...>> : setup<Ts...> {};
/////////////////////////////////////////////////////////////////////////////
// States the splitter moves through while it walks a line.
enum class State {
    finished,
    begin,
    reading,
    quoting
};

// Two pointers delimiting a piece of the line being split.
using range = std::pair<const char*, const char*>;
using string_range = range;

// Result of a split: one range per field.
using split_input = std::vector<string_range>;
// Splits one line of delimiter-separated text into fields.
//
// `Ts...` are the options (`quote<...>`, `trim<...>`, `escape<...>`) resolved
// through `setup`; code belonging to a disabled option is compiled out with
// `if constexpr`.
//
// The line is edited in place: quote and escape characters are dropped by
// copying the rest of a field towards its start, and every returned range
// points into the caller's buffer. That buffer therefore has to be writable
// and NUL-terminated, and it has to outlive the ranges returned by `split`.
template <typename... Ts>
class splitter {
    using Setup = setup<Ts...>;
    using quote = typename Setup::quote;
    using trim = typename Setup::trim;
    using escape = typename Setup::escape;

    // True when the character at `end_i` is the delimiter.
    bool match(const char* end_i, char delim) {
        return *end_i == delim;
    }

    // True when the text at `end_i` starts with the delimiter. strncmp stops
    // at the terminating NUL, so a delimiter that would run past the end of
    // the line does not match.
    bool match(const char* end_i, const std::string& delim) {
        return strncmp(end_i, delim.c_str(), delim.size()) == 0;
    }

    size_t delimiter_size(char) {
        return 1;
    }

    size_t delimiter_size(const std::string& delim) {
        return delim.size();
    }

    // Advances the given pointer past every character accepted by the trim
    // matcher. The parameter shadows the member `curr` on purpose: only the
    // pointer passed in is moved.
    void trim_if_enabled(char*& curr) {
        if constexpr (trim::enabled) {
            while (trim::match(*curr)) {
                ++curr;
            }
        }
    }

    // When the character at `curr_i` is an escape character, stores the
    // character after `end` at the write position and steps `end` over the
    // escape character.
    // NOTE(review): the update goes through the members `curr`/`end`, not
    // through `curr_i`; the two only agree while `curr_i == end`, i.e. when
    // no trimming happened before the call - confirm this is intended.
    void shift_if_escaped(char*& curr_i) {
        if constexpr (escape::enabled) {
            if (escape::match(*curr_i)) {
                *curr = end[1];
                ++end;
            }
        }
    }

    // Copies one character from the read position to the write position and
    // advances both.
    void shift() {
        *curr = *end;
        ++end;
        ++curr;
    }

    // Same as shift(), for `n` characters at once. Source and destination
    // live in the same buffer and are identical or overlapping, which memcpy
    // does not allow, hence memmove.
    void shift(size_t n) {
        memmove(curr, end, n);
        end += n;
        curr += n;
    }

    // Inspects the text at `begin` (after optional trimming) and returns
    // {width, is_delimiter}:
    //   {0, false}  only trim characters remain before the end of the line
    //   {n, false}  no delimiter here; `n` characters may be consumed
    //   {n, true}   a delimiter, including the trim characters around it,
    //               occupies `n` characters
    template <typename Delim>
    std::tuple<size_t, bool> match_delimiter(char* begin, const Delim& delim) {
        char* end_i = begin;

        trim_if_enabled(end_i);

        // just spacing
        if (*end_i == '\0') {
            return {0, false};
        }

        // not a delimiter
        if (!match(end_i, delim)) {
            shift_if_escaped(end_i);
            return {1 + end_i - begin, false};
        }

        end_i += delimiter_size(delim);
        trim_if_enabled(end_i);

        // delimiter
        return {end_i - begin, true};
    }

public:
    // True when the most recent call to split() recorded no error.
    bool valid() const {
        return error_.empty();
    }

    // Splits `new_line` on the delimiter `d` and returns the fields.
    //
    // The returned vector is owned by the splitter and is reused by the next
    // call. An empty delimiter is an error: the result is empty and valid()
    // returns false afterwards.
    split_input& split(char* new_line, const std::string& d = ",") {
        line = new_line;
        output_.clear();
        error_.clear();

        switch (d.size()) {
        case 0:
            error_ = "empty delimiter";
            return output_;
        case 1:
            // single characters take the cheaper char comparison
            return split_impl(d[0]);
        default:
            return split_impl(d);
        }
    }

    // Runs the state machine over `line` until it reaches State::finished.
    // Every pass starts a new field: read and write position are both reset
    // to the beginning of that field.
    template <typename Delim>
    std::vector<range>& split_impl(const Delim& delim) {
        state = State::begin;
        begin = line;

        trim_if_enabled(begin);

        while (state != State::finished) {
            curr = end = begin;
            switch (state) {
            case (State::begin):
                state_begin();
                break;
            case (State::reading):
                state_reading(delim);
                break;
            case (State::quoting):
                state_quoting(delim);
                break;
            default:
                break;
            };
        }

        return output_;
    }

    // Decides how the next field is read: a leading quote character is
    // skipped and selects quoting, anything else selects plain reading.
    void state_begin() {
        if constexpr (quote::enabled) {
            if (quote::match(*begin)) {
                ++begin;
                state = State::quoting;
                return;
            }
        }
        state = State::reading;
    }

    // Reads an unquoted field up to the next delimiter or the end of the line.
    template <typename Delim>
    void state_reading(const Delim& delim) {
        while (true) {
            auto [width, valid] = match_delimiter(end, delim);

            // not a delimiter
            if (!valid) {
                if (width == 0) {
                    // eol
                    output_.emplace_back(begin, curr);
                    state = State::finished;
                    break;
                } else {
                    shift(width);
                    continue;
                }
            }

            // found delimiter
            push_and_start_next(width);
            break;
        }
    }

    // Reads a quoted field; `begin` already points past the opening quote.
    // Records an error when the closing quote is followed by something other
    // than a delimiter or the end of the line, and when the line ends before
    // the closing quote.
    template <typename Delim>
    void state_quoting(const Delim& delim) {
        if constexpr (quote::enabled) {
            while (true) {
                if (quote::match(*end)) {
                    // double quote
                    // eg: ...,"hel""lo,... -> hel"lo
                    if (quote::match(end[1])) {
                        ++end;
                        shift();
                        continue;
                    }

                    auto [width, valid] = match_delimiter(end + 1, delim);

                    // not a delimiter
                    if (!valid) {
                        if (width == 0) {
                            // eol
                            // eg: ...,"hello" \0 -> hello
                            // eg no trim: ...,"hello"\0 -> hello
                            output_.emplace_back(begin, curr);
                        } else {
                            // mismatched quote
                            // eg: ...,"hel"lo,... -> error
                            error_ = "mismatched quote";
                        }
                        state = State::finished;
                        break;
                    }

                    // delimiter; the extra 1 accounts for the closing quote
                    push_and_start_next(width + 1);
                    break;
                }

                if constexpr (escape::enabled) {
                    if (escape::match(*end)) {
                        // drop the escape character, keep what follows it
                        // NOTE(review): an escape character directly before
                        // the terminating NUL moves `end` past the end of the
                        // line - confirm whether that input can occur.
                        ++end;
                        shift();
                        continue;
                    }
                }

                // unterminated error
                // eg: ..."hell\0 -> quote not terminated
                if (*end == '\0') {
                    *curr = '\0';
                    error_ = "unterminated quote";
                    state = State::finished;
                    break;
                }

                shift();
            }
        } else {
            // impossible scenario: state_begin never selects quoting when
            // the quote matcher is disabled
            error_ = "quoting without a quote matcher";
            state = State::finished;
        }
    }

    // Stores the field [begin, curr) and starts the next field `n` characters
    // after the current read position.
    void push_and_start_next(size_t n) {
        output_.emplace_back(begin, curr);
        begin = end + n;
        state = State::begin;
    }

private:
    std::vector<range> output_;     // fields of the most recent split
    std::string error_ = "";        // empty means "no error"
    State state = State::finished;
    char* curr = nullptr;           // write position inside the current field
    char* end = nullptr;            // read position inside the current field
    char* begin = nullptr;          // first character of the current field
    char* line = nullptr;           // line handed to split()
};
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
////////////////////////////////////////////////////////
////////////////
// converter
////////////////

345
include/ss/splitter.hpp Normal file
View File

@ -0,0 +1,345 @@
#pragma once
#include "type_traits.hpp"
#include <cstdlib>
#include <cstring>
#include <string>
#include <vector>
namespace ss {
// Compile-time set of characters.
//
// `matcher<'a', 'b'>::match(c)` is true when `c` equals one of the listed
// characters. `enabled` tells callers whether the matcher may be used at
// all, so that calls to `match` can be guarded with `if constexpr`.
template <char... Cs>
struct matcher {
    // True when `c` equals any of `Cs...`; an empty list matches nothing.
    constexpr static bool match(char c) {
        return ((c == Cs) || ...);
    }

    constexpr static bool enabled = true;
};

// Disabled matcher: the single character '\0' stands for "nothing configured".
// `match` is deleted so that an unguarded call fails to compile.
template <>
struct matcher<'\0'> {
    constexpr static bool enabled = false;
    static bool match(char c) = delete;
};
////////////////
// is instance of
////////////////
// Trait answering "is `T` an instantiation of the char-pack template
// `Template`?". The primary template is the "no" answer for every type.
template <typename T, template <char...> class Template>
struct is_instance_of_char {
    static constexpr bool value{false};
};

// The "yes" answer: selected only when `T` has the form `Template<Cs...>`.
template <char... Cs, template <char...> class Template>
struct is_instance_of_char<Template<Cs...>, Template> {
    static constexpr bool value{true};
};
///////////////////////////////////////////////////
// Option tags. Each one is a `matcher` over its own character list; the
// distinct template names are what lets `get_matcher` pick the right list
// out of a mixed option pack.

// Characters that open and close a quoted field.
template <char... Chars>
struct quote : public matcher<Chars...> {};

// Characters skipped around fields and delimiters.
template <char... Chars>
struct trim : public matcher<Chars...> {};

// Characters that escape the character following them.
template <char... Chars>
struct escape : public matcher<Chars...> {};
/////////////////////////////////////////////////
// -> type traits
// Compile-time selection: `if_then_else<B, T, U>::type` is `T` when `B` is
// true and `U` when `B` is false.
//
// The primary template covers the `true` case ...
template <bool B, typename T, typename U>
struct if_then_else {
    using type = T;
};

// ... and this specialization covers the only other value, `false`.
template <typename T, typename U>
struct if_then_else<false, T, U> {
    using type = U;
};
//////////////////////////////////////////////
// Searches the option pack `Ts...` for the first type that is an
// instantiation of `Matcher` and exposes it as `type`. When no option
// qualifies, `type` is `Matcher<'\0'>`.
template <template <char...> class Matcher, typename... Ts>
struct get_matcher;

// Recursive case: take `Head` if it is a `Matcher<...>`, otherwise whatever
// the remaining options yield.
template <template <char...> class Matcher, typename Head, typename... Tail>
struct get_matcher<Matcher, Head, Tail...> {
private:
    using from_tail = typename get_matcher<Matcher, Tail...>::type;

public:
    using type =
        typename if_then_else<is_instance_of_char<Head, Matcher>::value,
                              Head, from_tail>::type;
};

// Base case: the pack is exhausted, fall back to `Matcher<'\0'>`.
template <template <char...> class Matcher>
struct get_matcher<Matcher> {
    using type = Matcher<'\0'>;
};
///////////////////////////////////////////////
// TODO add restriction
// Sorts the option pack `Ts...` into one matcher per role (quote, trim,
// escape). A role for which no option was given resolves to get_matcher's
// fallback, `Matcher<'\0'>`.
template <typename... Ts>
struct setup {
// Each alias reuses the name of the template it looks up. Inside the
// right-hand side the name is still found as the template; the alias only
// becomes visible after its own declaration. Keep one alias per declaration
// and do not refer to the templates by unqualified name further down.
using quote = typename get_matcher<quote, Ts...>::type;
using trim = typename get_matcher<trim, Ts...>::type;
using escape = typename get_matcher<escape, Ts...>::type;
};
// A setup wrapped in another setup behaves exactly like the inner one.
template <typename... Ts>
struct setup<setup<Ts...>> : setup<Ts...> {};
/////////////////////////////////////////////////////////////////////////////
// States the splitter moves through while it walks a line.
enum class State {
    finished,
    begin,
    reading,
    quoting
};

// Two pointers delimiting a piece of the line being split.
using range = std::pair<const char*, const char*>;
using string_range = range;

// Result of a split: one range per field.
using split_input = std::vector<string_range>;
template <typename... Ts>
class splitter {
using Setup = setup<Ts...>;
using quote = typename Setup::quote;
using trim = typename Setup::trim;
using escape = typename Setup::escape;
bool match(const char* end_i, char delim) {
return *end_i == delim;
};
bool match(const char* end_i, const std::string& delim) {
return strncmp(end_i, delim.c_str(), delim.size()) == 0;
};
size_t delimiter_size(char) {
return 1;
}
size_t delimiter_size(const std::string& delim) {
return delim.size();
}
void trim_if_enabled(char*& curr) {
if constexpr (trim::enabled) {
while (trim::match(*curr)) {
++curr;
}
}
}
void shift_if_escaped(char*& curr_i) {
if constexpr (escape::enabled) {
if (escape::match(*curr_i)) {
*curr = end[1];
++end;
}
}
}
void shift() {
if constexpr (escape::enabled || quote::enabled) {
*curr = *end;
}
++end;
++curr;
}
void shift(size_t n) {
if constexpr (escape::enabled || quote::enabled) {
memcpy(curr, end, n);
}
end += n;
curr += n;
}
template <typename Delim>
std::tuple<size_t, bool> match_delimiter(char* begin, const Delim& delim) {
char* end_i = begin;
trim_if_enabled(end_i);
// just spacing
if (*end_i == '\0') {
return {0, false};
}
// not a delimiter
if (!match(end_i, delim)) {
shift_if_escaped(end_i);
return {1 + end_i - begin, false};
}
end_i += delimiter_size(delim);
trim_if_enabled(end_i);
// delimiter
return {end_i - begin, true};
}
public:
bool valid() {
return error_.empty();
}
split_input& split(char* new_line, const std::string& d = ",") {
line = new_line;
output_.clear();
switch (d.size()) {
case 0:
// set error
return output_;
case 1:
return split_impl(d[0]);
default:
return split_impl(d);
}
}
template <typename Delim>
std::vector<range>& split_impl(const Delim& delim) {
state = State::begin;
begin = line;
trim_if_enabled(begin);
while (state != State::finished) {
curr = end = begin;
switch (state) {
case (State::begin):
state_begin();
break;
case (State::reading):
state_reading(delim);
break;
case (State::quoting):
state_quoting(delim);
break;
default:
break;
};
}
return output_;
}
void state_begin() {
if constexpr (quote::enabled) {
if (quote::match(*begin)) {
++begin;
state = State::quoting;
return;
}
}
state = State::reading;
}
template <typename Delim>
void state_reading(const Delim& delim) {
while (true) {
auto [width, valid] = match_delimiter(end, delim);
// not a delimiter
if (!valid) {
if (width == 0) {
// eol
output_.emplace_back(begin, curr);
state = State::finished;
break;
} else {
shift(width);
continue;
}
}
// found delimiter
push_and_start_next(width);
break;
}
}
template <typename Delim>
void state_quoting(const Delim& delim) {
if constexpr (quote::enabled) {
while (true) {
if (quote::match(*end)) {
// double quote
// eg: ...,"hel""lo,... -> hel"lo
if (quote::match(end[1])) {
++end;
shift();
continue;
}
auto [width, valid] = match_delimiter(end + 1, delim);
// not a delimiter
if (!valid) {
if (width == 0) {
// eol
// eg: ...,"hello" \0 -> hello
// eg no trim: ...,"hello"\0 -> hello
output_.emplace_back(begin, curr);
} else {
// missmatched quote
// eg: ...,"hel"lo,... -> error
}
state = State::finished;
break;
}
// delimiter
push_and_start_next(width + 1);
break;
}
if constexpr (escape::enabled) {
if (escape::match(*end)) {
++end;
shift();
continue;
}
}
// unterminated error
// eg: ..."hell\0 -> quote not terminated
if (*end == '\0') {
*curr = '\0';
state = State::finished;
break;
}
shift();
}
} else {
// set error impossible scenario
state = State::finished;
}
}
void push_and_start_next(size_t n) {
output_.emplace_back(begin, curr);
begin = end + n;
state = State::begin;
}
private:
std::vector<range> output_;
std::string error_ = "";
State state;
char* curr;
char* end;
char* begin;
char* line;
};
} /* ss */

View File

@ -1,6 +1,6 @@
# C++ compiler.
CXX=clang++
# Warnings enabled, C++17.
# NOTE(review): -lstdc++fs names a library to link, not a compile option;
# confirm that the rules consuming CXXFLAGS also perform the link step.
CXXFLAGS=-Wall -Wextra -std=c++17 -lstdc++fs
# Test programs that `all` depends on. This view shows the previous value
# followed by the new one, which adds test_splitter.
TESTS=test_parser test_converter test_extractions
TESTS=test_parser test_converter test_extractions test_splitter
# Default target: every program listed in TESTS.
all: $(TESTS)

View File

@ -16,105 +16,7 @@ public:
}
};
buffer buff;
// Feeds the splitter lines built from six values that contain escaped quotes
// and an escaped delimiter, and expects six fields back every time.
//
// Each line is built twice: once in the listed order (input1) and once
// mirrored (input2). Bit j of the loop counter decides whether the value at
// position j is written bare or wrapped in quotes; positions 2 and 3 are
// always wrapped.
// NOTE(review): the counter runs to size * size (36), so not every one of
// the 2^6 bare/quoted combinations is visited - confirm this is intended.
TEST_CASE("testing splitter with escaping") {
    const std::vector<std::string> values{"10",   "he\\\"llo",
                                          "\\\"", "\\\"a\\,a\\\"",
                                          "3.33", "a\\\""};
    const std::string delim = ",";
    const size_t count = values.size();

    // with quote
    ss::splitter<ss::quote<'"'>, ss::escape<'\\'>> s;
    char buff[128];

    for (size_t i = 0; i < count * count; ++i) {
        std::string input1;
        std::string input2;

        for (size_t j = 0; j < count; ++j) {
            const std::string& forward = values[j];
            const std::string& mirrored = values.at(count - 1 - j);
            const bool bare = (i & (1 << j)) && j != 2 && j != 3;

            if (bare) {
                input1.append(forward);
                input2.append(mirrored);
            } else {
                input1.append("\"" + forward + "\"");
                input2.append("\"" + mirrored + "\"");
            }

            input1.append(delim);
            input2.append(delim);
        }

        // drop the delimiter that trails the last value
        input1.pop_back();
        input2.pop_back();

        // NOTE(review): the literal starts with '\0', so the const char*
        // overload of append() sees an empty string and adds nothing.
        input1.append("\0\"");
        input2.append("\0\"");

        // the splitter edits its input, so it gets a scratch copy
        // (terminating NUL included)
        memcpy(buff, input1.c_str(), input1.size() + 1);
        auto tup1 = s.split(buff, delim);
        CHECK(tup1.size() == 6);

        memcpy(buff, input2.c_str(), input2.size() + 1);
        auto tup2 = s.split(buff, delim);
        CHECK(tup2.size() == 6);
    }
}
/*
TEST_CASE("testing quoting without escaping") {
std::vector<std::string> values{"10", "hello", ",", "a,a", "3.33", "a"};
// with quote
ss::converter c;
for (size_t i = 0; i < values.size() * values.size(); ++i) {
std::string input1;
std::string input2;
for (size_t j = 0; j < values.size(); ++j) {
if (i & (1 << j) && j != 2 && j != 3) {
input1.append(values[j]);
input2.append(values.at(values.size() - 1 - j));
} else {
input1.append("\"" + values[j] + "\"");
input2.append("\"" + values.at(values.size() - 1 - j) + "\"");
}
input1.append("__");
input1.push_back(',');
input1.append("__");
input2.push_back(',');
}
input1.pop_back();
input1.pop_back();
input1.pop_back();
input2.pop_back();
input1.append("\0\"");
input2.append("\0\"");
auto tup1 = c.convert<int, std::string, std::string, std::string,
double, char>(input1.c_str(), ",");
if (!c.valid()) {
FAIL("invalid: " + input1);
} else {
auto [a, b, c, d, e, f] = tup1;
CHECK(a == 10);
CHECK(b == "hello");
CHECK(c == ",");
CHECK(d == "a,a");
CHECK(e == 3.33);
CHECK(f == 'a');
}
auto tup2 = c.convert<char, double, std::string, std::string,
std::string, int>(input2.c_str(), ",");
if (!c.valid()) {
FAIL("invalid: " + input2);
} else {
auto [f, e, d, c, b, a] = tup2;
CHECK(a == 10);
CHECK(b == "hello");
CHECK(c == ",");
CHECK(d == "a,a");
CHECK(e == 3.33);
CHECK(f == 'a');
}
}
}
*/
static buffer buff;
TEST_CASE("testing split") {
ss::converter c;

43
test/test_splitter.cpp Normal file
View File

@ -0,0 +1,43 @@
#include <iostream>
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "../include/ss/splitter.hpp"
#include "doctest.h"
#include <algorithm>
// Feeds the splitter lines built from six values that contain escaped quotes
// and an escaped delimiter, and expects six fields back every time.
//
// Each line is built twice: once in the listed order (input1) and once
// mirrored (input2). Bit j of the loop counter decides whether the value at
// position j is written bare or wrapped in quotes; positions 2 and 3 are
// always wrapped.
// NOTE(review): the counter runs to size * size (36), so not every one of
// the 2^6 bare/quoted combinations is visited - confirm this is intended.
TEST_CASE("testing splitter with escaping") {
    const std::vector<std::string> values{"10",   "he\\\"llo",
                                          "\\\"", "\\\"a\\,a\\\"",
                                          "3.33", "a\\\""};
    const std::string delim = ",";
    const size_t count = values.size();

    // with quote
    ss::splitter<ss::quote<'"'>, ss::escape<'\\'>> s;
    char buff[128];

    for (size_t i = 0; i < count * count; ++i) {
        std::string input1;
        std::string input2;

        for (size_t j = 0; j < count; ++j) {
            const std::string& forward = values[j];
            const std::string& mirrored = values.at(count - 1 - j);
            const bool bare = (i & (1 << j)) && j != 2 && j != 3;

            if (bare) {
                input1.append(forward);
                input2.append(mirrored);
            } else {
                input1.append("\"" + forward + "\"");
                input2.append("\"" + mirrored + "\"");
            }

            input1.append(delim);
            input2.append(delim);
        }

        // drop the delimiter that trails the last value
        input1.pop_back();
        input2.pop_back();

        // NOTE(review): the literal starts with '\0', so the const char*
        // overload of append() sees an empty string and adds nothing.
        input1.append("\0\"");
        input2.append("\0\"");

        // the splitter edits its input, so it gets a scratch copy
        // (terminating NUL included)
        memcpy(buff, input1.c_str(), input1.size() + 1);
        auto tup1 = s.split(buff, delim);
        CHECK(tup1.size() == 6);

        memcpy(buff, input2.c_str(), input2.size() + 1);
        auto tup2 = s.split(buff, delim);
        CHECK(tup2.size() == 6);
    }
}