add string_error and multiline within the setup, remove set_error_mode, update unit tests, update documentation

This commit is contained in:
ado 2021-02-13 01:14:25 +01:00
parent 0f178658bb
commit ea42948c42
9 changed files with 208 additions and 162 deletions

View File

@ -22,7 +22,6 @@ Bill (Heath) Gates,65,3.3
int main() {
ss::parser p{"students.csv", ","};
if (!p.valid()) {
std::cout << p.error_msg() << std::endl;
exit(EXIT_FAILURE);
}
@ -232,14 +231,16 @@ Not yet documented.
## Error handling
Detailed error messages can be accessed via the **error_msg** method, and to enable them the error mode has to be changed to **error_mode::error_string** using the **set_error_mode** method:
Detailed error messages can be accessed via the **error_msg** method; to enable them, **ss::string_error** needs to be included in the setup.
```cpp
void parser::set_error_mode(ss::error_mode);
const std::string& parser::error_msg();
bool parser::valid();
bool parser::eof();
// ...
ss::parser<ss::string_error> parser;
```
Error messages can always be disabled by setting the error mode to **error_mode::error_bool**. An error can be detected using the **valid** method which would return **false** if the file could not be opened, or if the conversion could not be made (invalid types, invalid number of columns, ...). The **eof** method can be used to detect if the end of the file was reached.
An error can be detected using the **valid** method which would return **false** if the file could not be opened, or if the conversion could not be made (invalid types, invalid number of columns, ...). The **eof** method can be used to detect if the end of the file was reached.
## Substitute conversions
@ -340,7 +341,7 @@ p.try_next<ss::nx<shape, shape::circle, shape::square>, udbl>(
}
});
```
It is a bit less readable, but it removes the need to check which conversion was invoked. The **composite** also has an **on_error** method which accepts a lambda which will be invoked if no previous conversions were successful. The lambda can take no arguments or just one argument, an **std::string**, in which the error message is stored if **error_mode** is set to **error_mode::error_string**:
It is a bit less readable, but it removes the need to check which conversion was invoked. The **composite** also has an **on_error** method which accepts a lambda which will be invoked if no previous conversions were successful. The lambda can take no arguments or just one argument, an **std::string**, in which the error message is stored if **string_error** is enabled:
```cpp
p.try_next<int>()
.on_error([](const std::string& e) { /* int conversion failed */ })
@ -371,8 +372,8 @@ if (c.valid()) {
// do something with s
}
```
All special types and restrictions work on the converter too. Error handling is
also identical to error handling of the parser.
All setup parameters, special types and restrictions work on the converter too.
Error handling is also identical to error handling of the parser.
The converter also has the ability to just split the line, though it does not change it (kind of statically), hence the name of the library. It returns an **std::vector** of pairs of pointers, begin and end, each pair representing a split segment (column) of the whole string. The vector can then be used in an overloaded **convert** method. This allows the reuse of the same line without splitting it on every conversion.
```cpp

View File

@ -109,9 +109,13 @@ constexpr bool tied_class_v = tied_class<Ts...>::value;
template <typename... Matchers>
class converter {
constexpr static auto default_delimiter = ",";
using line_ptr_type = typename splitter<Matchers...>::line_ptr_type;
constexpr static auto string_error = setup<Matchers...>::string_error;
constexpr static auto default_delimiter = ",";
using error_type = ss::ternary_t<string_error, std::string, bool>;
public:
// parses line with given delimiter, returns a 'T' object created with
// extracted values of type 'Ts'
@ -173,23 +177,23 @@ public:
}
bool valid() const {
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
: bool_error_ == false;
if constexpr (string_error) {
return error_.empty();
} else {
return !error_;
}
}
const std::string& error_msg() const {
static_assert(string_error,
"'string_error' needs to be enabled to use 'error_msg'");
return error_;
}
bool unterminated_quote() const {
return splitter_.unterminated_quote();
}
const std::string& error_msg() const {
return string_error_;
}
void set_error_mode(error_mode mode) {
splitter_.set_error_mode(mode);
error_mode_ = mode;
}
// 'splits' string by given delimiter, returns vector of pairs which
// contain the beginnings and the ends of each column of the string
const split_input& split(line_ptr_type line,
@ -203,7 +207,6 @@ public:
}
private:
////////////////
// resplit
////////////////
@ -218,8 +221,11 @@ private:
////////////////
void clear_error() {
string_error_.clear();
bool_error_ = false;
if constexpr (string_error) {
error_.clear();
} else {
error_ = false;
}
}
std::string error_sufix(const string_range msg, size_t pos) const {
@ -234,44 +240,43 @@ private:
}
void set_error_unterminated_quote() {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append(splitter_.error_msg());
if constexpr (string_error) {
error_.clear();
error_.append(splitter_.error_msg());
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_invalid_conversion(const string_range msg, size_t pos) {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("invalid conversion for parameter ")
if constexpr (string_error) {
error_.clear();
error_.append("invalid conversion for parameter ")
.append(error_sufix(msg, pos));
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_validate(const char* const error, const string_range msg,
size_t pos) {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append(error).append(" ").append(
error_sufix(msg, pos));
if constexpr (string_error) {
error_.clear();
error_.append(error).append(" ").append(error_sufix(msg, pos));
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_number_of_colums(size_t expected_pos, size_t pos) {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("invalid number of columns, expected: ")
if constexpr (string_error) {
error_.clear();
error_.append("invalid number of columns, expected: ")
.append(std::to_string(expected_pos))
.append(", got: ")
.append(std::to_string(pos));
} else {
bool_error_ = true;
error_ = true;
}
}
@ -374,12 +379,10 @@ private:
// members
////////////////
std::string string_error_;
bool bool_error_;
enum error_mode error_mode_ { error_mode::error_bool };
// value-initialized: an empty string or 'false', i.e. "no error", so that
// 'valid' never reads an indeterminate bool when 'string_error' is disabled
error_type error_{};
splitter<Matchers...> splitter_;
template <typename ...>
template <typename...>
friend class parser;
};

View File

@ -15,6 +15,11 @@ template <typename... Matchers>
class parser {
struct none {};
constexpr static auto string_error = setup<Matchers...>::string_error;
constexpr static auto multiline = setup<Matchers...>::multiline;
using error_type = ss::ternary_t<string_error, std::string, bool>;
public:
parser(const std::string& file_name,
const std::string& delim = ss::default_delimiter)
@ -35,17 +40,17 @@ public:
parser& operator=(const parser& other) = delete;
bool valid() const {
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
: bool_error_ == false;
}
void set_error_mode(error_mode mode) {
error_mode_ = mode;
reader_.set_error_mode(mode);
if constexpr (string_error) {
return error_.empty();
} else {
return !error_;
}
}
const std::string& error_msg() const {
return string_error_;
static_assert(string_error,
"'string_error' needs to be enabled to use 'error_msg'");
return error_;
}
bool eof() const {
@ -124,6 +129,10 @@ public:
if constexpr (std::is_invocable_v<Fun>) {
fun();
} else {
static_assert(string_error,
"to enable error messages within the "
"on_error method "
"callback string_error needs to be enabled");
std::invoke(std::forward<Fun>(fun), parser_.error_msg());
}
}
@ -246,34 +255,40 @@ private:
////////////////
void clear_error() {
string_error_.clear();
bool_error_ = false;
if constexpr (string_error) {
error_.clear();
} else {
error_ = false;
}
}
void set_error_failed_check() {
if (error_mode_ == error_mode::error_string) {
string_error_.append(file_name_).append(" failed check.");
if constexpr (string_error) {
error_.append(file_name_).append(" failed check.");
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_file_not_open() {
string_error_.append(file_name_).append(" could not be opened.");
bool_error_ = true;
if constexpr (string_error) {
error_.append(file_name_).append(" could not be opened.");
} else {
error_ = true;
}
}
void set_error_eof_reached() {
if (error_mode_ == error_mode::error_string) {
string_error_.append(file_name_).append(" reached end of file.");
if constexpr (string_error) {
error_.append(file_name_).append(" reached end of file.");
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_invalid_conversion() {
if (error_mode_ == error_mode::error_string) {
string_error_.append(file_name_)
if constexpr (string_error) {
error_.append(file_name_)
.append(" ")
.append(std::to_string(line_number_))
.append(": ")
@ -282,7 +297,7 @@ private:
.append(reader_.buffer_)
.append("\"");
} else {
bool_error_ = true;
error_ = true;
}
}
@ -360,7 +375,7 @@ private:
size_t size = remove_eol(next_line_buffer_, ssize);
if constexpr (setup<Matchers...>::escape::enabled) {
if constexpr (multiline && setup<Matchers...>::escape::enabled) {
while (escaped_eol(size)) {
if (!append_line(next_line_buffer_, size)) {
return false;
@ -370,7 +385,7 @@ private:
next_line_converter_.split(next_line_buffer_, delim_);
if constexpr (setup<Matchers...>::quote::enabled) {
if constexpr (multiline && setup<Matchers...>::quote::enabled) {
while (unterminated_quote()) {
if (!append_line(next_line_buffer_, size)) {
return false;
@ -382,11 +397,6 @@ private:
return true;
}
void set_error_mode(error_mode mode) {
converter_.set_error_mode(mode);
next_line_converter_.set_error_mode(mode);
}
void update() {
std::swap(buffer_, next_line_buffer_);
std::swap(converter_, next_line_converter_);
@ -478,9 +488,7 @@ private:
////////////////
std::string file_name_;
std::string string_error_;
bool bool_error_{false};
error_mode error_mode_{error_mode::error_bool};
// value-initialized: an empty string or 'false', i.e. "no error"; the
// replaced 'bool_error_' member was explicitly initialized to false
error_type error_{};
reader reader_;
size_t line_number_{0};
bool eof_{false};

View File

@ -79,9 +79,8 @@ struct get_matcher;
template <template <char...> class Matcher, typename T, typename... Ts>
struct get_matcher<Matcher, T, Ts...> {
using type =
typename ternary<is_instance_of_matcher<T, Matcher>::value, T,
typename get_matcher<Matcher, Ts...>::type>::type;
using type = ternary_t<is_instance_of_matcher<T, Matcher>::value, T,
typename get_matcher<Matcher, Ts...>::type>;
};
template <template <char...> class Matcher>
@ -92,11 +91,33 @@ struct get_matcher<Matcher> {
template <template <char...> class Matcher, typename... Ts>
using get_matcher_t = typename get_matcher<Matcher, Ts...>::type;
class multiline;
class string_error;
template <typename... Ts>
struct setup {
private:
template <typename T>
struct is_multiline : std::is_same<T, multiline> {};
constexpr static auto count_multiline = count<is_multiline, Ts...>::size;
template <typename T>
struct is_string_error : std::is_same<T, string_error> {};
constexpr static auto count_string_error =
count<is_string_error, Ts...>::size;
public:
using quote = get_matcher_t<quote, Ts...>;
using trim = get_matcher_t<trim, Ts...>;
using escape = get_matcher_t<escape, Ts...>;
constexpr static bool multiline = (count_multiline == 1);
constexpr static bool string_error = (count_string_error == 1);
static_assert(
!multiline || (multiline && (quote::enabled || escape::enabled)),
"to enable multiline either quote or escape need to be enabled");
#define ASSERT_MSG "cannot have the same match character in multiple matchers"
static_assert(!matches_intersect<quote, trim>(), ASSERT_MSG);

View File

@ -10,14 +10,11 @@
namespace ss {
// TODO move to common
// TODO move to common or something
using string_range = std::pair<const char*, const char*>;
using split_input = std::vector<string_range>;
constexpr static auto default_delimiter = ",";
// the error can be set inside a string, or a bool
enum class error_mode { error_string, error_bool };
template <typename... Ts>
class splitter {
private:
@ -25,35 +22,43 @@ private:
using trim = typename setup<Ts...>::trim;
using escape = typename setup<Ts...>::escape;
constexpr static auto string_error = setup<Ts...>::string_error;
constexpr static auto is_const_line = !quote::enabled && !escape::enabled;
using error_type = ss::ternary_t<string_error, std::string, bool>;
public:
using line_ptr_type =
typename ternary<is_const_line, const char*, char*>::type;
using line_ptr_type = ternary_t<is_const_line, const char*, char*>;
bool valid() const {
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
: bool_error_ == false;
if constexpr (string_error) {
return error_.empty();
} else {
return !error_;
}
}
const std::string& error_msg() const {
static_assert(string_error,
"'string_error' needs to be enabled to use 'error_msg'");
return error_;
}
bool unterminated_quote() const {
return unterminated_quote_;
}
const std::string& error_msg() const {
return string_error_;
}
void set_error_mode(error_mode mode) {
error_mode_ = mode;
}
const split_input& split(line_ptr_type new_line,
const std::string& delimiter = default_delimiter) {
split_input_.clear();
return resplit(new_line, -1, delimiter);
}
private:
////////////////
// resplit
////////////////
void adjust_ranges(const char* old_line) {
for (auto& [begin, end] : split_input_) {
begin = begin - old_line + line_;
@ -61,11 +66,6 @@ public:
}
}
private:
////////////////
// resplit
////////////////
const split_input& resplit(
line_ptr_type new_line, ssize_t new_size,
const std::string& delimiter = default_delimiter) {
@ -96,48 +96,50 @@ private:
////////////////
void clear_error() {
string_error_.clear();
bool_error_ = false;
if constexpr (string_error) {
error_.clear();
} else {
error_ = false;
}
unterminated_quote_ = false;
}
void set_error_empty_delimiter() {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("empty delimiter");
if constexpr (string_error) {
error_.clear();
error_.append("empty delimiter");
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_mismatched_quote(size_t n) {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("mismatched quote at position: " +
std::to_string(n));
if constexpr (string_error) {
error_.clear();
error_.append("mismatched quote at position: " + std::to_string(n));
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_unterminated_quote() {
unterminated_quote_ = true;
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("unterminated quote");
if constexpr (string_error) {
error_.clear();
error_.append("unterminated quote");
} else {
bool_error_ = true;
error_ = true;
}
}
void set_error_invalid_resplit() {
unterminated_quote_ = false;
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
string_error_.append("invalid resplit, new line must be longer"
"than the end of the last slice");
if constexpr (string_error) {
error_.clear();
// adjacent literals are concatenated verbatim, so the separating space
// has to be part of the first literal
error_.append("invalid resplit, new line must be longer "
"than the end of the last slice");
} else {
bool_error_ = true;
error_ = true;
}
}
@ -373,10 +375,10 @@ private:
// members
////////////////
std::string string_error_;
bool bool_error_{false};
static_assert(std::is_same_v<error_type, bool> ||
std::is_same_v<error_type, std::string>);
// value-initialized: an empty string or 'false', i.e. "no error"; the
// replaced 'bool_error_' member was explicitly initialized to false
error_type error_{};
bool unterminated_quote_{false};
enum error_mode error_mode_ { error_mode::error_bool };
line_ptr_type begin_;
line_ptr_type curr_;
line_ptr_type end_;

View File

@ -230,8 +230,11 @@ using filter_not_t = typename filter_not<Trait, Ts...>::type;
// count
////////////////
template <template <typename...> class Trait, typename... Ts>
struct count;
template <template <typename...> class Trait, typename T, typename... Ts>
struct count {
struct count<Trait, T, Ts...> {
static constexpr size_t size =
std::tuple_size<filter_if_t<Trait, T, Ts...>>::value;
};
@ -241,12 +244,20 @@ struct count<Trait, T> {
static constexpr size_t size = Trait<T>::value;
};
template <template <typename...> class Trait>
struct count<Trait> {
static constexpr size_t size = 0;
};
////////////////
// count not
////////////////
template <template <typename...> class Trait, typename... Ts>
struct count_not;
template <template <typename...> class Trait, typename T, typename... Ts>
struct count_not {
struct count_not<Trait, T, Ts...> {
static constexpr size_t size =
std::tuple_size<filter_not_t<Trait, T, Ts...>>::value;
};
@ -256,6 +267,11 @@ struct count_not<Trait, T> {
static constexpr size_t size = !Trait<T>::value;
};
template <template <typename...> class Trait>
struct count_not<Trait> {
static constexpr size_t size = 0;
};
////////////////
// all of
////////////////
@ -331,6 +347,9 @@ struct ternary<false, T, U> {
using type = U;
};
template <bool B, typename T, typename U>
using ternary_t = typename ternary<B, T, U>::type;
////////////////
// tuple to struct
////////////////

View File

@ -384,13 +384,7 @@ TEST_CASE(
}
TEST_CASE("converter test error mode") {
ss::converter c;
c.convert<int>("junk");
CHECK(!c.valid());
CHECK(c.error_msg().empty());
c.set_error_mode(ss::error_mode::error_string);
ss::converter<ss::string_error> c;
c.convert<int>("junk");
CHECK(!c.valid());
CHECK(!c.error_msg().empty());
@ -444,8 +438,9 @@ TEST_CASE("converter test converter with quotes spacing and escaping") {
}
TEST_CASE("converter test invalid split conversions") {
ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;
c.set_error_mode(ss::error_mode::error_string);
ss::converter<ss::string_error, ss::escape<'\\'>, ss::trim<' '>,
ss::quote<'"'>>
c;
{
// mismatched quote
@ -453,6 +448,7 @@ TEST_CASE("converter test invalid split conversions") {
buff(R"( "just , some , "12.3","a" )"));
CHECK(!c.valid());
CHECK(!c.unterminated_quote());
CHECK(!c.error_msg().empty());
}
{
@ -461,5 +457,6 @@ TEST_CASE("converter test invalid split conversions") {
buff(R"( ju\,st , "so,me" , 12.34 , "str""ings)"));
CHECK(!c.valid());
CHECK(c.unterminated_quote());
CHECK(!c.error_msg().empty());
}
}

View File

@ -59,7 +59,6 @@ TEST_CASE("parser test various cases") {
ss::parser p0{std::move(p)};
p = std::move(p0);
p.set_error_mode(ss::error_mode::error_string);
std::vector<X> i;
while (!p.eof()) {
@ -190,8 +189,7 @@ TEST_CASE("parser test composite conversion") {
}
}
ss::parser p{f.name, ","};
p.set_error_mode(ss::error_mode::error_string);
ss::parser<ss::string_error> p{f.name, ","};
auto fail = [] { FAIL(""); };
auto expect_error = [](auto error) { CHECK(!error.empty()); };
@ -503,14 +501,7 @@ TEST_CASE("parser test error mode") {
out << "junk" << std::endl;
}
ss::parser p(f.name, ",");
REQUIRE(!p.eof());
p.get_next<int>();
CHECK(!p.valid());
CHECK(p.error_msg().empty());
p.set_error_mode(ss::error_mode::error_string);
ss::parser<ss::string_error> p(f.name, ",");
REQUIRE(!p.eof());
p.get_next<int>();
@ -538,8 +529,7 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
}
}
ss::parser<ss::quote<'"'>> p{f.name, ","};
p.set_error_mode(ss::error_mode::error_string);
ss::parser<ss::multiline, ss::quote<'"'>> p{f.name, ","};
std::vector<X> i;
while (!p.eof()) {
@ -548,6 +538,12 @@ TEST_CASE("parser test csv on multiple lines with quotes") {
}
CHECK(std::equal(i.begin(), i.end(), data.begin()));
// same file parsed without 'ss::multiline'
ss::parser<ss::quote<'"'>> p_no_multiline{f.name, ","};
// NOTE(review): both the loop condition and the CHECK below inspect 'p',
// not 'p_no_multiline'. If 'p' already reached eof in the loop above, this
// body never executes and nothing is verified - presumably
// 'p_no_multiline' was intended in both places; confirm against the test
// data before changing it.
while (!p.eof()) {
auto a = p_no_multiline.get_next<int, double, std::string>();
CHECK(!p.valid());
}
}
std::string no_escape(std::string& s) {
@ -569,8 +565,7 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
}
}
ss::parser<ss::escape<'\\'>> p{f.name, ","};
p.set_error_mode(ss::error_mode::error_string);
ss::parser<ss::multiline, ss::escape<'\\'>> p{f.name, ","};
std::vector<X> i;
while (!p.eof()) {
@ -579,4 +574,10 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
}
CHECK(std::equal(i.begin(), i.end(), data.begin()));
// same file parsed without 'ss::multiline'
ss::parser<ss::escape<'\\'>> p_no_multiline{f.name, ","};
// NOTE(review): both the loop condition and the CHECK below inspect 'p',
// not 'p_no_multiline'. If 'p' already reached eof in the loop above, this
// body never executes and nothing is verified - presumably
// 'p_no_multiline' was intended in both places; confirm against the test
// data before changing it.
while (!p.eof()) {
auto a = p_no_multiline.get_next<int, double, std::string>();
CHECK(!p.valid());
}
}

View File

@ -480,13 +480,7 @@ TEST_CASE("splitter test error mode") {
{
// empty delimiter
ss::splitter s;
s.split(buff("just,some,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(s.error_msg().empty());
s.set_error_mode(ss::error_mode::error_string);
ss::splitter<ss::string_error> s;
s.split(buff("just,some,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
@ -495,13 +489,7 @@ TEST_CASE("splitter test error mode") {
{
// unterminated quote
ss::splitter<ss::quote<'"'>> s;
s.split(buff("\"just"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(s.error_msg().empty());
s.set_error_mode(ss::error_mode::error_string);
ss::splitter<ss::string_error, ss::quote<'"'>> s;
s.split(buff("\"just"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
@ -691,27 +679,33 @@ TEST_CASE("splitter test unterminated quote") {
}
TEST_CASE("splitter test invalid splits") {
ss::converter<ss::quote<'"'>, ss::trim<' '>, ss::escape<'\\'>> c;
ss::converter<ss::string_error, ss::quote<'"'>, ss::trim<' '>,
ss::escape<'\\'>>
c;
auto& s = c.splitter;
// empty delimiter
s.split(buff("some,random,strings"), "");
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(!s.error_msg().empty());
// mismatched quote
s.split(buff(R"(some,"random,"strings")"));
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(!s.error_msg().empty());
// unterminated quote
s.split(buff("some,random,\"strings"));
CHECK(!s.valid());
CHECK(s.unterminated_quote());
CHECK(!s.error_msg().empty());
// invalid resplit
char new_line[] = "some";
auto a = c.resplit(new_line, strlen(new_line));
CHECK(!s.valid());
CHECK(!s.unterminated_quote());
CHECK(!s.error_msg().empty());
}