add buffer to converter

This commit is contained in:
ado 2021-01-10 23:51:20 +01:00
parent f2cc7265b6
commit cd264faa70
6 changed files with 290 additions and 34 deletions

View File

@ -1,5 +1,17 @@
#pragma once
// TODO remove
#include <iostream>
#ifndef DBG
// Temporary tracing helper: writes `log` to std::cout followed by a newline
// and flushes the stream.
// Declared `inline` because it is defined in a header; without it every
// translation unit including this file would emit its own definition and the
// program would fail to link.
// NOTE(review): tracing is active when DBG is *not* defined - confirm that the
// sense of the macro is intended.
inline void log(const std::string& log) {
    std::cout << log << std::endl;
}
#else
// Variant selected when DBG is defined: accepts the message and discards it.
inline void log(const std::string&) {
}
#endif
//
//
#include "extract.hpp"
#include "function_traits.hpp"
#include "restrictions.hpp"
@ -8,6 +20,10 @@
#include <type_traits>
#include <vector>
// Compile-time parsing configuration.
// space: character that match() skips on both sides of a single-character
// delimiter; '\0' selects the branch that compares the delimiter only.
constexpr auto space = '_';
// escaping: when true, substring() does not test a character for being the
// closing quote if the character before it is a backslash.
constexpr auto escaping = true;
// quote: character that opens and closes a quoted column; '\0' disables the
// `if constexpr (quote != '\0')` sections of the splitting code.
constexpr auto quote = '"';
namespace ss {
INIT_HAS_METHOD(tied);
INIT_HAS_METHOD(ss_valid);
@ -21,7 +37,7 @@ INIT_HAS_METHOD(error);
// eg. no_validator_tup_t<int, ss::nx<char, 'A', 'B'>> <=> std::tuple<int, char>
// where ss::nx<char, 'A', 'B'> is a validator '(n)one e(x)cept' which
// checks if the returned character is either 'A' or 'B', returns error if not
// additionaly if one element is left in the pack, it will be unwraped from
// additionally if one element is left in the pack, it will be unwrapped from
// the tuple eg. no_void_validator_tup_t<int> <=> int instead of std::tuple<int>
template <typename T, typename U = void>
struct no_validator;
@ -139,6 +155,12 @@ public:
return to_object<T>(convert<Ts...>(elems));
}
// same as above, but uses cached split line:
// calls the argument-less convert<Ts...>() and passes its result to
// to_object<T> to build the returned T
template <typename T, typename... Ts>
T convert_object() {
return to_object<T>(convert<Ts...>());
}
// parses already split line, returns either a tuple of objects with
// parsed values (returns raw element (no tuple) if Ts is empty), or if
// one argument is given which is a class which has a tied
@ -162,6 +184,12 @@ public:
}
}
// same as above, but uses cached split line:
// forwards to convert<T, Ts...>(split_input) with the member input_, which
// is the container split() clears and refills
template <typename T, typename... Ts>
no_void_validator_tup_t<T, Ts...> convert() {
return convert<T, Ts...>(input_);
}
bool valid() const {
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
: bool_error_ == false;
@ -176,7 +204,7 @@ public:
}
// 'splits' string by given delimiter, returns vector of pairs which
// contain the beginings and the ends of each column of the string
// contain the beginnings and the ends of each column of the string
const split_input& split(const char* const line,
const std::string& delim = "") {
input_.clear();
@ -215,6 +243,24 @@ private:
return error;
}
// Records an "invalid quotation" failure in whichever form the active error
// mode uses: the boolean flag, or the message string (replacing any previous
// message).
void set_error_invalid_quotation() {
    const bool wants_message = (error_mode_ == error_mode::error_string);
    if (!wants_message) {
        bool_error_ = true;
        return;
    }

    string_error_.clear();
    string_error_.append("invalid quotation");
}
// Records an "unterminated quote" failure in whichever form the active error
// mode uses: the boolean flag, or the message string (replacing any previous
// message).
void set_error_unterminated_quote() {
    const bool wants_message = (error_mode_ == error_mode::error_string);
    if (!wants_message) {
        bool_error_ = true;
        return;
    }

    string_error_.clear();
    string_error_.append("unterminated quote");
}
void set_error_invalid_conversion(const string_range msg, size_t pos) {
if (error_mode_ == error_mode::error_string) {
string_error_.clear();
@ -277,30 +323,107 @@ private:
// Splits `line` into columns: every range produced by substring() is appended
// to input_, and input_ is returned by reference.
// `delim_size` is the number of characters added to the resume position
// before the next column is extracted (defaults to 1).
// NOTE(review): this span is taken from a rendered diff, so for changed
// statements the pre-commit line is immediately followed by its replacement.
template <typename Delim>
const split_input& split_impl(const char* const line, Delim delim,
size_t delim_size = 1) {
// pre-commit form: substring() returned only the column range
auto range = substring(line, delim);
// post-commit form: substring() also returns the position to resume from
auto [range, begin] = substring(line, delim);
input_.push_back(range);
// keep going until the character at the end of the last range is the
// terminating NUL
while (range.second[0] != '\0') {
// pre-commit form: resume right after the delimiter that ended the range
range = substring(range.second + delim_size, delim);
if constexpr (quote != '\0') {
// step over a quote character sitting at the resume position
if (*begin == quote) {
++begin;
}
// nothing left after the previous column
if (*begin == '\0') {
break;
}
}
// NOTE(review): on the unquoted path substring() already returns
// `end + to_ignore`, i.e. a position past the delimiter; adding delim_size
// here advances a second time - confirm that the column following an
// unquoted column does not lose its first character.
std::tie(range, begin) = substring(begin + delim_size, delim);
log("-> " + std::string{range.first, range.second});
input_.push_back(range);
}
return input_;
}
bool no_match(const char* end, char delim) const {
return *end != delim;
size_t match(const char* begin, char delim) const {
const char* p = begin;
if constexpr (space == '\0') {
if (*p == delim) {
return 1;
}
} else {
while (*p == space) {
++p;
}
if (*p == '\0') {
return p - begin;
}
if (*p != delim) {
return 0;
}
do
++p;
while (*p == space);
return p - begin;
}
}
bool no_match(const char* end, const std::string& delim) const {
size_t match(const char* end, const std::string& delim) const {
// TODO
log("ahamm");
return strncmp(end, delim.c_str(), delim.size()) != 0;
}
// Extracts one column starting at `begin`.
// Post-commit form: returns the column range together with the position from
// which the caller should continue scanning.
// A column that starts with `quote` runs up to the closing quote; any other
// column runs up to the first position where match() reports a delimiter, or
// to the terminating NUL.
// NOTE(review): this span is taken from a rendered diff, so for changed
// statements the pre-commit line is immediately followed by its replacement.
template <typename Delim>
// pre-commit signature: range only, const member
string_range substring(const char* const begin, Delim delim) const {
// post-commit signature: non-const because it may record an error
std::tuple<string_range, const char*> substring(const char* begin,
Delim delim) {
const char* end;
// pre-commit scan: stop at NUL or at the first delimiter
for (end = begin; *end != '\0' && no_match(end, delim); ++end)
// `i` walks to the end of the line; it is used only for the trace below
const char* i;
for (i = begin; *i != '\0'; ++i)
;
log(">> " + std::string{begin, i});
if constexpr (quote != '\0') {
if (*begin == quote) {
// skip the opening quote so the range holds the content only
++begin;
// pre-commit return statement
return string_range{begin, end};
for (end = begin; true; ++end) {
// reached the end of the line without a closing quote
if (*end == '\0') {
log("error");
set_error_unterminated_quote();
return {string_range{begin, end}, end};
}
if constexpr (escaping) {
// on the first iteration end[-1] is the opening quote that was skipped above
// NOTE(review): a backslash that is itself escaped (`\\` right before the
// closing quote) is also treated as escaping the quote - confirm intended.
if (end[-1] == '\\') {
continue;
}
}
if (*end == quote) {
break;
}
}
// end is not \0: it points at the closing quote
// the delimiter has to follow the closing quote directly (match() may skip
// `space` characters around it)
size_t to_ignore = match(end + 1, delim);
log(std::to_string(to_ignore));
if (to_ignore != 0) {
// NOTE(review): the match is measured from end + 1 but added to end, so
// this resume position is one character short of the one returned by the
// unquoted path below - confirm the caller compensates consistently.
return {string_range{begin, end}, end + to_ignore};
}
// something other than a delimiter follows the closing quote
log("error");
set_error_invalid_quotation();
return {string_range{begin, end}, end};
}
}
// unquoted column: advance until match() reports a delimiter at `end`
for (end = begin; *end != '\0'; ++end) {
size_t to_ignore = match(end, delim);
log(std::to_string(to_ignore));
if (to_ignore != 0) {
return {string_range{begin, end}, end + to_ignore};
}
}
// no delimiter found: the column extends to the terminating NUL
return {string_range{begin, end}, end};
}
////////////////

View File

@ -3,9 +3,9 @@
#include "converter.hpp"
#include "extract.hpp"
#include "restrictions.hpp"
#include <cstdlib>
#include <cstring>
#include <optional>
#include <cstdlib>
#include <string>
#include <vector>
@ -39,7 +39,7 @@ public:
// Stores the requested error mode on the parser and forwards it to the
// object that performs the conversions.
// NOTE(review): rendered diff - the converter_ call is the pre-commit line and
// the buff_ call right after it is its replacement.
void set_error_mode(error_mode mode) {
error_mode_ = mode;
converter_.set_error_mode(mode);
buff_.set_error_mode(mode);
}
const std::string& error_msg() const {
@ -68,10 +68,9 @@ public:
return {};
}
split_input_ = converter_.split(buff_.get(), delim_);
auto value = converter_.convert<T, Ts...>(split_input_);
auto value = buff_.get_converter().convert<T, Ts...>();
if (!converter_.valid()) {
if (!buff_.get_converter().valid()) {
set_error_invalid_conversion();
}
@ -134,7 +133,7 @@ public:
composite<Ts..., T> composite_with(T&& new_value) {
auto merged_values =
std::tuple_cat(std::move(values_),
std::tuple{parser_.valid()
std::tuple<T>{parser_.valid()
? std::forward<T>(new_value)
: std::nullopt});
return {std::move(merged_values), parser_};
@ -160,8 +159,8 @@ public:
no_void_validator_tup_t<U, Us...> try_same() {
parser_.clear_error();
auto value =
parser_.converter_.convert<U, Us...>(parser_.split_input_);
if (!parser_.converter_.valid()) {
parser_.buff_.get_converter().template convert<U, Us...>();
if (!parser_.buff_.get_converter().valid()) {
parser_.set_error_invalid_conversion();
}
return value;
@ -249,37 +248,57 @@ private:
class buffer {
char* buffer_{nullptr};
char* new_buffer_{nullptr};
char* next_line_buffer_{nullptr};
converter converter_;
converter next_line_converter_;
size_t size_{0};
const std::string& delim_;
public:
buffer(const std::string& delimiter) : delim_{delimiter} {
}
~buffer() {
free(buffer_);
free(new_buffer_);
free(next_line_buffer_);
}
bool read(FILE* file) {
ssize_t size = getline(&new_buffer_, &size_, file);
ssize_t size = getline(&next_line_buffer_, &size_, file);
size_t string_end = size - 1;
if (size == -1) {
return false;
}
if (size >= 2 && new_buffer_[size - 2] == '\r') {
if (size >= 2 && next_line_buffer_[size - 2] == '\r') {
string_end--;
}
new_buffer_[string_end] = '\0';
next_line_buffer_[string_end] = '\0';
next_line_converter_.split(next_line_buffer_, delim_);
return true;
}
void set_error_mode(error_mode mode) {
converter_.set_error_mode(mode);
next_line_converter_.set_error_mode(mode);
}
converter& get_converter() {
return converter_;
}
const char* get() const {
return buffer_;
}
void update() {
std::swap(buffer_, new_buffer_);
std::swap(buffer_, next_line_buffer_);
std::swap(converter_, next_line_converter_);
}
};
@ -324,7 +343,7 @@ private:
.append(" ")
.append(std::to_string(line_number_))
.append(": ")
.append(converter_.error_msg())
.append(buff_.get_converter().error_msg())
.append(": \"")
.append(buff_.get())
.append("\"");
@ -342,10 +361,8 @@ private:
std::string string_error_;
bool bool_error_{false};
error_mode error_mode_{error_mode::error_bool};
converter converter_;
converter::split_input split_input_;
FILE* file_{nullptr};
buffer buff_;
buffer buff_{delim_};
size_t line_number_{0};
bool eof_{false};
};

View File

@ -1,6 +1,6 @@
CXX=clang++
CXXFLAGS=-Wall -Wextra -std=c++17 -lstdc++fs
TESTS=test_parser test_converter test_extractions
CXX=clang++-9
CXXFLAGS=-Wall -Wextra -std=c++17 -O0 -lstdc++fs
TESTS=test_converter
all: $(TESTS)

BIN
test/test_converter Executable file

Binary file not shown.

View File

@ -1,8 +1,123 @@
#include <iostream>
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "../include/ss/converter.hpp"
#include "doctest.h"
#include <algorithm>
/* TODO
TEST_CASE("testing quoting with escaping") {
std::vector<std::string> values{"10", "he\\\"llo", "\\\"",
"\\\"a,a\\\"", "3.33", "a\\\""};
// with quote
ss::converter c;
for (size_t i = 0; i < values.size() * values.size(); ++i) {
std::string input1;
std::string input2;
for (size_t j = 0; j < values.size(); ++j) {
if (i & (1 << j) && j != 2 && j != 3) {
input1.append(values[j]);
input2.append(values.at(values.size() - 1 - j));
} else {
input1.append("\"" + values[j] + "\"");
input2.append("\"" + values.at(values.size() - 1 - j) + "\"");
}
input1.push_back(',');
input2.push_back(',');
}
input1.pop_back();
input2.pop_back();
input1.append("\0\"");
input2.append("\0\"");
auto tup1 = c.convert<int, std::string, std::string, std::string,
double, std::string>(input1.c_str(), ",");
if (!c.valid()) {
FAIL("invalid: " + input1);
} else {
auto [a, b, c, d, e, f] = tup1;
CHECK(a == 10);
CHECK(b == "he\"llo");
CHECK(c == "\"");
CHECK(d == "\"a,a\"");
CHECK(e == 3.33);
CHECK(f == "a\"");
std::cout << a << ' ' << b << ' ' << c << ' ' << d << ' ' << e
<< ' ' << f << std::endl;
CHECK(tup1 ==
std::make_tuple(10, "he\"llo", "\"", "\"a,a\"", 3.33, "a\""));
}
auto tup2 = c.convert<std::string, double, std::string, std::string,
std::string, int>(input2.c_str(), ",");
if (!c.valid()) {
FAIL("invalid: " + input2);
} else {
CHECK(tup2 ==
std::make_tuple("a\"", 3.33, "\"a,a\"", "\"", "he\"llo", 10));
}
}
}
*/
// Checks that columns are converted correctly whether or not they are wrapped
// in quotes. Every combination of quoted/unquoted columns is generated; the
// columns that contain the delimiter (indices 2 and 3) are always quoted.
// input1 additionally surrounds each delimiter with "__"; input2 lists the
// columns in reverse order without that padding.
TEST_CASE("testing quoting without escaping") {
    const std::vector<std::string> values{"10",  "hello", ",",
                                          "a,a", "3.33",  "a"};

    ss::converter c;

    // one bit per column, so 2^size masks cover every combination
    // (the previous bound, size * size, stopped at 36 of the 64 masks)
    const size_t combinations = size_t{1} << values.size();

    for (size_t i = 0; i < combinations; ++i) {
        std::string input1;
        std::string input2;

        for (size_t j = 0; j < values.size(); ++j) {
            const bool unquoted = ((i >> j) & 1u) != 0 && j != 2 && j != 3;
            const std::string& forward = values[j];
            const std::string& backward = values.at(values.size() - 1 - j);

            if (unquoted) {
                input1.append(forward);
                input2.append(backward);
            } else {
                input1.append("\"" + forward + "\"");
                input2.append("\"" + backward + "\"");
            }

            input1.append("__");
            input1.push_back(',');
            input1.append("__");
            input2.push_back(',');
        }

        // drop what was added after the last column: input1 loses the final
        // "__" and ',' (the "__" before that delimiter stays), input2 loses
        // its trailing ','
        input1.pop_back();
        input1.pop_back();
        input1.pop_back();
        input2.pop_back();

        // The former `append("\0\"")` calls were removed: the literal starts
        // with a NUL, so the const char* overload appended nothing.

        auto tup1 = c.convert<int, std::string, std::string, std::string,
                              double, char>(input1.c_str(), ",");
        if (!c.valid()) {
            FAIL("invalid: " + input1);
        } else {
            // names chosen so that the converter `c` is not shadowed
            auto [number, word, comma, pair, real, letter] = tup1;
            CHECK(number == 10);
            CHECK(word == "hello");
            CHECK(comma == ",");
            CHECK(pair == "a,a");
            CHECK(real == 3.33);
            CHECK(letter == 'a');
        }

        auto tup2 = c.convert<char, double, std::string, std::string,
                              std::string, int>(input2.c_str(), ",");
        if (!c.valid()) {
            FAIL("invalid: " + input2);
        } else {
            auto [letter, real, pair, comma, word, number] = tup2;
            CHECK(number == 10);
            CHECK(word == "hello");
            CHECK(comma == ",");
            CHECK(pair == "a,a");
            CHECK(real == 3.33);
            CHECK(letter == 'a');
        }
    }
}
TEST_CASE("testing split") {
ss::converter c;
for (const auto& [s, expected, delim] :
@ -48,7 +163,8 @@ TEST_CASE("testing valid conversions") {
CHECK(tup == 5);
}
{
auto tup = c.convert<void, int, void>("junk 5 junk", " ");
// TODO make \t -> ' '
auto tup = c.convert<void, int, void>("junk\t5\tjunk", "\t");
REQUIRE(c.valid());
CHECK(tup == 5);
}

View File

@ -481,8 +481,8 @@ TEST_CASE("testing the moving of parsed values") {
TEST_CASE("testing the moving of parsed composite values") {
// to compile is enough
return;
ss::parser* p;
p->try_next<my_string, my_string, my_string>()
ss::parser p{"", ""};
p.try_next<my_string, my_string, my_string>()
.or_else<my_string, my_string, my_string, my_string>([](auto&&) {})
.or_else<my_string>([](auto&) {})
.or_else<xyz>([](auto&&) {})