mirror of
https://github.com/red0124/ssp.git
synced 2025-02-02 16:51:12 +01:00
add converter tests with quote trim and escape, enable resplit on converter, make parser handle multi-line csv, add unit tests
This commit is contained in:
parent
1bf6b9d595
commit
035e27c5ab
@ -110,12 +110,14 @@ constexpr bool tied_class_v = tied_class<Ts...>::value;
|
||||
template <typename... Matchers>
|
||||
class converter {
|
||||
constexpr static auto default_delimiter = ",";
|
||||
using line_ptr_type = typename splitter<Matchers...>::line_ptr_type;
|
||||
|
||||
public:
|
||||
// parses line with given delimiter, returns a 'T' object created with
|
||||
// extracted values of type 'Ts'
|
||||
template <typename T, typename... Ts>
|
||||
T convert_object(char* line, const std::string& delim = default_delimiter) {
|
||||
T convert_object(line_ptr_type line,
|
||||
const std::string& delim = default_delimiter) {
|
||||
return to_object<T>(convert<Ts...>(line, delim));
|
||||
}
|
||||
|
||||
@ -123,7 +125,7 @@ public:
|
||||
// extracted values of type 'Ts'
|
||||
template <typename... Ts>
|
||||
no_void_validator_tup_t<Ts...> convert(
|
||||
char* line, const std::string& delim = default_delimiter) {
|
||||
line_ptr_type line, const std::string& delim = default_delimiter) {
|
||||
input_ = split(line, delim);
|
||||
/* TODO
|
||||
if (!splitter_.valid()) {
|
||||
@ -181,6 +183,10 @@ public:
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
bool unterminated_quote() const {
|
||||
return splitter_.unterminated_quote();
|
||||
}
|
||||
|
||||
const std::string& error_msg() const {
|
||||
return string_error_;
|
||||
}
|
||||
@ -191,7 +197,7 @@ public:
|
||||
|
||||
// 'splits' string by given delimiter, returns vector of pairs which
|
||||
// contain the beginnings and the ends of each column of the string
|
||||
const split_input& split(char* line,
|
||||
const split_input& split(line_ptr_type line,
|
||||
const std::string& delim = default_delimiter) {
|
||||
input_.clear();
|
||||
if (line[0] == '\0') {
|
||||
@ -202,6 +208,12 @@ public:
|
||||
return input_;
|
||||
}
|
||||
|
||||
// forwards 'new_line', 'new_size' and 'delim' to splitter_.resplit, keeps a
// copy of the resulting columns in input_ and returns that copy
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
                           const std::string& delim = default_delimiter) {
    const split_input& columns = splitter_.resplit(new_line, new_size, delim);
    input_ = columns;
    return input_;
}
|
||||
|
||||
private:
|
||||
////////////////
|
||||
// error
|
||||
|
@ -9,6 +9,9 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// TODO remove
|
||||
#include <iostream>
|
||||
|
||||
namespace ss {
|
||||
|
||||
template <typename... Matchers>
|
||||
@ -40,7 +43,7 @@ public:
|
||||
|
||||
void set_error_mode(error_mode mode) {
|
||||
error_mode_ = mode;
|
||||
buff_.set_error_mode(mode);
|
||||
reader_.set_error_mode(mode);
|
||||
}
|
||||
|
||||
const std::string& error_msg() const {
|
||||
@ -52,7 +55,7 @@ public:
|
||||
}
|
||||
|
||||
bool ignore_next() {
|
||||
return buff_.read(file_);
|
||||
return reader_.read(file_);
|
||||
}
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
@ -62,16 +65,16 @@ public:
|
||||
|
||||
template <typename T, typename... Ts>
|
||||
no_void_validator_tup_t<T, Ts...> get_next() {
|
||||
buff_.update();
|
||||
reader_.update();
|
||||
clear_error();
|
||||
if (eof_) {
|
||||
set_error_eof_reached();
|
||||
return {};
|
||||
}
|
||||
|
||||
auto value = buff_.get_converter().template convert<T, Ts...>();
|
||||
auto value = reader_.get_converter().template convert<T, Ts...>();
|
||||
|
||||
if (!buff_.get_converter().valid()) {
|
||||
if (!reader_.get_converter().valid()) {
|
||||
set_error_invalid_conversion();
|
||||
}
|
||||
|
||||
@ -160,8 +163,8 @@ public:
|
||||
no_void_validator_tup_t<U, Us...> try_same() {
|
||||
parser_.clear_error();
|
||||
auto value =
|
||||
parser_.buff_.get_converter().template convert<U, Us...>();
|
||||
if (!parser_.buff_.get_converter().valid()) {
|
||||
parser_.reader_.get_converter().template convert<U, Us...>();
|
||||
if (!parser_.reader_.get_converter().valid()) {
|
||||
parser_.set_error_invalid_conversion();
|
||||
}
|
||||
return value;
|
||||
@ -244,40 +247,124 @@ private:
|
||||
// line reading
|
||||
////////////////
|
||||
|
||||
class buffer {
|
||||
class reader {
|
||||
char* buffer_{nullptr};
|
||||
char* next_line_buffer_{nullptr};
|
||||
char* helper_buffer_{nullptr};
|
||||
|
||||
converter<Matchers...> converter_;
|
||||
converter<Matchers...> next_line_converter_;
|
||||
|
||||
size_t size_{0};
|
||||
size_t helper_size_{0};
|
||||
const std::string& delim_;
|
||||
|
||||
public:
|
||||
buffer(const std::string& delimiter) : delim_{delimiter} {
|
||||
bool crlf;
|
||||
|
||||
bool escaped_eol(size_t size) {
|
||||
// escaped new line
|
||||
if constexpr (setup<Matchers...>::escape::enabled) {
|
||||
const char* curr;
|
||||
for (curr = next_line_buffer_ + size - 1;
|
||||
curr >= next_line_buffer_ &&
|
||||
setup<Matchers...>::escape::match(*curr);
|
||||
--curr) {
|
||||
}
|
||||
return (next_line_buffer_ - curr + size) % 2 == 0;
|
||||
}
|
||||
|
||||
~buffer() {
|
||||
free(buffer_);
|
||||
free(next_line_buffer_);
|
||||
}
|
||||
|
||||
bool read(FILE* file) {
|
||||
ssize_t size = getline(&next_line_buffer_, &size_, file);
|
||||
size_t string_end = size - 1;
|
||||
|
||||
if (size == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (size >= 2 && next_line_buffer_[size - 2] == '\r') {
|
||||
string_end--;
|
||||
bool unterminated_quote() {
|
||||
// unterimated quote
|
||||
if constexpr (ss::setup<Matchers...>::quote::enabled) {
|
||||
if (next_line_converter_.unterminated_quote()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void undo_remove_eol(size_t& string_end) {
|
||||
if (crlf) {
|
||||
memcpy(next_line_buffer_ + string_end, "\r\n\0", 3);
|
||||
string_end += 2;
|
||||
} else {
|
||||
memcpy(next_line_buffer_ + string_end, "\n\0", 2);
|
||||
string_end += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// strips a trailing line ending ("\n" or "\r\n") from 'buffer', which holds
// 'size' characters, null terminates it at the new end and returns the new
// length; crlf records whether a "\r\n" was removed
// a buffer that does not end with '\n' (e.g. the last line of a file without
// a final new line) is returned untouched instead of losing its last character
size_t remove_eol(char*& buffer, size_t size) {
    // nothing to strip: empty input or no new line at the end
    if (size == 0 || buffer[size - 1] != '\n') {
        crlf = false;
        return size;
    }

    size_t new_size = size - 1;

    // a '\r' only counts as part of the line ending right before the '\n'
    crlf = (new_size > 0 && buffer[new_size - 1] == '\r');
    if (crlf) {
        --new_size;
    }

    buffer[new_size] = '\0';
    return new_size;
}
|
||||
|
||||
// appends the null terminated string 'second' ('second_size' characters) to
// 'first' ('first_size' characters); 'first' is resized with realloc and
// 'first_size' becomes the combined length
void realloc_concat(char*& first, size_t& first_size,
                    const char* const second, size_t second_size) {
    // combined text plus three spare bytes: undo_remove_eol may write
    // "\r\n\0" right behind the text when yet another line gets appended
    const size_t new_capacity = first_size + second_size + 3;

    // NOTE(review): a failed realloc is not handled, and a capacity that is
    // tracked separately for 'first' (e.g. the one handed to getline) is not
    // updated here - verify against the callers
    first = static_cast<char*>(
        realloc(static_cast<void*>(first), new_capacity));

    // second_size + 1 so that the terminating '\0' of 'second' is copied
    memcpy(first + first_size, second, second_size + 1);
    first_size += second_size;
}
|
||||
|
||||
bool append_line(FILE* file, char*& dst_buffer, size_t& dst_size) {
|
||||
undo_remove_eol(dst_size);
|
||||
|
||||
ssize_t ssize = getline(&helper_buffer_, &helper_size_, file);
|
||||
if (ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = remove_eol(helper_buffer_, ssize);
|
||||
realloc_concat(dst_buffer, dst_size, helper_buffer_, size);
|
||||
return true;
|
||||
}
|
||||
|
||||
public:
|
||||
// binds delim_ to 'delimiter'; delim_ is a reference, so the string passed
// in has to stay alive for as long as the reader is used
reader(const std::string& delimiter) : delim_{delimiter} {}
|
||||
|
||||
// releases the three line buffers; buffers that were never allocated are
// still null, which free accepts
~reader() {
    free(helper_buffer_);
    free(next_line_buffer_);
    free(buffer_);
}
|
||||
|
||||
bool read(FILE* file) {
|
||||
ssize_t ssize = getline(&next_line_buffer_, &size_, file);
|
||||
|
||||
if (ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t size = remove_eol(next_line_buffer_, ssize);
|
||||
|
||||
while (escaped_eol(size)) {
|
||||
if (!append_line(file, next_line_buffer_, size)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
next_line_buffer_[string_end] = '\0';
|
||||
next_line_converter_.split(next_line_buffer_, delim_);
|
||||
|
||||
while (unterminated_quote()) {
|
||||
if (!append_line(file, next_line_buffer_, size)) {
|
||||
return false;
|
||||
}
|
||||
next_line_converter_.resplit(next_line_buffer_, size);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -290,7 +377,7 @@ private:
|
||||
return converter_;
|
||||
}
|
||||
|
||||
const char* get() const {
|
||||
const char* get_buffer() const {
|
||||
return buffer_;
|
||||
}
|
||||
|
||||
@ -301,7 +388,7 @@ private:
|
||||
};
|
||||
|
||||
void read_line() {
|
||||
eof_ = !buff_.read(file_);
|
||||
eof_ = !reader_.read(file_);
|
||||
++line_number_;
|
||||
}
|
||||
|
||||
@ -341,9 +428,9 @@ private:
|
||||
.append(" ")
|
||||
.append(std::to_string(line_number_))
|
||||
.append(": ")
|
||||
.append(buff_.get_converter().error_msg())
|
||||
.append(reader_.get_converter().error_msg())
|
||||
.append(": \"")
|
||||
.append(buff_.get())
|
||||
.append(reader_.get_buffer())
|
||||
.append("\"");
|
||||
} else {
|
||||
bool_error_ = true;
|
||||
@ -360,7 +447,7 @@ private:
|
||||
bool bool_error_{false};
|
||||
error_mode error_mode_{error_mode::error_bool};
|
||||
FILE* file_{nullptr};
|
||||
buffer buff_{delim_};
|
||||
reader reader_{delim_};
|
||||
size_t line_number_{0};
|
||||
bool eof_{false};
|
||||
};
|
||||
|
@ -5,9 +5,6 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// TODO remove
|
||||
#include <iostream>
|
||||
|
||||
namespace ss {
|
||||
template <char... Cs>
|
||||
struct matcher {
|
||||
@ -99,16 +96,17 @@ private:
|
||||
using escape = typename setup<Ts...>::escape;
|
||||
|
||||
constexpr static auto is_const_line = !quote::enabled && !escape::enabled;
|
||||
|
||||
public:
|
||||
using line_ptr_type =
|
||||
typename ternary<is_const_line, const char*, char*>::type;
|
||||
|
||||
public:
|
||||
bool valid() const {
|
||||
return (error_mode_ == error_mode::error_string) ? string_error_.empty()
|
||||
: bool_error_ == false;
|
||||
}
|
||||
|
||||
bool unterminated_quote() {
|
||||
bool unterminated_quote() const {
|
||||
return unterminated_quote_;
|
||||
}
|
||||
|
||||
@ -120,7 +118,7 @@ public:
|
||||
error_mode_ = mode;
|
||||
}
|
||||
|
||||
split_input& split(line_ptr_type new_line,
|
||||
const split_input& split(line_ptr_type new_line,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
output_.clear();
|
||||
return resplit(new_line, -1, delimiter);
|
||||
@ -133,7 +131,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
split_input& resplit(line_ptr_type new_line, ssize_t new_size,
|
||||
const split_input& resplit(line_ptr_type new_line, ssize_t new_size,
|
||||
const std::string& delimiter = default_delimiter) {
|
||||
line_ = new_line;
|
||||
|
||||
|
@ -46,8 +46,7 @@ TEST_CASE("testing valid conversions") {
|
||||
CHECK(tup == 5);
|
||||
}
|
||||
{
|
||||
// TODO make \t -> ' '
|
||||
auto tup = c.convert<void, int, void>(buff("junk\t5\tjunk"), "\t");
|
||||
auto tup = c.convert<void, int, void>(buff("junk 5 junk"), " ");
|
||||
REQUIRE(c.valid());
|
||||
CHECK(tup == 5);
|
||||
}
|
||||
@ -398,3 +397,50 @@ TEST_CASE("testing error mode") {
|
||||
CHECK(!c.valid());
|
||||
CHECK(!c.error_msg().empty());
|
||||
}
|
||||
|
||||
// converter with quote, trim and escape matchers: first without any matcher,
// then each matcher on its own and finally all three combined
TEST_CASE("testing converter with quotes spacing and escaping") {
    {
        // no matchers: the quote characters are part of the values
        ss::converter c;

        const auto expected =
            std::make_tuple("\"just\"", "\"some\"", "\"strings\"");
        auto result = c.convert<std::string, std::string, std::string>(
            R"("just","some","strings")");
        REQUIRE(c.valid());
        CHECK(result == expected);
    }

    {
        // quote matcher: surrounding quotes are removed
        ss::converter<ss::quote<'"'>> c;

        const auto expected = std::make_tuple("just", "some", 12.3, 'a');
        auto result = c.convert<std::string, std::string, double, char>(
            buff(R"("just",some,"12.3","a")"));
        REQUIRE(c.valid());
        CHECK(result == expected);
    }

    {
        // trim matcher: spaces around the values are removed
        ss::converter<ss::trim<' '>> c;

        const auto expected = std::make_tuple("just", "some", 12.3, 'a');
        auto result = c.convert<std::string, std::string, double, char>(
            R"(    just  ,  some   ,  12.3 ,a     )");
        REQUIRE(c.valid());
        CHECK(result == expected);
    }

    {
        // escape matcher: an escaped delimiter stays inside the value
        ss::converter<ss::escape<'\\'>> c;

        const auto expected = std::make_tuple("ju,st", "strings");
        auto result =
            c.convert<std::string, std::string>(buff(R"(ju\,st,strings)"));
        REQUIRE(c.valid());
        CHECK(result == expected);
    }

    {
        // all three matchers combined
        ss::converter<ss::escape<'\\'>, ss::trim<' '>, ss::quote<'"'>> c;

        const auto expected =
            std::make_tuple("ju,st", "so,me", 12.34, "str\"ings");
        auto result = c.convert<std::string, std::string, double, std::string>(
            buff(R"(  ju\,st  ,  "so,me"  ,   12.34     ,   "str""ings")"));
        REQUIRE(c.valid());
        CHECK(result == expected);
    }
}
|
||||
|
@ -514,3 +514,72 @@ TEST_CASE("testing error mode") {
|
||||
CHECK(!p.valid());
|
||||
CHECK(!p.error_msg().empty());
|
||||
}
|
||||
|
||||
// returns 's' without its first and last character when it starts with a
// double quote, otherwise returns 's' unchanged
std::string no_quote(const std::string& s) {
    // at least two characters are required: a lone '"' has no closing
    // character to drop and is returned as it is
    if (s.size() >= 2 && s.front() == '"') {
        return s.substr(1, s.size() - 2);
    }
    return s;
}
|
||||
|
||||
// writes records whose quoted string column contains new lines and checks
// that the parser with a quote matcher reads every record back in one piece
TEST_CASE("testing csv on multiple lines with quotes") {
    unique_file_name f;
    std::vector<X> data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""},
                           {5, 6, "\"z\nz\""},    {7, 8, "\"u\"\"\""},
                           {9, 10, "v"},          {11, 12, "\"w\n\""}};
    make_and_write(f.name, data);

    // turn the written form of every string into the value the parser is
    // expected to produce
    for (auto& [first, second, s] : data) {
        s = no_quote(s);
        if (s[0] == 'u') {
            // the doubled quote inside the quoted value is one literal quote
            s = "u\"";
        }
    }

    ss::parser<ss::quote<'"'>> p{f.name, ","};
    p.set_error_mode(ss::error_mode::error_string);
    std::vector<X> i;

    while (!p.eof()) {
        auto a = p.get_next<int, double, std::string>();
        i.emplace_back(ss::to_object<X>(a));
    }

    // both ranges are given with their ends, so a different number of
    // records fails the check instead of going unnoticed
    CHECK(std::equal(i.begin(), i.end(), data.begin(), data.end()));
}
|
||||
|
||||
// removes every backslash from 's' in place and returns a copy of the result
std::string no_escape(std::string& s) {
    std::string stripped;
    for (const char c : s) {
        if (c != '\\') {
            stripped.push_back(c);
        }
    }
    s = stripped;
    return s;
}
|
||||
|
||||
// writes records whose string column contains escaped new lines and checks
// that the parser with an escape matcher reads every record back in one piece
TEST_CASE("testing csv on multiple lines with escapes") {
    unique_file_name f;
    std::vector<X> data = {{1, 2, "x\\\nx\\\nx"}, {3, 4, "y\\\ny\\\ny"},
                           {5, 6, "z\\\nz"},      {7, 8, "u"},
                           {9, 10, "v\\\\"},      {11, 12, "w\\\n"}};

    make_and_write(f.name, data);

    // turn the written form of every string into the value the parser is
    // expected to produce
    for (auto& [first, second, s] : data) {
        s = no_escape(s);
        if (s == "v") {
            // written as an escaped backslash, which is one literal backslash
            s = "v\\";
        }
    }

    ss::parser<ss::escape<'\\'>> p{f.name, ","};
    p.set_error_mode(ss::error_mode::error_string);
    std::vector<X> i;

    while (!p.eof()) {
        auto a = p.get_next<int, double, std::string>();
        i.emplace_back(ss::to_object<X>(a));
    }

    // both ranges are given with their ends, so a different number of
    // records fails the check instead of going unnoticed
    CHECK(std::equal(i.begin(), i.end(), data.begin(), data.end()));
}
|
||||
|
Loading…
Reference in New Issue
Block a user