Add option to read csv data from a buffer, add some unit tests for the new feature

This commit is contained in:
ado 2024-02-17 00:55:36 +01:00
parent 4bedc32b63
commit f04ede3a49
3 changed files with 250 additions and 83 deletions

View File

@ -26,7 +26,7 @@ inline void assert_throw_on_error_not_defined() {
} }
#if __unix__ #if __unix__
inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
return getline(lineptr, n, stream); return getline(lineptr, n, stream);
} }
#else #else

View File

@ -48,6 +48,18 @@ public:
} }
} }
// Constructs a parser that takes its CSV input from an in-memory buffer
// instead of a file.
//
// csv_data_buffer - start of the CSV data; it is forwarded to the reader
//                   together with its size
// csv_data_size   - number of bytes available at csv_data_buffer
// delim           - column delimiter, ss::default_delimiter unless given
//
// There is no real file name in this mode, so file_name_ is set to the
// placeholder "buffer line". The first line is read right away; it is then
// either skipped (ignore_header) or kept as the raw header.
parser(const char* const csv_data_buffer, size_t csv_data_size,
       const std::string& delim = ss::default_delimiter)
    : file_name_{"buffer line"},
      reader_{csv_data_buffer, csv_data_size, delim} {
    read_line();
    if constexpr (!ignore_header) {
        // Keep the first line around as the raw header.
        raw_header_ = reader_.get_buffer();
    } else {
        // Header is not wanted: drop the line that was just read.
        ignore_next();
    }
}
parser(parser&& other) = default; parser(parser&& other) = default;
parser& operator=(parser&& other) = default; parser& operator=(parser&& other) = default;
@ -641,18 +653,27 @@ private:
: delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} {
} }
// Constructs a reader over an in-memory CSV buffer instead of a file.
//
// buffer        - start of the CSV data
// csv_data_size - number of bytes available at `buffer`
// delim         - column delimiter, copied into delim_
//
// Only the pointer and the size are stored; nothing here copies the data.
// NOTE(review): the caller presumably has to keep `buffer` alive for as long
// as the reader is used - confirm.
// file_ is not set by this constructor and keeps its default of nullptr.
reader(const char* const buffer, size_t csv_data_size,
const std::string& delim)
: delim_{delim}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} {
}
reader(reader&& other) reader(reader&& other)
: buffer_{other.buffer_}, : buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_}, next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_}, converter_{std::move( helper_buffer_{other.helper_buffer_},
other.converter_)}, converter_{std::move(other.converter_)},
next_line_converter_{std::move(other.next_line_converter_)}, next_line_converter_{std::move(other.next_line_converter_)},
buffer_size_{other.buffer_size_}, buffer_size_{other.buffer_size_},
next_line_buffer_size_{other.next_line_buffer_size_}, next_line_buffer_size_{other.next_line_buffer_size_},
helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, helper_buffer_size{other.helper_buffer_size},
file_{other.file_}, crlf_{other.crlf_}, delim_{std::move(other.delim_)}, file_{other.file_},
line_number_{other.line_number_}, next_line_size_{ csv_data_buffer_{other.csv_data_buffer_},
other.next_line_size_} { csv_data_size_{other.csv_data_size_},
curr_char_{other.curr_char_}, crlf_{other.crlf_},
line_number_{other.line_number_},
next_line_size_{other.next_line_size_} {
other.buffer_ = nullptr; other.buffer_ = nullptr;
other.next_line_buffer_ = nullptr; other.next_line_buffer_ = nullptr;
other.helper_buffer_ = nullptr; other.helper_buffer_ = nullptr;
@ -668,9 +689,12 @@ private:
next_line_converter_ = std::move(other.next_line_converter_); next_line_converter_ = std::move(other.next_line_converter_);
buffer_size_ = other.buffer_size_; buffer_size_ = other.buffer_size_;
next_line_buffer_size_ = other.next_line_buffer_size_; next_line_buffer_size_ = other.next_line_buffer_size_;
helper_size_ = other.helper_size_; helper_buffer_size = other.helper_buffer_size;
delim_ = std::move(other.delim_); delim_ = std::move(other.delim_);
file_ = other.file_; file_ = other.file_;
csv_data_buffer_ = other.csv_data_buffer_;
csv_data_size_ = other.csv_data_size_;
curr_char_ = other.curr_char_;
crlf_ = other.crlf_; crlf_ = other.crlf_;
line_number_ = other.line_number_; line_number_ = other.line_number_;
next_line_size_ = other.next_line_size_; next_line_size_ = other.next_line_size_;
@ -698,6 +722,60 @@ private:
reader(const reader& other) = delete; reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete; reader& operator=(const reader& other) = delete;
// getline()-like line extraction from an in-memory buffer.
//
// Copies characters from `buffer`, starting at index `curr_char`, into
// `*lineptr` up to and including the next '\n' or until `csv_data_size`
// bytes have been consumed, whichever comes first. The stored line is always
// '\0'-terminated.
//
// lineptr       - in/out: destination line buffer; allocated with malloc
//                 when `*lineptr` is nullptr and grown with realloc when it
//                 is too small, so it must be released with free()
// n             - in/out: capacity of `*lineptr` in bytes
// buffer        - the CSV data to read from
// csv_data_size - number of valid bytes in `buffer`
// curr_char     - in/out: index of the next unread byte; it is advanced past
//                 the returned line and never beyond `csv_data_size`
//
// Returns the number of characters stored (including the '\n' when present,
// excluding the terminating '\0'), or -1 when no data is left, when a
// pointer argument is null, or when an allocation fails.
ssize_t get_line_buffer(char** lineptr, size_t* n,
                        const char* const buffer, size_t csv_data_size,
                        size_t& curr_char) {
    // TODO remove check
    if (lineptr == nullptr || buffer == nullptr || n == nullptr) {
        return -1;
    }

    // Check for exhausted input *before* touching the buffer so that no byte
    // at or past `csv_data_size` is ever read, and so that a final line made
    // of a single character is still returned instead of being dropped.
    if (curr_char >= csv_data_size) {
        return -1;
    }

    // TODO maybe remove this too
    if (*lineptr == nullptr) {
        *lineptr = static_cast<char*>(malloc(128));
        if (*lineptr == nullptr) {
            return -1;
        }
        *n = 128;
    }

    size_t pos = 0;
    while (curr_char < csv_data_size) {
        // Keep room for the character about to be stored plus the '\0'.
        if (pos + 1 >= *n) {
            size_t new_size = *n + (*n >> 2);

            // TODO maybe remove this too
            if (new_size < 128) {
                new_size = 128;
            }

            char* new_ptr = static_cast<char*>(
                realloc(static_cast<void*>(*lineptr), new_size));

            // TODO check for failed malloc in the callee
            if (new_ptr == nullptr) {
                // realloc leaves the old block intact on failure, so
                // *lineptr is still valid and still owned by the caller.
                return -1;
            }
            *n = new_size;
            *lineptr = new_ptr;
        }

        const char c = buffer[curr_char++];
        (*lineptr)[pos++] = c;
        if (c == '\n') {
            break;
        }
    }

    (*lineptr)[pos] = '\0';
    return static_cast<ssize_t>(pos);
}
// read next line each time in order to set eof_ // read next line each time in order to set eof_
bool read_next() { bool read_next() {
next_line_converter_.clear_error(); next_line_converter_.clear_error();
@ -708,8 +786,16 @@ private:
if (next_line_buffer_size_ > 0) { if (next_line_buffer_size_ > 0) {
next_line_buffer_[0] = '\0'; next_line_buffer_[0] = '\0';
} }
ssize = get_line(&next_line_buffer_, &next_line_buffer_size_,
file_); if (file_) {
ssize = get_line_file(&next_line_buffer_,
&next_line_buffer_size_, file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (ssize == -1) { if (ssize == -1) {
return false; return false;
@ -821,6 +907,10 @@ private:
} }
size_t remove_eol(char*& buffer, size_t ssize) { size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') {
return ssize;
}
size_t size = ssize - 1; size_t size = ssize - 1;
if (ssize >= 2 && buffer[ssize - 2] == '\r') { if (ssize >= 2 && buffer[ssize - 2] == '\r') {
crlf_ = true; crlf_ = true;
@ -851,8 +941,17 @@ private:
bool append_next_line_to_buffer(char*& buffer, size_t& size) { bool append_next_line_to_buffer(char*& buffer, size_t& size) {
undo_remove_eol(buffer, size); undo_remove_eol(buffer, size);
ssize_t next_ssize = ssize_t next_ssize;
get_line(&helper_buffer_, &helper_size_, file_); if (file_) {
next_ssize =
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (next_ssize == -1) { if (next_ssize == -1) {
return false; return false;
} }
@ -879,11 +978,15 @@ private:
size_t buffer_size_{0}; size_t buffer_size_{0};
size_t next_line_buffer_size_{0}; size_t next_line_buffer_size_{0};
size_t helper_size_{0}; size_t helper_buffer_size{0};
std::string delim_; std::string delim_;
FILE* file_{nullptr}; FILE* file_{nullptr};
const char* csv_data_buffer_{nullptr};
size_t csv_data_size_{0};
size_t curr_char_{0};
bool crlf_{false}; bool crlf_{false};
size_t line_number_{0}; size_t line_number_{0};

View File

@ -38,7 +38,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
} }
} }
void update_if_crlf(std::string& s) { [[maybe_unused]] void update_if_crlf(std::string& s) {
#ifdef _WIN32 #ifdef _WIN32
replace_all(s, "\r\n", "\n"); replace_all(s, "\r\n", "\n");
#else #else
@ -102,6 +102,31 @@ static void make_and_write(const std::string& file_name,
out << data[i].to_string() << new_lines[i % new_lines.size()]; out << data[i].to_string() << new_lines[i % new_lines.size()];
} }
} }
// Test helper: loads the file `file_name` into a single string.
//
// The file is read as whitespace-separated tokens (operator>>), and every
// token is written back followed by exactly one '\n'. Line endings are
// therefore normalised to '\n' and empty lines disappear. A file that cannot
// be opened yields an empty string.
std::string make_buffer(const std::string& file_name) {
    std::ifstream input{file_name, std::ios::binary};

    std::string result;
    // Ask for more capacity than the string object itself occupies.
    // NOTE(review): presumably this forces heap storage so that a pointer
    // obtained from data() stays usable after the string is moved - confirm.
    result.reserve(sizeof(result) + 1);

    for (std::string token; input >> token;) {
        result += token;
        result += '\n';
    }
    return result;
}
template <bool buffer_mode, typename... Ts>
std::tuple<ss::parser<Ts...>, std::string> make_parser(
const std::string& file_name, const std::string& delim) {
if (buffer_mode) {
auto buffer = make_buffer(file_name);
return {ss::parser<Ts...>{buffer.data(), buffer.size(), delim},
std::move(buffer)};
} else {
return {ss::parser<Ts...>{file_name, delim}, std::string{}};
}
}
} /* namespace */ } /* namespace */
TEST_CASE("test file not found") { TEST_CASE("test file not found") {
@ -125,22 +150,23 @@ TEST_CASE("test file not found") {
} }
} }
template <typename... Ts> template <bool buffer_mode, typename... Ts>
void test_various_cases() { void test_various_cases() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
make_and_write(f.name, data); make_and_write(f.name, data);
auto csv_data_buffer = make_buffer(f.name);
{ {
ss::parser<Ts...> p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
ss::parser p0{std::move(p)}; ss::parser p0{std::move(p)};
p = std::move(p0); p = std::move(p0);
std::vector<X> i; std::vector<X> i;
ss::parser<ss::string_error> p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
auto move_rotate = [&] { auto move_rotate = [&p = p, &p0 = p0] {
auto p1 = std::move(p); auto p1 = std::move(p);
p0 = std::move(p1); p0 = std::move(p1);
p = std::move(p0); p = std::move(p0);
@ -152,7 +178,7 @@ void test_various_cases() {
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
for (const auto& a : p2.iterate<int, double, std::string>()) { for (const auto& a : p2.template iterate<int, double, std::string>()) {
i2.emplace_back(ss::to_object<X>(a)); i2.emplace_back(ss::to_object<X>(a));
} }
@ -161,13 +187,13 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
ss::parser p3{f.name, ","}; auto [p3, ___] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i3; std::vector<X> i3;
std::vector<X> expected = {std::begin(data) + 1, std::end(data)}; std::vector<X> expected = {std::begin(data) + 1, std::end(data)};
@ -175,18 +201,18 @@ void test_various_cases() {
p.ignore_next(); p.ignore_next();
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_next<tup>(); auto a = p.template get_next<tup>();
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
p2.ignore_next(); p2.ignore_next();
for (const auto& a : p2.iterate<tup>()) { for (const auto& a : p2.template iterate<tup>()) {
i2.emplace_back(ss::to_object<X>(a)); i2.emplace_back(ss::to_object<X>(a));
} }
p3.ignore_next(); p3.ignore_next();
for (auto it = p3.iterate<tup>().begin(); it != p3.iterate<tup>().end(); for (auto it = p3.template iterate<tup>().begin();
++it) { it != p3.template iterate<tup>().end(); ++it) {
i3.emplace_back(ss::to_object<X>(*it)); i3.emplace_back(ss::to_object<X>(*it));
} }
@ -196,16 +222,17 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
while (!p.eof()) { while (!p.eof()) {
i.push_back(p.get_object<X, int, double, std::string>()); i.push_back(p.template get_object<X, int, double, std::string>());
} }
for (auto&& a : p2.iterate_object<X, int, double, std::string>()) { for (auto&& a :
p2.template iterate_object<X, int, double, std::string>()) {
i2.push_back(std::move(a)); i2.push_back(std::move(a));
} }
@ -214,10 +241,11 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
for (auto&& a : p.iterate_object<X, int, double, std::string>()) { for (auto&& a :
p.template iterate_object<X, int, double, std::string>()) {
i.push_back(std::move(a)); i.push_back(std::move(a));
} }
@ -225,19 +253,19 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
using tup = std::tuple<int, double, std::string>; using tup = std::tuple<int, double, std::string>;
while (!p.eof()) { while (!p.eof()) {
i.push_back(p.get_object<X, tup>()); i.push_back(p.template get_object<X, tup>());
} }
for (auto it = p2.iterate_object<X, tup>().begin(); for (auto it = p2.template iterate_object<X, tup>().begin();
it != p2.iterate_object<X, tup>().end(); it++) { it != p2.template iterate_object<X, tup>().end(); it++) {
i2.push_back({it->i, it->d, it->s}); i2.push_back({it->i, it->d, it->s});
} }
@ -246,11 +274,11 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
using tup = std::tuple<int, double, std::string>; using tup = std::tuple<int, double, std::string>;
for (auto&& a : p.iterate_object<X, tup>()) { for (auto&& a : p.template iterate_object<X, tup>()) {
i.push_back(std::move(a)); i.push_back(std::move(a));
} }
@ -258,21 +286,21 @@ void test_various_cases() {
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
i.push_back(p.get_next<X>()); i.push_back(p.template get_next<X>());
} }
CHECK_EQ(i, data); CHECK_EQ(i, data);
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
for (auto&& a : p.iterate<X>()) { for (auto&& a : p.template iterate<X>()) {
i.push_back(std::move(a)); i.push_back(std::move(a));
} }
@ -281,24 +309,30 @@ void test_various_cases() {
{ {
constexpr int excluded = 3; constexpr int excluded = 3;
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
while (!p.eof()) { while (!p.eof()) {
auto a = try {
p.get_object<X, ss::ax<int, excluded>, double, std::string>(); auto a = p.template get_object<X, ss::ax<int, excluded>, double,
if (p.valid()) { std::string>();
i.push_back(a); if (p.valid()) {
} i.push_back(a);
}
} catch (...) {
// ignore
};
} }
for (auto&& a : p2.iterate_object<X, ss::ax<int, excluded>, double, if (!ss::setup<Ts...>::throw_on_error) {
std::string>()) { for (auto&& a : p2.template iterate_object<X, ss::ax<int, excluded>,
if (p2.valid()) { double, std::string>()) {
i2.push_back(std::move(a)); if (p2.valid()) {
i2.push_back(std::move(a));
}
} }
} }
@ -312,33 +346,45 @@ void test_various_cases() {
std::copy_if(data.begin(), data.end(), expected.begin(), std::copy_if(data.begin(), data.end(), expected.begin(),
[&](const X& x) { return x.i != excluded; }); [&](const X& x) { return x.i != excluded; });
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ(i2, expected);
if (!ss::setup<Ts...>::throw_on_error) {
CHECK_EQ(i2, expected);
}
} }
{ {
ss::parser p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2; std::vector<X> i2;
while (!p.eof()) { while (!p.eof()) {
auto a = p.get_object<X, ss::nx<int, 3>, double, std::string>(); try {
if (p.valid()) { auto a = p.template get_object<X, ss::nx<int, 3>, double,
i.push_back(a); std::string>();
if (p.valid()) {
i.push_back(a);
}
} catch (...) {
// ignore
} }
} }
for (auto&& a : if (!ss::setup<Ts...>::throw_on_error) {
p2.iterate_object<X, ss::nx<int, 3>, double, std::string>()) { for (auto&& a : p2.template iterate_object<X, ss::nx<int, 3>,
if (p2.valid()) { double, std::string>()) {
i2.push_back(std::move(a)); if (p2.valid()) {
i2.push_back(std::move(a));
}
} }
} }
std::vector<X> expected = {{3, 4, "y"}}; std::vector<X> expected = {{3, 4, "y"}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ(i2, expected); if (!ss::setup<Ts...>::throw_on_error) {
CHECK_EQ(i2, expected);
}
} }
{ {
@ -347,17 +393,17 @@ void test_various_cases() {
make_and_write(empty_f.name, empty_data); make_and_write(empty_f.name, empty_data);
ss::parser p{empty_f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(empty_f.name, ",");
std::vector<X> i; std::vector<X> i;
ss::parser p2{empty_f.name, ","}; auto [p2, __] = make_parser<buffer_mode, Ts...>(empty_f.name, ",");
std::vector<X> i2; std::vector<X> i2;
while (!p.eof()) { while (!p.eof()) {
i.push_back(p.get_next<X>()); i.push_back(p.template get_next<X>());
} }
for (auto&& a : p2.iterate<X>()) { for (auto&& a : p2.template iterate<X>()) {
i2.push_back(std::move(a)); i2.push_back(std::move(a));
} }
@ -367,9 +413,12 @@ void test_various_cases() {
} }
TEST_CASE("parser test various cases") { TEST_CASE("parser test various cases") {
test_various_cases(); test_various_cases<false>();
test_various_cases<ss::string_error>(); test_various_cases<false, ss::string_error>();
test_various_cases<ss::throw_on_error>(); test_various_cases<false, ss::throw_on_error>();
test_various_cases<true>();
test_various_cases<true, ss::string_error>();
test_various_cases<true, ss::throw_on_error>();
} }
using test_tuple = std::tuple<double, char, double>; using test_tuple = std::tuple<double, char, double>;
@ -385,7 +434,7 @@ struct test_struct {
static inline void expect_test_struct(const test_struct&) { static inline void expect_test_struct(const test_struct&) {
} }
template <typename... Ts> template <bool buffer_mode, typename... Ts>
void test_composite_conversion() { void test_composite_conversion() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
{ {
@ -397,7 +446,7 @@ void test_composite_conversion() {
} }
} }
ss::parser<Ts...> p{f.name, ","}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
auto fail = [] { FAIL(""); }; auto fail = [] { FAIL(""); };
auto expect_error = [](auto error) { CHECK(!error.empty()); }; auto expect_error = [](auto error) { CHECK(!error.empty()); };
auto ignore_error = [] {}; auto ignore_error = [] {};
@ -609,7 +658,8 @@ void test_composite_conversion() {
// various scenarios // various scenarios
TEST_CASE("parser test composite conversion") { TEST_CASE("parser test composite conversion") {
test_composite_conversion<ss::string_error>(); test_composite_conversion<false, ss::string_error>();
test_composite_conversion<true, ss::string_error>();
} }
struct my_string { struct my_string {
@ -653,7 +703,7 @@ struct xyz {
} }
}; };
template <typename... Ts> template <bool buffer_mode, typename... Ts>
void test_moving_of_parsed_composite_values() { void test_moving_of_parsed_composite_values() {
// to compile is enough // to compile is enough
return; return;
@ -669,8 +719,10 @@ void test_moving_of_parsed_composite_values() {
} }
TEST_CASE("parser test the moving of parsed composite values") { TEST_CASE("parser test the moving of parsed composite values") {
test_moving_of_parsed_composite_values(); test_moving_of_parsed_composite_values<false>();
test_moving_of_parsed_composite_values<ss::string_error>(); test_moving_of_parsed_composite_values<false, ss::string_error>();
test_moving_of_parsed_composite_values<true>();
test_moving_of_parsed_composite_values<true, ss::string_error>();
} }
TEST_CASE("parser test error mode") { TEST_CASE("parser test error mode") {
@ -681,12 +733,23 @@ TEST_CASE("parser test error mode") {
out << "junk" << std::endl; out << "junk" << std::endl;
} }
ss::parser<ss::string_error> p(f.name, ","); {
auto [p, _] = make_parser<false, ss::string_error>(f.name, ",");
REQUIRE_FALSE(p.eof()); REQUIRE_FALSE(p.eof());
p.get_next<int>(); p.get_next<int>();
CHECK_FALSE(p.valid()); CHECK_FALSE(p.valid());
CHECK_FALSE(p.error_msg().empty()); CHECK_FALSE(p.error_msg().empty());
}
{
auto [p, _] = make_parser<true, ss::string_error>(f.name, ",");
REQUIRE_FALSE(p.eof());
p.get_next<int>();
CHECK_FALSE(p.valid());
CHECK_FALSE(p.error_msg().empty());
}
} }
TEST_CASE("parser throw on error mode") { TEST_CASE("parser throw on error mode") {
@ -1680,3 +1743,4 @@ TEST_CASE("parser test various cases with empty lines") {
test_ignore_empty({}); test_ignore_empty({});
} }