Add option to read csv data from a buffer, add some unit tests for the new feature

This commit is contained in:
ado 2024-02-17 00:55:36 +01:00
parent 4bedc32b63
commit f04ede3a49
3 changed files with 250 additions and 83 deletions

View File

@ -26,7 +26,7 @@ inline void assert_throw_on_error_not_defined() {
}
#if __unix__
inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) {
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
return getline(lineptr, n, stream);
}
#else

View File

@ -48,6 +48,18 @@ public:
}
}
// Builds a parser over CSV text held in memory instead of a file.
// `csv_data_buffer` / `csv_data_size` are handed to the reader together with
// `delim`; `file_name_` is set to the placeholder "buffer line" because no
// file is involved.
parser(const char* const csv_data_buffer, size_t csv_data_size,
       const std::string& delim = ss::default_delimiter)
    : file_name_{"buffer line"},
      reader_{csv_data_buffer, csv_data_size, delim} {
    // Load the first line right away, before the header is handled.
    read_line();

    if constexpr (!ignore_header) {
        // Header is kept: remember its raw text from the reader.
        raw_header_ = reader_.get_buffer();
    } else {
        // Header is not wanted: step over it.
        ignore_next();
    }
}
parser(parser&& other) = default;
parser& operator=(parser&& other) = default;
@ -641,18 +653,27 @@ private:
: delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} {
}
// Buffer-mode constructor: records the delimiter plus the pointer to, and
// size of, the CSV data. Only the pointer is stored here (no copy is made in
// this constructor). `file_` keeps its default nullptr, which is what makes
// the line-reading code pick the buffer path instead of the file path.
reader(const char* const buffer, size_t csv_data_size,
const std::string& delim)
: delim_{delim}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} {
}
reader(reader&& other)
: buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_}, converter_{std::move(
other.converter_)},
helper_buffer_{other.helper_buffer_},
converter_{std::move(other.converter_)},
next_line_converter_{std::move(other.next_line_converter_)},
buffer_size_{other.buffer_size_},
next_line_buffer_size_{other.next_line_buffer_size_},
helper_size_{other.helper_size_}, delim_{std::move(other.delim_)},
file_{other.file_}, crlf_{other.crlf_},
line_number_{other.line_number_}, next_line_size_{
other.next_line_size_} {
helper_buffer_size{other.helper_buffer_size},
delim_{std::move(other.delim_)}, file_{other.file_},
csv_data_buffer_{other.csv_data_buffer_},
csv_data_size_{other.csv_data_size_},
curr_char_{other.curr_char_}, crlf_{other.crlf_},
line_number_{other.line_number_},
next_line_size_{other.next_line_size_} {
other.buffer_ = nullptr;
other.next_line_buffer_ = nullptr;
other.helper_buffer_ = nullptr;
@ -668,9 +689,12 @@ private:
next_line_converter_ = std::move(other.next_line_converter_);
buffer_size_ = other.buffer_size_;
next_line_buffer_size_ = other.next_line_buffer_size_;
helper_size_ = other.helper_size_;
helper_buffer_size = other.helper_buffer_size;
delim_ = std::move(other.delim_);
file_ = other.file_;
csv_data_buffer_ = other.csv_data_buffer_;
csv_data_size_ = other.csv_data_size_;
curr_char_ = other.curr_char_;
crlf_ = other.crlf_;
line_number_ = other.line_number_;
next_line_size_ = other.next_line_size_;
@ -698,6 +722,60 @@ private:
reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete;
// Buffer-mode counterpart of get_line_file: copies the next line of `buffer`
// into `*lineptr` and null-terminates it. A line ends at (and includes) its
// '\n'; the last line may be unterminated, in which case it ends at the end
// of the data.
//
// lineptr        in/out: destination storage obtained with malloc/realloc;
//                allocated here when `*lineptr` is nullptr, grown on demand.
// n              in/out: capacity of `*lineptr` in bytes.
// buffer         start of the CSV data.
// csv_data_size  number of valid bytes in `buffer`.
// curr_char      in/out: offset of the next unread byte; never advanced
//                beyond `csv_data_size`.
//
// Returns the number of characters copied (terminator excluded), or -1 when
// an argument is null, no data is left, or an allocation fails.
ssize_t get_line_buffer(char** lineptr, size_t* n,
                        const char* const buffer, size_t csv_data_size,
                        size_t& curr_char) {
    if (lineptr == nullptr || buffer == nullptr || n == nullptr) {
        return -1;
    }

    // Test for end of data before reading anything, so that an empty or
    // exhausted buffer is never dereferenced and `curr_char` stays put.
    if (curr_char >= csv_data_size) {
        return -1;
    }

    if (*lineptr == nullptr) {
        *lineptr = static_cast<char*>(malloc(128));
        if (*lineptr == nullptr) {
            return -1;
        }
        *n = 128;
    }

    size_t pos = 0;
    while (curr_char < csv_data_size) {
        // Keep room for the character about to be stored plus the '\0'.
        if (pos + 1 >= *n) {
            size_t new_size = *n + (*n >> 2);
            if (new_size < 128) {
                new_size = 128;
            }

            char* new_ptr = static_cast<char*>(
                realloc(static_cast<void*>(*lineptr), new_size));
            if (new_ptr == nullptr) {
                // *lineptr is still valid and still owned by the caller.
                return -1;
            }
            *n = new_size;
            *lineptr = new_ptr;
        }

        const char c = buffer[curr_char++];
        (*lineptr)[pos++] = c;
        if (c == '\n') {
            break;
        }
    }

    (*lineptr)[pos] = '\0';
    return static_cast<ssize_t>(pos);
}
// read next line each time in order to set eof_
bool read_next() {
next_line_converter_.clear_error();
@ -708,8 +786,16 @@ private:
if (next_line_buffer_size_ > 0) {
next_line_buffer_[0] = '\0';
}
ssize = get_line(&next_line_buffer_, &next_line_buffer_size_,
file_);
if (file_) {
ssize = get_line_file(&next_line_buffer_,
&next_line_buffer_size_, file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (ssize == -1) {
return false;
@ -821,6 +907,10 @@ private:
}
size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') {
return ssize;
}
size_t size = ssize - 1;
if (ssize >= 2 && buffer[ssize - 2] == '\r') {
crlf_ = true;
@ -851,8 +941,17 @@ private:
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
undo_remove_eol(buffer, size);
ssize_t next_ssize =
get_line(&helper_buffer_, &helper_size_, file_);
ssize_t next_ssize;
if (file_) {
next_ssize =
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (next_ssize == -1) {
return false;
}
@ -879,11 +978,15 @@ private:
size_t buffer_size_{0};
size_t next_line_buffer_size_{0};
size_t helper_size_{0};
size_t helper_buffer_size{0};
std::string delim_;
FILE* file_{nullptr};
const char* csv_data_buffer_{nullptr};
size_t csv_data_size_{0};
size_t curr_char_{0};
bool crlf_{false};
size_t line_number_{0};

View File

@ -38,7 +38,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
}
}
void update_if_crlf(std::string& s) {
[[maybe_unused]] void update_if_crlf(std::string& s) {
#ifdef _WIN32
replace_all(s, "\r\n", "\n");
#else
@ -102,6 +102,31 @@ static void make_and_write(const std::string& file_name,
out << data[i].to_string() << new_lines[i % new_lines.size()];
}
}
// Reads `file_name` and returns its whitespace-separated tokens, each
// followed by a single '\n'. Because extraction with `>>` skips whitespace,
// "\r\n" and "\n" line endings both come out as "\n" and blank lines are
// dropped. Returns an empty string when the file cannot be opened.
std::string make_buffer(const std::string& file_name) {
    // Open positioned at the end so the file size is known up front.
    std::ifstream in{file_name, std::ios::binary | std::ios::ate};

    std::string out;
    // tellg() yields -1 on a failed stream; only reserve for a real size.
    // The output never exceeds the file size plus one trailing '\n'.
    const std::streamoff file_size = in.tellg();
    if (file_size > 0) {
        out.reserve(static_cast<size_t>(file_size) + 1);
    }
    in.seekg(0, std::ios::beg);

    std::string tmp;
    while (in >> tmp) {
        out += tmp;
        out.append("\n");
    }
    return out;
}
// Creates the parser under test either on top of the file itself
// (buffer_mode == false) or on top of an in-memory copy of its contents
// (buffer_mode == true). The second tuple element owns that in-memory copy
// and must stay alive for as long as the parser is used; it is an empty
// string in file mode.
template <bool buffer_mode, typename... Ts>
std::tuple<ss::parser<Ts...>, std::string> make_parser(
    const std::string& file_name, const std::string& delim) {
    if constexpr (buffer_mode) {
        auto buffer = make_buffer(file_name);
        // The parser keeps a raw pointer into `buffer`, which is then moved
        // into the returned tuple. A short string stores its characters
        // inside the string object itself, so moving it would leave that
        // pointer dangling. Reserving at least sizeof(std::string) bytes
        // forces the characters out of the object and onto the heap.
        // NOTE(review): this relies on the move constructor handing over the
        // heap block unchanged, which the common standard libraries do but
        // the standard does not spell out - confirm for exotic toolchains.
        buffer.reserve(buffer.size() + sizeof(std::string));
        return {ss::parser<Ts...>{buffer.data(), buffer.size(), delim},
                std::move(buffer)};
    } else {
        return {ss::parser<Ts...>{file_name, delim}, std::string{}};
    }
}
} /* namespace */
TEST_CASE("test file not found") {
@ -125,22 +150,23 @@ TEST_CASE("test file not found") {
}
}
template <typename... Ts>
template <bool buffer_mode, typename... Ts>
void test_various_cases() {
unique_file_name f{"test_parser"};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
make_and_write(f.name, data);
auto csv_data_buffer = make_buffer(f.name);
{
ss::parser<Ts...> p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
ss::parser p0{std::move(p)};
p = std::move(p0);
std::vector<X> i;
ss::parser<ss::string_error> p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
auto move_rotate = [&] {
auto move_rotate = [&p = p, &p0 = p0] {
auto p1 = std::move(p);
p0 = std::move(p1);
p = std::move(p0);
@ -152,7 +178,7 @@ void test_various_cases() {
i.emplace_back(ss::to_object<X>(a));
}
for (const auto& a : p2.iterate<int, double, std::string>()) {
for (const auto& a : p2.template iterate<int, double, std::string>()) {
i2.emplace_back(ss::to_object<X>(a));
}
@ -161,13 +187,13 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
ss::parser p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
ss::parser p3{f.name, ","};
auto [p3, ___] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i3;
std::vector<X> expected = {std::begin(data) + 1, std::end(data)};
@ -175,18 +201,18 @@ void test_various_cases() {
p.ignore_next();
while (!p.eof()) {
auto a = p.get_next<tup>();
auto a = p.template get_next<tup>();
i.emplace_back(ss::to_object<X>(a));
}
p2.ignore_next();
for (const auto& a : p2.iterate<tup>()) {
for (const auto& a : p2.template iterate<tup>()) {
i2.emplace_back(ss::to_object<X>(a));
}
p3.ignore_next();
for (auto it = p3.iterate<tup>().begin(); it != p3.iterate<tup>().end();
++it) {
for (auto it = p3.template iterate<tup>().begin();
it != p3.template iterate<tup>().end(); ++it) {
i3.emplace_back(ss::to_object<X>(*it));
}
@ -196,16 +222,17 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
ss::parser p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
while (!p.eof()) {
i.push_back(p.get_object<X, int, double, std::string>());
i.push_back(p.template get_object<X, int, double, std::string>());
}
for (auto&& a : p2.iterate_object<X, int, double, std::string>()) {
for (auto&& a :
p2.template iterate_object<X, int, double, std::string>()) {
i2.push_back(std::move(a));
}
@ -214,10 +241,11 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
for (auto&& a : p.iterate_object<X, int, double, std::string>()) {
for (auto&& a :
p.template iterate_object<X, int, double, std::string>()) {
i.push_back(std::move(a));
}
@ -225,19 +253,19 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
ss::parser p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
using tup = std::tuple<int, double, std::string>;
while (!p.eof()) {
i.push_back(p.get_object<X, tup>());
i.push_back(p.template get_object<X, tup>());
}
for (auto it = p2.iterate_object<X, tup>().begin();
it != p2.iterate_object<X, tup>().end(); it++) {
for (auto it = p2.template iterate_object<X, tup>().begin();
it != p2.template iterate_object<X, tup>().end(); it++) {
i2.push_back({it->i, it->d, it->s});
}
@ -246,11 +274,11 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
using tup = std::tuple<int, double, std::string>;
for (auto&& a : p.iterate_object<X, tup>()) {
for (auto&& a : p.template iterate_object<X, tup>()) {
i.push_back(std::move(a));
}
@ -258,21 +286,21 @@ void test_various_cases() {
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
while (!p.eof()) {
i.push_back(p.get_next<X>());
i.push_back(p.template get_next<X>());
}
CHECK_EQ(i, data);
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
for (auto&& a : p.iterate<X>()) {
for (auto&& a : p.template iterate<X>()) {
i.push_back(std::move(a));
}
@ -281,26 +309,32 @@ void test_various_cases() {
{
constexpr int excluded = 3;
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
ss::parser p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
while (!p.eof()) {
auto a =
p.get_object<X, ss::ax<int, excluded>, double, std::string>();
try {
auto a = p.template get_object<X, ss::ax<int, excluded>, double,
std::string>();
if (p.valid()) {
i.push_back(a);
}
} catch (...) {
// ignore
};
}
for (auto&& a : p2.iterate_object<X, ss::ax<int, excluded>, double,
std::string>()) {
if (!ss::setup<Ts...>::throw_on_error) {
for (auto&& a : p2.template iterate_object<X, ss::ax<int, excluded>,
double, std::string>()) {
if (p2.valid()) {
i2.push_back(std::move(a));
}
}
}
std::vector<X> expected;
for (auto& x : data) {
@ -312,34 +346,46 @@ void test_various_cases() {
std::copy_if(data.begin(), data.end(), expected.begin(),
[&](const X& x) { return x.i != excluded; });
CHECK_EQ(i, expected);
if (!ss::setup<Ts...>::throw_on_error) {
CHECK_EQ(i2, expected);
}
}
{
ss::parser p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i;
ss::parser p2{f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(f.name, ",");
std::vector<X> i2;
while (!p.eof()) {
auto a = p.get_object<X, ss::nx<int, 3>, double, std::string>();
try {
auto a = p.template get_object<X, ss::nx<int, 3>, double,
std::string>();
if (p.valid()) {
i.push_back(a);
}
} catch (...) {
// ignore
}
}
for (auto&& a :
p2.iterate_object<X, ss::nx<int, 3>, double, std::string>()) {
if (!ss::setup<Ts...>::throw_on_error) {
for (auto&& a : p2.template iterate_object<X, ss::nx<int, 3>,
double, std::string>()) {
if (p2.valid()) {
i2.push_back(std::move(a));
}
}
}
std::vector<X> expected = {{3, 4, "y"}};
CHECK_EQ(i, expected);
if (!ss::setup<Ts...>::throw_on_error) {
CHECK_EQ(i2, expected);
}
}
{
unique_file_name empty_f{"test_parser"};
@ -347,17 +393,17 @@ void test_various_cases() {
make_and_write(empty_f.name, empty_data);
ss::parser p{empty_f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(empty_f.name, ",");
std::vector<X> i;
ss::parser p2{empty_f.name, ","};
auto [p2, __] = make_parser<buffer_mode, Ts...>(empty_f.name, ",");
std::vector<X> i2;
while (!p.eof()) {
i.push_back(p.get_next<X>());
i.push_back(p.template get_next<X>());
}
for (auto&& a : p2.iterate<X>()) {
for (auto&& a : p2.template iterate<X>()) {
i2.push_back(std::move(a));
}
@ -367,9 +413,12 @@ void test_various_cases() {
}
TEST_CASE("parser test various cases") {
test_various_cases();
test_various_cases<ss::string_error>();
test_various_cases<ss::throw_on_error>();
test_various_cases<false>();
test_various_cases<false, ss::string_error>();
test_various_cases<false, ss::throw_on_error>();
test_various_cases<true>();
test_various_cases<true, ss::string_error>();
test_various_cases<true, ss::throw_on_error>();
}
using test_tuple = std::tuple<double, char, double>;
@ -385,7 +434,7 @@ struct test_struct {
static inline void expect_test_struct(const test_struct&) {
}
template <typename... Ts>
template <bool buffer_mode, typename... Ts>
void test_composite_conversion() {
unique_file_name f{"test_parser"};
{
@ -397,7 +446,7 @@ void test_composite_conversion() {
}
}
ss::parser<Ts...> p{f.name, ","};
auto [p, _] = make_parser<buffer_mode, Ts...>(f.name, ",");
auto fail = [] { FAIL(""); };
auto expect_error = [](auto error) { CHECK(!error.empty()); };
auto ignore_error = [] {};
@ -609,7 +658,8 @@ void test_composite_conversion() {
// various scenarios
TEST_CASE("parser test composite conversion") {
test_composite_conversion<ss::string_error>();
test_composite_conversion<false, ss::string_error>();
test_composite_conversion<true, ss::string_error>();
}
struct my_string {
@ -653,7 +703,7 @@ struct xyz {
}
};
template <typename... Ts>
template <bool buffer_mode, typename... Ts>
void test_moving_of_parsed_composite_values() {
// to compile is enough
return;
@ -669,8 +719,10 @@ void test_moving_of_parsed_composite_values() {
}
TEST_CASE("parser test the moving of parsed composite values") {
test_moving_of_parsed_composite_values();
test_moving_of_parsed_composite_values<ss::string_error>();
test_moving_of_parsed_composite_values<false>();
test_moving_of_parsed_composite_values<false, ss::string_error>();
test_moving_of_parsed_composite_values<true>();
test_moving_of_parsed_composite_values<true, ss::string_error>();
}
TEST_CASE("parser test error mode") {
@ -681,7 +733,8 @@ TEST_CASE("parser test error mode") {
out << "junk" << std::endl;
}
ss::parser<ss::string_error> p(f.name, ",");
{
auto [p, _] = make_parser<false, ss::string_error>(f.name, ",");
REQUIRE_FALSE(p.eof());
p.get_next<int>();
@ -689,6 +742,16 @@ TEST_CASE("parser test error mode") {
CHECK_FALSE(p.error_msg().empty());
}
{
auto [p, _] = make_parser<true, ss::string_error>(f.name, ",");
REQUIRE_FALSE(p.eof());
p.get_next<int>();
CHECK_FALSE(p.valid());
CHECK_FALSE(p.error_msg().empty());
}
}
TEST_CASE("parser throw on error mode") {
unique_file_name f{"test_parser"};
{
@ -1680,3 +1743,4 @@ TEST_CASE("parser test various cases with empty lines") {
test_ignore_empty({});
}