Fix out of bounds reading for get_line_buffer, write more buffer mode unit tests

This commit is contained in:
ado 2024-02-18 11:20:36 +01:00
parent e89e268280
commit 82f8ed12b4
3 changed files with 221 additions and 210 deletions

View File

@ -733,10 +733,10 @@ private:
return -1; return -1;
} }
c = buffer[curr_char++];
if (curr_char >= csv_data_size) { if (curr_char >= csv_data_size) {
return -1; return -1;
} }
c = buffer[curr_char++];
// TODO maybe remove this too // TODO maybe remove this too
if (*lineptr == nullptr) { if (*lineptr == nullptr) {

View File

@ -1,6 +1,6 @@
#include "test_parser1.hpp" #include "test_parser1.hpp"
template <typename... Ts> template <bool buffer_mode, typename... Ts>
void test_multiline_restricted() { void test_multiline_restricted() {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
{ {
@ -23,9 +23,9 @@ void test_multiline_restricted() {
auto bad_lines = 15; auto bad_lines = 15;
auto num_errors = 0; auto num_errors = 0;
ss::parser<ss::multiline_restricted<2>, ss::quote<'"'>, ss::escape<'\\'>, auto [p, _] =
Ts...> make_parser<buffer_mode, ss::multiline_restricted<2>, ss::quote<'"'>,
p{f.name, ","}; ss::escape<'\\'>, Ts...>(f.name, ",");
std::vector<X> i; std::vector<X> i;
while (!p.eof()) { while (!p.eof()) {
@ -64,12 +64,15 @@ void test_multiline_restricted() {
} }
TEST_CASE("parser test multiline restricted") { TEST_CASE("parser test multiline restricted") {
test_multiline_restricted(); test_multiline_restricted<false>();
test_multiline_restricted<ss::string_error>(); test_multiline_restricted<false, ss::string_error>();
test_multiline_restricted<ss::throw_on_error>(); test_multiline_restricted<false, ss::throw_on_error>();
test_multiline_restricted<true>();
test_multiline_restricted<true, ss::string_error>();
test_multiline_restricted<true, ss::throw_on_error>();
} }
template <typename... Ts> template <bool buffer_mode, typename... Ts>
void test_unterminated_line_impl(const std::vector<std::string>& lines, void test_unterminated_line_impl(const std::vector<std::string>& lines,
size_t bad_line) { size_t bad_line) {
unique_file_name f{"test_parser"}; unique_file_name f{"test_parser"};
@ -79,10 +82,12 @@ void test_unterminated_line_impl(const std::vector<std::string>& lines,
} }
out.close(); out.close();
ss::parser<Ts...> p{f.name}; auto [p, _] = make_parser<buffer_mode, Ts...>(f.name);
size_t line = 0; size_t line = 0;
while (!p.eof()) { while (!p.eof()) {
auto command = [&] { p.template get_next<int, double, std::string>(); }; auto command = [&p = p] {
p.template get_next<int, double, std::string>();
};
if (line == bad_line) { if (line == bad_line) {
expect_error_on_command(p, command); expect_error_on_command(p, command);
@ -98,9 +103,15 @@ void test_unterminated_line_impl(const std::vector<std::string>& lines,
template <typename... Ts> template <typename... Ts>
void test_unterminated_line(const std::vector<std::string>& lines, void test_unterminated_line(const std::vector<std::string>& lines,
size_t bad_line) { size_t bad_line) {
test_unterminated_line_impl<Ts...>(lines, bad_line); test_unterminated_line_impl<false, Ts...>(lines, bad_line);
test_unterminated_line_impl<Ts..., ss::string_error>(lines, bad_line); test_unterminated_line_impl<false, Ts..., ss::string_error>(lines,
test_unterminated_line_impl<Ts..., ss::throw_on_error>(lines, bad_line); bad_line);
test_unterminated_line_impl<false, Ts..., ss::throw_on_error>(lines,
bad_line);
test_unterminated_line_impl<true, Ts...>(lines, bad_line);
test_unterminated_line_impl<true, Ts..., ss::string_error>(lines, bad_line);
test_unterminated_line_impl<true, Ts..., ss::throw_on_error>(lines,
bad_line);
} }
TEST_CASE("parser test csv on multiline with errors") { TEST_CASE("parser test csv on multiline with errors") {
@ -317,199 +328,3 @@ TEST_CASE("parser test csv on multiline with errors") {
test_unterminated_line<multiline, escape, quote>(lines, 1); test_unterminated_line<multiline, escape, quote>(lines, 1);
} }
} }
// Type trait: has_type<T, std::tuple<Us...>>::value is true when T is exactly
// one of Us... and false otherwise (including for an empty tuple).
// The primary template is intentionally left undefined so that only
// std::tuple arguments are accepted.
template <typename T, typename Tuple>
struct has_type;

template <typename T, typename... Us>
struct has_type<T, std::tuple<Us...>>
    : std::bool_constant<(std::is_same_v<T, Us> || ...)> {};
// Checks, via CHECK_EQ, that the two sizes are equal.
// NOTE(review): presumably a non-template wrapper so the CHECK_EQ macro is
// not expanded inside every test_fields_impl instantiation - confirm.
static inline void check_size(size_t size1, size_t size2) {
CHECK_EQ(size1, size2);
}
// Parses `file_name` with ss::parser<Setup> (delimiter ","), restricts the
// parser to the columns named in `fields`, and checks every parsed row against
// the matching entry of `data`.
//
// Template parameters:
//   Setup - setup type forwarded to ss::parser.
//   Ts... - column types; each row is read as std::tuple<Ts...>.
// Parameters:
//   file_name - path of the CSV file to parse.
//   data      - expected rows; only the members whose type appears in Ts...
//               (int -> .i, double -> .d, std::string -> .s) are compared.
//   fields    - header names passed to use_fields().
template <typename Setup, typename... Ts>
static void test_fields_impl(const std::string& file_name,
                             const std::vector<X>& data,
                             const std::vector<std::string>& fields) {
    using CaseType = std::tuple<Ts...>;

    ss::parser<Setup> p{file_name, ","};
    CHECK_FALSE(p.field_exists("Unknown"));
    p.use_fields(fields);

    std::vector<CaseType> i;
    for (const auto& a : p.template iterate<CaseType>()) {
        i.push_back(a);
    }

    check_size(i.size(), data.size());

    // CHECK_EQ reports a failure but does not abort the test case, so a size
    // mismatch must not be allowed to index past the end of `data`; compare
    // only the rows present in both containers.
    const size_t rows = i.size() < data.size() ? i.size() : data.size();
    for (size_t j = 0; j < rows; ++j) {
        if constexpr (has_type<int, CaseType>::value) {
            CHECK_EQ(std::get<int>(i[j]), data[j].i);
        }
        if constexpr (has_type<double, CaseType>::value) {
            CHECK_EQ(std::get<double>(i[j]), data[j].d);
        }
        if constexpr (has_type<std::string, CaseType>::value) {
            CHECK_EQ(std::get<std::string>(i[j]), data[j].s);
        }
    }
}
// Runs test_fields_impl for the column types Ts... once per parser setup:
// ss::setup<>, ss::setup<ss::string_error> and ss::setup<ss::throw_on_error>.
//
// Parameters:
//   file_name - path of the CSV file to parse (taken by reference; it is only
//               forwarded, never modified).
//   data      - expected rows.
//   fields    - header names to select, in the same order as Ts....
template <typename... Ts>
static void test_fields(const std::string& file_name,
                        const std::vector<X>& data,
                        const std::vector<std::string>& fields) {
    test_fields_impl<ss::setup<>, Ts...>(file_name, data, fields);
    test_fields_impl<ss::setup<ss::string_error>, Ts...>(file_name, data,
                                                         fields);
    test_fields_impl<ss::setup<ss::throw_on_error>, Ts...>(file_name, data,
                                                           fields);
}
// Exercises header handling: reading with and without skipping the header
// line, selecting columns by header name with use_fields(), and the cases in
// which use_fields() leaves the parser invalid.
TEST_CASE("parser test various cases with header") {
unique_file_name f{"test_parser"};
// Header names written as the first line of the generated file.
constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double";
constexpr static auto Str = "String";
using str = std::string;
std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
make_and_write(f.name, data, header);
// Short aliases used by the generated test_fields calls at the end.
const auto& o = f.name;
const auto& d = data;
// Iterating without skipping the header line must not reproduce `data`.
{
ss::parser<ss::string_error> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_NE(i, data);
}
// Skipping the first line manually with ignore_next() yields exactly `data`.
{
ss::parser<ss::string_error> p{f.name, ","};
std::vector<X> i;
p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
// The ss::ignore_header setup yields exactly `data` without ignore_next().
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
// With ss::ignore_header, use_fields() leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
// Same setup: an unknown field name is not reported as existing, and
// requesting it leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
// Same setup: requesting the same field twice leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
// The field selection can be changed between get_next() calls; each call is
// checked against the next row of `data` in the requested column order.
{
ss::parser<ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl);
{
auto [int_, double_] = p.get_next<int, double>();
CHECK_EQ(int_, data[0].i);
CHECK_EQ(double_, data[0].d);
}
p.use_fields(Dbl, Int);
{
auto [double_, int_] = p.get_next<double, int>();
CHECK_EQ(int_, data[1].i);
CHECK_EQ(double_, data[1].d);
}
p.use_fields(Str);
{
auto string_ = p.get_next<std::string>();
CHECK_EQ(string_, data[2].s);
}
p.use_fields(Str, Int, Dbl);
{
auto [string_, int_, double_] =
p.get_next<std::string, int, double>();
CHECK_EQ(double_, data[3].d);
CHECK_EQ(int_, data[3].i);
CHECK_EQ(string_, data[3].s);
}
}
/* python used to generate the test_fields calls below
   (all permutations of 1, 2 and 3 columns)

import itertools

header = {'str': 'Str',
          'double': 'Dbl',
          'int': 'Int'}

keys = ['str', 'int', 'double']

for r in range(1, 4):
    combinations = list(itertools.permutations(keys, r = r))

    for combination in combinations:
        template_params = []
        arg_params = []
        for type in combination:
            template_params.append(type)
            arg_params.append(header[type])
        call = 'test_fields<' + ', '.join(template_params) + \
            '>(o, d, {' + ', '.join(arg_params) + '});'
        print(call)
*/
test_fields<str>(o, d, {Str});
test_fields<int>(o, d, {Int});
test_fields<double>(o, d, {Dbl});
test_fields<str, int>(o, d, {Str, Int});
test_fields<str, double>(o, d, {Str, Dbl});
test_fields<int, str>(o, d, {Int, Str});
test_fields<int, double>(o, d, {Int, Dbl});
test_fields<double, str>(o, d, {Dbl, Str});
test_fields<double, int>(o, d, {Dbl, Int});
test_fields<str, int, double>(o, d, {Str, Int, Dbl});
test_fields<str, double, int>(o, d, {Str, Dbl, Int});
test_fields<int, str, double>(o, d, {Int, Str, Dbl});
test_fields<int, double, str>(o, d, {Int, Dbl, Str});
test_fields<double, str, int>(o, d, {Dbl, Str, Int});
test_fields<double, int, str>(o, d, {Dbl, Int, Str});
}

View File

@ -1,5 +1,201 @@
#include "test_parser1.hpp" #include "test_parser1.hpp"
// Type trait: has_type<T, std::tuple<Us...>>::value is true when T is exactly
// one of Us... and false otherwise (including for an empty tuple).
// The primary template is intentionally left undefined so that only
// std::tuple arguments are accepted.
template <typename T, typename Tuple>
struct has_type;

template <typename T, typename... Us>
struct has_type<T, std::tuple<Us...>>
    : std::bool_constant<(std::is_same_v<T, Us> || ...)> {};
// Checks, via CHECK_EQ, that the two sizes are equal.
// NOTE(review): presumably a non-template wrapper so the CHECK_EQ macro is
// not expanded inside every test_fields_impl instantiation - confirm.
static inline void check_size(size_t size1, size_t size2) {
CHECK_EQ(size1, size2);
}
// Parses `file_name` with ss::parser<Setup> (delimiter ","), restricts the
// parser to the columns named in `fields`, and checks every parsed row against
// the matching entry of `data`.
//
// Template parameters:
//   Setup - setup type forwarded to ss::parser.
//   Ts... - column types; each row is read as std::tuple<Ts...>.
// Parameters:
//   file_name - path of the CSV file to parse.
//   data      - expected rows; only the members whose type appears in Ts...
//               (int -> .i, double -> .d, std::string -> .s) are compared.
//   fields    - header names passed to use_fields().
template <typename Setup, typename... Ts>
static void test_fields_impl(const std::string& file_name,
                             const std::vector<X>& data,
                             const std::vector<std::string>& fields) {
    using CaseType = std::tuple<Ts...>;

    ss::parser<Setup> p{file_name, ","};
    CHECK_FALSE(p.field_exists("Unknown"));
    p.use_fields(fields);

    std::vector<CaseType> i;
    for (const auto& a : p.template iterate<CaseType>()) {
        i.push_back(a);
    }

    check_size(i.size(), data.size());

    // CHECK_EQ reports a failure but does not abort the test case, so a size
    // mismatch must not be allowed to index past the end of `data`; compare
    // only the rows present in both containers.
    const size_t rows = i.size() < data.size() ? i.size() : data.size();
    for (size_t j = 0; j < rows; ++j) {
        if constexpr (has_type<int, CaseType>::value) {
            CHECK_EQ(std::get<int>(i[j]), data[j].i);
        }
        if constexpr (has_type<double, CaseType>::value) {
            CHECK_EQ(std::get<double>(i[j]), data[j].d);
        }
        if constexpr (has_type<std::string, CaseType>::value) {
            CHECK_EQ(std::get<std::string>(i[j]), data[j].s);
        }
    }
}
// Runs test_fields_impl for the column types Ts... once per parser setup:
// ss::setup<>, ss::setup<ss::string_error> and ss::setup<ss::throw_on_error>.
//
// Parameters:
//   file_name - path of the CSV file to parse (taken by reference; it is only
//               forwarded, never modified).
//   data      - expected rows.
//   fields    - header names to select, in the same order as Ts....
template <typename... Ts>
static void test_fields(const std::string& file_name,
                        const std::vector<X>& data,
                        const std::vector<std::string>& fields) {
    test_fields_impl<ss::setup<>, Ts...>(file_name, data, fields);
    test_fields_impl<ss::setup<ss::string_error>, Ts...>(file_name, data,
                                                         fields);
    test_fields_impl<ss::setup<ss::throw_on_error>, Ts...>(file_name, data,
                                                           fields);
}
// Exercises header handling: reading with and without skipping the header
// line, selecting columns by header name with use_fields(), and the cases in
// which use_fields() leaves the parser invalid.
TEST_CASE("parser test various cases with header") {
unique_file_name f{"test_parser"};
// Header names written as the first line of the generated file.
constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double";
constexpr static auto Str = "String";
using str = std::string;
std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
make_and_write(f.name, data, header);
// Short aliases used by the generated test_fields calls at the end.
const auto& o = f.name;
const auto& d = data;
// Iterating without skipping the header line must not reproduce `data`.
{
ss::parser<ss::string_error> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_NE(i, data);
}
// Skipping the first line manually with ignore_next() yields exactly `data`.
{
ss::parser<ss::string_error> p{f.name, ","};
std::vector<X> i;
p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
// The ss::ignore_header setup yields exactly `data` without ignore_next().
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
// With ss::ignore_header, use_fields() leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
// Same setup: an unknown field name is not reported as existing, and
// requesting it leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
// Same setup: requesting the same field twice leaves the parser invalid.
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
// The field selection can be changed between get_next() calls; each call is
// checked against the next row of `data` in the requested column order.
{
ss::parser<ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl);
{
auto [int_, double_] = p.get_next<int, double>();
CHECK_EQ(int_, data[0].i);
CHECK_EQ(double_, data[0].d);
}
p.use_fields(Dbl, Int);
{
auto [double_, int_] = p.get_next<double, int>();
CHECK_EQ(int_, data[1].i);
CHECK_EQ(double_, data[1].d);
}
p.use_fields(Str);
{
auto string_ = p.get_next<std::string>();
CHECK_EQ(string_, data[2].s);
}
p.use_fields(Str, Int, Dbl);
{
auto [string_, int_, double_] =
p.get_next<std::string, int, double>();
CHECK_EQ(double_, data[3].d);
CHECK_EQ(int_, data[3].i);
CHECK_EQ(string_, data[3].s);
}
}
/* python used to generate the test_fields calls below
   (all permutations of 1, 2 and 3 columns)

import itertools

header = {'str': 'Str',
          'double': 'Dbl',
          'int': 'Int'}

keys = ['str', 'int', 'double']

for r in range(1, 4):
    combinations = list(itertools.permutations(keys, r = r))

    for combination in combinations:
        template_params = []
        arg_params = []
        for type in combination:
            template_params.append(type)
            arg_params.append(header[type])
        call = 'test_fields<' + ', '.join(template_params) + \
            '>(o, d, {' + ', '.join(arg_params) + '});'
        print(call)
*/
test_fields<str>(o, d, {Str});
test_fields<int>(o, d, {Int});
test_fields<double>(o, d, {Dbl});
test_fields<str, int>(o, d, {Str, Int});
test_fields<str, double>(o, d, {Str, Dbl});
test_fields<int, str>(o, d, {Int, Str});
test_fields<int, double>(o, d, {Int, Dbl});
test_fields<double, str>(o, d, {Dbl, Str});
test_fields<double, int>(o, d, {Dbl, Int});
test_fields<str, int, double>(o, d, {Str, Int, Dbl});
test_fields<str, double, int>(o, d, {Str, Dbl, Int});
test_fields<int, str, double>(o, d, {Int, Str, Dbl});
test_fields<int, double, str>(o, d, {Int, Dbl, Str});
test_fields<double, str, int>(o, d, {Dbl, Str, Int});
test_fields<double, int, str>(o, d, {Dbl, Int, Str});
}
template <typename... Ts> template <typename... Ts>
void test_invalid_fields_impl(const std::vector<std::string>& lines, void test_invalid_fields_impl(const std::vector<std::string>& lines,
const std::vector<std::string>& fields) { const std::vector<std::string>& fields) {