7 Commits

Author SHA1 Message Date
ado
809939d0e2 Update parser error messages, fix parser tests 2024-03-13 19:39:07 +01:00
ado
b9f4afdd5f Add header and raw_header methods, update header usage methods error handling, write new and update existing unit tests 2024-03-13 17:15:31 +01:00
red0124
69875c238e Resolve clang-tidy warnings (#48)
* Resolve clang-tidy warnings, update single_header_generator.py

* Update single header test, resolve additional clang-tidy warnings
2024-03-12 18:31:24 +01:00
red0124
457defadaa Bugfix/odr violations (#47)
* Make common non-member functions inline, remove unreachable line from get_line_buffer

* [skip ci] Fix namespace comments
2024-03-12 10:22:10 +01:00
red0124
1b9a01f787 Feature/fuzz (#44)
* Add fuzzing ci, add bedge to README
2024-03-03 20:46:12 +01:00
red0124
f5b750dd93 Merge pull request #43 from red0124/bugfix/ftell_slowdown
Remove usage of ftell when updating cursor position value
2024-03-02 02:20:05 +01:00
ado
7f53b585f9 Remove usage of ftell when updating cursor position value 2024-03-02 00:34:19 +01:00
27 changed files with 786 additions and 270 deletions

7
.github/fuzz/makefile vendored Normal file
View File

@@ -0,0 +1,7 @@
# Build and run helper for the libFuzzer harness in ssp_fuzz.cpp.
# Name of the fuzzer executable produced by 'all' and executed by 'run'.
EXE=ssp_fuzz
# Compile the harness as C++17 with libFuzzer instrumentation
# (-fsanitize=fuzzer); additional flags are taken from CXXFLAGS.
all:
clang++ ${CXXFLAGS} ssp_fuzz.cpp -fsanitize=fuzzer -std=c++17 -o ${EXE}
# Run the fuzzer; -max_total_time=900 caps the whole run at 900 seconds.
run:
./${EXE} -max_total_time=900

81
.github/fuzz/ssp_fuzz.cpp vendored Normal file
View File

@@ -0,0 +1,81 @@
#include "../../ssp.hpp"
#include <filesystem>
#include <iostream>
#include <unistd.h>
template <typename... Ts>
void test_ssp_file_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
std::string file_name = std::filesystem::temp_directory_path().append(
"ss_fuzzer" + std::to_string(getpid()) + ".csv");
FILE* file = std::fopen(file_name.c_str(), "wb");
if (!file) {
std::exit(1);
}
std::fwrite(data, size, 1, file);
std::fclose(file);
ss::parser<Ts...> p{file_name.c_str(), delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
std::remove(file_name.c_str());
}
template <typename... Ts>
void test_ssp_buffer_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
ss::parser<Ts...> p{(const char*)data, size, delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
}
// Runs one fuzzer input through every parser variant for the option set Ts:
// file mode and buffer mode, each with and without ss::throw_on_error, and
// each with the default delimiter and with the multi-character delimiter
// ":::".
template <typename... Ts>
void test_ssp(const uint8_t* data, size_t size) {
    const std::string long_delim{":::"};

    // File-backed parser.
    test_ssp_file_mode<Ts...>(data, size);
    test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size);
    test_ssp_file_mode<Ts...>(data, size, long_delim);
    test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size, long_delim);

    // In-memory parser.
    test_ssp_buffer_mode<Ts...>(data, size);
    test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size);
    test_ssp_buffer_mode<Ts...>(data, size, long_delim);
    test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size, long_delim);
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
using escape = ss::escape<'\\'>;
using quote = ss::quote<'"'>;
using trim = ss::trim<' ', '\t'>;
using multiline_r = ss::multiline_restricted<5>;
test_ssp<>(data, size);
test_ssp<escape>(data, size);
test_ssp<quote>(data, size);
test_ssp<trim>(data, size);
test_ssp<quote, escape>(data, size);
test_ssp<escape, quote, multiline_r, trim>(data, size);
test_ssp<escape, quote, multiline_r, trim, ss::ignore_empty>(data, size);
return 0;
}

43
.github/workflows/fuzz.yml vendored Normal file
View File

@@ -0,0 +1,43 @@
# Workflow that builds and runs the libFuzzer harness from .github/fuzz.
name: fuzz-ci
# Triggers: manual dispatch, plus pushes and pull requests on master and on
# feature/, improvement/ and bugfix/ branches.
on:
workflow_dispatch:
push:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
pull_request:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
jobs:
clang_tests:
# Skip the job when any commit message in the event contains '[skip ci]' or
# '[skip github]'.
if: >-
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
! contains(toJSON(github.event.commits.*.message), '[skip github]')
runs-on: ubuntu-latest
name: "Fuzzing"
# The job runs inside a clang 15 container image.
container:
image: silkeh/clang:15
options: -v /usr/local:/host_usr_local
steps:
- uses: actions/checkout@v1
# 'make' compiles the harness (makefile target 'all').
- name: Build
working-directory: .github/fuzz
run: make
# 'make run' executes the fuzzer.
- name: Run
working-directory: .github/fuzz
run: make run

4
.gitignore vendored
View File

@@ -1,6 +1,8 @@
compile_commands.json
.clang-format
.ccls-cache/*
.clang-tidy
.ccls-cache/
.cache/
experiment/
build/
hbuild/

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
project(
ssp
VERSION 1.7.1
VERSION 1.7.2
DESCRIPTION "csv parser"
HOMEPAGE_URL "https://github.com/red0124/ssp"
LANGUAGES CXX

View File

@@ -9,6 +9,7 @@
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![coverage](https://coveralls.io/repos/github/red0124/ssp/badge.svg?branch=master)](https://coveralls.io/github/red0124/ssp?branch=master)
[![fuzz](https://github.com/red0124/ssp/workflows/fuzz-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/fuzz.yml)
[![single-header](https://github.com/red0124/ssp/workflows/single-header-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/single-header.yml)
[![ubuntu-latest-gcc](https://github.com/red0124/ssp/workflows/ubuntu-latest-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-gcc.yml)
[![ubuntu-latest-clang](https://github.com/red0124/ssp/workflows/ubuntu-latest-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-clang.yml)

View File

@@ -1,11 +1,15 @@
#pragma once
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#if !__unix__
#include <array>
#include <cstdint>
#endif
namespace ss {
struct none {};
@@ -17,13 +21,13 @@ constexpr inline auto default_delimiter = ",";
constexpr inline auto get_line_initial_buffer_size = 128;
template <bool StringError>
inline void assert_string_error_defined() {
void assert_string_error_defined() {
static_assert(StringError,
"'string_error' needs to be enabled to use 'error_msg'");
}
template <bool ThrowOnError>
inline void assert_throw_on_error_not_defined() {
void assert_throw_on_error_not_defined() {
static_assert(!ThrowOnError, "cannot handle errors manually if "
"'throw_on_error' is enabled");
}
@@ -45,8 +49,8 @@ inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
using ssize_t = intptr_t;
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
char buff[get_line_initial_buffer_size];
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
std::array<char, get_line_initial_buffer_size> buff;
if (lineptr == nullptr || n < sizeof(buff)) {
size_t new_n = sizeof(buff);
@@ -57,9 +61,9 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
lineptr[0] = '\0';
size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
while (std::fgets(buff.data(), sizeof(buff), file) != nullptr) {
line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff);
size_t buff_used = std::strlen(buff.data());
if (n <= buff_used + line_used) {
size_t new_n = n * 2;
@@ -67,7 +71,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
n = new_n;
}
std::memcpy(lineptr + line_used, buff, buff_used);
std::memcpy(lineptr + line_used, buff.data(), buff_used);
line_used += buff_used;
lineptr[line_used] = '\0';
@@ -81,7 +85,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
#endif
ssize_t get_line_buffer(char*& lineptr, size_t& n,
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
if (curr_char >= csv_data_size) {
@@ -89,7 +93,7 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
}
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
auto* new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
@@ -114,22 +118,18 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
}
}
if (line_used != 0) {
lineptr[line_used] = '\0';
return line_used;
}
return -1;
}
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
FILE* file,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
ssize_t ssize;
ssize_t ssize = 0;
if (file) {
ssize = get_line_file(buffer, buffer_size, file);
curr_char = std::ftell(file);
curr_char += ssize;
} else {
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
csv_data_size, curr_char);
@@ -145,4 +145,4 @@ std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
return {ssize, false};
}
} /* ss */
} /* namespace ss */

View File

@@ -224,8 +224,9 @@ private:
}
std::string error_sufix(const string_range msg, size_t pos) const {
constexpr static auto reserve_size = 32;
std::string error;
error.reserve(32);
error.reserve(reserve_size);
error.append("at column ")
.append(std::to_string(pos + 1))
.append(": \'")
@@ -391,7 +392,7 @@ private:
////////////////
bool columns_mapped() const {
return column_mappings_.size() != 0;
return !column_mappings_.empty();
}
size_t column_position(size_t tuple_position) const {
@@ -404,7 +405,7 @@ private:
// assumes positions are valid and the vector is not empty
void set_column_mapping(std::vector<size_t> positions,
size_t number_of_columns) {
column_mappings_ = positions;
column_mappings_ = std::move(positions);
number_of_columns_ = number_of_columns;
}
@@ -490,7 +491,7 @@ private:
friend class parser;
std::vector<size_t> column_mappings_;
size_t number_of_columns_;
size_t number_of_columns_{0};
};
} /* ss */
} /* namespace ss */

View File

@@ -12,12 +12,12 @@ class exception : public std::exception {
std::string msg_;
public:
exception(const std::string& msg): msg_{msg} {
exception(std::string msg): msg_{std::move(msg)} {
}
virtual char const* what() const noexcept {
char const* what() const noexcept override {
return msg_.c_str();
}
};
} /* ss */
} /* namespace ss */

View File

@@ -2,8 +2,8 @@
#include "type_traits.hpp"
#include <charconv>
#include <cstdint>
#include <cstring>
#include <functional>
#include <optional>
#include <string>
#include <string_view>
@@ -14,6 +14,7 @@
#else
#include <algorithm>
#include <cstdlib>
#include <array>
#endif
namespace ss {
@@ -45,16 +46,17 @@ std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
"Conversion to long double is disabled");
constexpr static auto buff_max = 64;
char short_buff[buff_max];
std::array<char, buff_max> short_buff;
size_t string_range = std::distance(begin, end);
std::string long_buff;
char* buff;
char* buff = nullptr;
if (string_range > buff_max) {
long_buff = std::string{begin, end};
buff = long_buff.data();
} else {
buff = short_buff;
buff = short_buff.data();
buff[string_range] = '\0';
std::copy_n(begin, string_range, buff);
}
@@ -92,6 +94,8 @@ struct numeric_wrapper {
numeric_wrapper& operator=(numeric_wrapper&&) = default;
numeric_wrapper& operator=(const numeric_wrapper&) = default;
~numeric_wrapper() = default;
numeric_wrapper(T other) : value{other} {
}
@@ -142,7 +146,7 @@ template <typename T>
struct unsupported_type {
constexpr static bool value = false;
};
} /* namespace */
} /* namespace errors */
template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -215,10 +219,13 @@ inline bool extract(const char* begin, const char* end, bool& value) {
return false;
}
} else {
constexpr static auto true_size = 4;
constexpr static auto false_size = 5;
size_t size = end - begin;
if (size == 4 && std::strncmp(begin, "true", size) == 0) {
if (size == true_size && std::strncmp(begin, "true", size) == 0) {
value = true;
} else if (size == 5 && std::strncmp(begin, "false", size) == 0) {
} else if (size == false_size &&
std::strncmp(begin, "false", size) == 0) {
value = false;
} else {
return false;
@@ -247,4 +254,4 @@ inline bool extract(const char* begin, const char* end,
return true;
}
} /* ss */
} /* namespace ss */

View File

@@ -2,7 +2,6 @@
#include <cstdlib>
#include <functional>
#include <tuple>
namespace ss {
@@ -77,4 +76,4 @@ struct member_wrapper<R T::*> {
template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */
} /* namespace ss */

View File

@@ -5,7 +5,6 @@
#include "exception.hpp"
#include "extract.hpp"
#include "restrictions.hpp"
#include <cerrno>
#include <cstdlib>
#include <cstring>
#include <optional>
@@ -32,10 +31,12 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public:
parser(const std::string& file_name,
const std::string& delim = ss::default_delimiter)
: file_name_{file_name}, reader_{file_name_, delim} {
parser(std::string file_name, std::string delim = ss::default_delimiter)
: file_name_{std::move(file_name)}, reader_{file_name_, delim} {
if (reader_.file_) {
read_line();
if constexpr (ignore_header) {
@@ -51,7 +52,7 @@ public:
parser(const char* const csv_data_buffer, size_t csv_data_size,
const std::string& delim = ss::default_delimiter)
: file_name_{"buffer line"},
: file_name_{"CSV data buffer"},
reader_{csv_data_buffer, csv_data_size, delim} {
if (csv_data_buffer) {
read_line();
@@ -68,6 +69,7 @@ public:
parser(parser&& other) = default;
parser& operator=(parser&& other) = default;
~parser() = default;
parser() = delete;
parser(const parser& other) = delete;
@@ -162,20 +164,49 @@ public:
return value;
}
// Returns a copy of the stored header line (raw_header_), not split into
// fields. Does not compile when the 'ignore_header' option is set, because
// assert_ignore_header_not_defined() static_asserts on it.
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
std::vector<std::string> header() {
assert_ignore_header_not_defined();
clear_error();
header_splitter splitter;
std::string raw_header_copy = raw_header_;
if (!strict_split(splitter, raw_header_copy)) {
return {};
}
std::vector<std::string> split_header;
for (const auto& [begin, end] : splitter.split_data_) {
split_header.emplace_back(begin, end);
}
return split_header;
}
// Reports whether the header contains a field with the given name.
// Clears any previous error, splits the header on first use (when header_ is
// still empty) and returns false if the parser is not valid afterwards.
// Does not compile when the 'ignore_header' option is set.
bool field_exists(const std::string& field) {
    assert_ignore_header_not_defined();
    clear_error();

    const bool header_not_split_yet = header_.empty();
    if (header_not_split_yet) {
        split_header_data();
    }

    // Only look the field up when the parser is in a valid state.
    return valid() && header_index(field).has_value();
}
template <typename... Ts>
void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) {
handle_error_header_ignored();
return;
}
assert_ignore_header_not_defined();
clear_error();
if (header_.empty() && !eof()) {
split_header_data();
@@ -188,7 +219,7 @@ public:
auto fields = std::vector<std::string>{fields_args...};
if (fields.empty()) {
handle_error_empty_mapping();
handle_error_invalid_use_fields_argument();
return;
}
@@ -237,6 +268,10 @@ public:
iterator(const iterator& other) = default;
iterator(iterator&& other) = default;
~iterator() = default;
iterator& operator=(const iterator& other) = delete;
iterator& operator=(iterator&& other) = delete;
value& operator*() {
return value_;
@@ -261,8 +296,10 @@ public:
return *this;
}
iterator& operator++(int) {
return ++*this;
iterator operator++(int) {
auto result = *this;
++*this;
return result;
}
friend bool operator==(const iterator& lhs, const iterator& rhs) {
@@ -326,7 +363,7 @@ public:
Fun&& fun = none{}) {
using Value = no_void_validator_tup_t<Us...>;
std::optional<Value> value;
try_convert_and_invoke<Value, Us...>(value, fun);
try_convert_and_invoke<Value, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value));
}
@@ -335,7 +372,7 @@ public:
template <typename U, typename... Us, typename Fun = none>
composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) {
std::optional<U> value;
try_convert_and_invoke<U, Us...>(value, fun);
try_convert_and_invoke<U, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value));
}
@@ -443,7 +480,8 @@ private:
using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun)));
constexpr bool returns_void = std::is_same_v<Ret, void>;
if constexpr (!returns_void) {
if (!try_invoke_impl(arg, std::forward<Fun>(fun))) {
if (!try_invoke_impl(std::forward<Arg>(arg),
std::forward<Fun>(fun))) {
handle_error_failed_check();
}
} else {
@@ -478,22 +516,58 @@ private:
if (valid()) {
try_invoke(*value, std::forward<Fun>(fun));
}
return {valid() ? std::move(value) : std::nullopt, *this};
return {valid() ? std::forward<T>(value) : std::nullopt, *this};
}
////////////////
// header
////////////////
// Compile-time guard for header-related methods: instantiating this function
// fails with the message below when the 'ignore_header' option is enabled.
// It performs no work at runtime.
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
// Splits 'header' with 'splitter' using the reader's delimiter.
//
// Returns false when the header is empty, or when the split is invalid in
// the non-throwing setup (the error is recorded through
// handle_error_invalid_header_split). With 'throw_on_error' a failed split
// is rethrown as an ss::exception decorated with header context.
// Returns true on success.
bool strict_split(header_splitter& splitter, std::string& header) {
    if (header.empty()) {
        return false;
    }

    if constexpr (!throw_on_error) {
        splitter.split(header.data(), reader_.delim_);
        if (splitter.valid()) {
            return true;
        }
        handle_error_invalid_header_split(splitter);
        return false;
    } else {
        try {
            splitter.split(header.data(), reader_.delim_);
        } catch (const ss::exception& split_error) {
            decorate_rethrow_invalid_header_split(split_error);
        }
        return true;
    }
}
void split_header_data() {
ss::splitter<Options...> splitter;
header_splitter splitter;
std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) {
handle_error_invalid_header(field);
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
@@ -524,7 +598,7 @@ private:
}
void handle_error_failed_check() {
constexpr static auto error_msg = " failed check";
constexpr static auto error_msg = ": failed check";
if constexpr (string_error) {
error_.clear();
@@ -537,7 +611,7 @@ private:
}
void handle_error_null_buffer() {
constexpr static auto error_msg = " received null data buffer";
constexpr static auto error_msg = ": received null data buffer";
if constexpr (string_error) {
error_.clear();
@@ -550,7 +624,7 @@ private:
}
void handle_error_file_not_open() {
constexpr static auto error_msg = " could not be opened";
constexpr static auto error_msg = ": could not be opened";
if constexpr (string_error) {
error_.clear();
@@ -563,7 +637,7 @@ private:
}
void handle_error_eof_reached() {
constexpr static auto error_msg = " read on end of file";
constexpr static auto error_msg = ": read on end of file";
if constexpr (string_error) {
error_.clear();
@@ -588,20 +662,6 @@ private:
}
}
void handle_error_header_ignored() {
constexpr static auto error_msg =
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg =
": header does not contain given field: ";
@@ -629,8 +689,9 @@ private:
}
}
void handle_error_empty_mapping() {
constexpr static auto error_msg = "received empty mapping";
void handle_error_invalid_use_fields_argument() {
constexpr static auto error_msg =
"received invalid argument for 'use_fields'";
if constexpr (string_error) {
error_.clear();
@@ -642,19 +703,53 @@ private:
}
}
void handle_error_invalid_header(const std::string& field) {
constexpr static auto error_msg = "header contains duplicates: ";
void handle_error_invalid_header_field() {
constexpr static auto error_msg = ": header contains empty field";
if constexpr (string_error) {
error_.clear();
error_.append(error_msg).append(error_msg);
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field};
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
// Reports a duplicated header field according to the error mode:
//   string_error   - stores "<file name>: header contains duplicate: <field>"
//   throw_on_error - throws ss::exception with that same text
//   otherwise      - sets the error flag
void handle_error_duplicate_header_field(const std::string& field) {
    constexpr static auto duplicate_msg = ": header contains duplicate: ";

    if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(duplicate_msg);
        error_.append(field);
    } else if constexpr (throw_on_error) {
        std::string message = file_name_;
        message.append(duplicate_msg).append(field);
        throw ss::exception{message};
    } else {
        error_ = true;
    }
}
// Records a failed header split. With string_error the stored message is
// "<file name>: failed header parsing: <splitter error message>"; otherwise
// only the error flag is set.
void handle_error_invalid_header_split(const header_splitter& splitter) {
    constexpr static auto split_failed_msg = ": failed header parsing: ";

    if constexpr (!string_error) {
        error_ = true;
    } else {
        error_.clear();
        error_.append(file_name_);
        error_.append(split_failed_msg);
        error_.append(splitter.error_msg());
    }
}
// Throws a new ss::exception whose message is
// "<file name>: failed header parsing: <original message>".
// Only usable when 'throw_on_error' is enabled (checked at compile time).
void decorate_rethrow_invalid_header_split(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");

    std::string decorated{file_name_};
    decorated.append(": failed header parsing: ");
    decorated.append(e.what());
    throw ss::exception{decorated};
}
void decorate_rethrow(const ss::exception& e) const {
static_assert(throw_on_error,
"throw_on_error needs to be enabled to use this method");
@@ -674,17 +769,18 @@ private:
}
struct reader {
reader(const std::string& file_name_, const std::string& delim)
: delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} {
reader(const std::string& file_name_, std::string delim)
: delim_{std::move(delim)},
file_{std::fopen(file_name_.c_str(), "rb")} {
}
reader(const char* const buffer, size_t csv_data_size,
const std::string& delim)
: delim_{delim}, csv_data_buffer_{buffer},
std::string delim)
: delim_{std::move(delim)}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} {
}
reader(reader&& other)
reader(reader&& other) noexcept
: buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_},
@@ -705,7 +801,7 @@ private:
other.file_ = nullptr;
}
reader& operator=(reader&& other) {
reader& operator=(reader&& other) noexcept {
if (this != &other) {
buffer_ = other.buffer_;
next_line_buffer_ = other.next_line_buffer_;
@@ -853,7 +949,7 @@ private:
}
bool escaped_eol(size_t size) {
const char* curr;
const char* curr = nullptr;
for (curr = next_line_buffer_ + size - 1;
curr >= next_line_buffer_ &&
setup<Options...>::escape::match(*curr);
@@ -899,7 +995,7 @@ private:
size_t& buffer_size, const char* const second,
size_t second_size) {
buffer_size = first_size + second_size + 3;
auto new_first = static_cast<char*>(
auto* new_first = static_cast<char*>(
strict_realloc(static_cast<void*>(first), buffer_size));
first = new_first;
@@ -928,7 +1024,7 @@ private:
}
std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_};
return std::string{next_line_buffer_, next_line_size_};
}
////////////////
@@ -971,4 +1067,4 @@ private:
bool eof_{false};
};
} /* ss */
} /* namespace ss */

View File

@@ -124,4 +124,4 @@ struct ne {
}
};
} /* ss */
} /* namespace ss */

View File

@@ -293,4 +293,7 @@ private:
template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */
template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */

View File

@@ -2,11 +2,9 @@
#include "common.hpp"
#include "exception.hpp"
#include "setup.hpp"
#include "type_traits.hpp"
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
@@ -322,8 +320,9 @@ private:
trim_left_if_enabled(begin_);
for (done_ = false; !done_; read(delim))
;
for (done_ = false; !done_;) {
read(delim);
}
return split_data_;
}
@@ -479,4 +478,4 @@ public:
friend class converter;
};
} /* ss */
} /* namespace ss */

View File

@@ -34,7 +34,11 @@ struct left_of_impl;
template <size_t N, typename T, typename... Ts>
struct left_of_impl {
static_assert(N < 128, "recursion limit reached");
private:
constexpr static auto recursion_limit = 128;
public:
static_assert(N < recursion_limit, "recursion limit reached");
static_assert(N != 0, "cannot take the whole tuple");
using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>;
};
@@ -378,4 +382,4 @@ T to_object(U&& data) {
}
}
} /* trait */
} /* namespace ss */

View File

@@ -6,7 +6,7 @@ project(
'cpp_std=c++17',
'buildtype=debugoptimized',
'wrap_mode=forcefallback'],
version: '1.7.1',
version: '1.7.2',
meson_version:'>=0.54.0')
fast_float_dep = dependency('fast_float')

View File

@@ -14,14 +14,21 @@ headers = ['type_traits.hpp',
combined_file = []
includes = []
in_pp_block = False
for header in headers:
with open(headers_dir + header) as f:
for line in f.read().splitlines():
if '#if ' in line:
in_pp_block = True
if '#endif' in line:
in_pp_block = False
if '#include "' in line or '#include <fast_float' in line:
continue
if '#include <' in line:
if '#include <' in line and not in_pp_block:
includes.append(line)
continue

298
ssp.hpp
View File

@@ -8,7 +8,6 @@
#include <cstring>
#include <exception>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
@@ -50,7 +49,11 @@ struct left_of_impl;
template <size_t N, typename T, typename... Ts>
struct left_of_impl {
static_assert(N < 128, "recursion limit reached");
private:
constexpr static auto recursion_limit = 128;
public:
static_assert(N < recursion_limit, "recursion limit reached");
static_assert(N != 0, "cannot take the whole tuple");
using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>;
};
@@ -394,7 +397,7 @@ T to_object(U&& data) {
}
}
} /* trait */
} /* namespace ss */
namespace ss {
@@ -406,15 +409,15 @@ class exception : public std::exception {
std::string msg_;
public:
exception(const std::string& msg): msg_{msg} {
exception(std::string msg): msg_{std::move(msg)} {
}
virtual char const* what() const noexcept {
char const* what() const noexcept override {
return msg_.c_str();
}
};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -490,7 +493,7 @@ struct member_wrapper<R T::*> {
template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */
} /* namespace ss */
namespace ss {
@@ -616,7 +619,12 @@ struct ne {
}
};
} /* ss */
} /* namespace ss */
#if !__unix__
#include <array>
#include <cstdint>
#endif
namespace ss {
@@ -629,13 +637,13 @@ constexpr inline auto default_delimiter = ",";
constexpr inline auto get_line_initial_buffer_size = 128;
template <bool StringError>
inline void assert_string_error_defined() {
void assert_string_error_defined() {
static_assert(StringError,
"'string_error' needs to be enabled to use 'error_msg'");
}
template <bool ThrowOnError>
inline void assert_throw_on_error_not_defined() {
void assert_throw_on_error_not_defined() {
static_assert(!ThrowOnError, "cannot handle errors manually if "
"'throw_on_error' is enabled");
}
@@ -657,8 +665,8 @@ inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
using ssize_t = intptr_t;
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
char buff[get_line_initial_buffer_size];
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
std::array<char, get_line_initial_buffer_size> buff;
if (lineptr == nullptr || n < sizeof(buff)) {
size_t new_n = sizeof(buff);
@@ -669,9 +677,9 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
lineptr[0] = '\0';
size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
while (std::fgets(buff.data(), sizeof(buff), file) != nullptr) {
line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff);
size_t buff_used = std::strlen(buff.data());
if (n <= buff_used + line_used) {
size_t new_n = n * 2;
@@ -679,7 +687,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
n = new_n;
}
std::memcpy(lineptr + line_used, buff, buff_used);
std::memcpy(lineptr + line_used, buff.data(), buff_used);
line_used += buff_used;
lineptr[line_used] = '\0';
@@ -693,7 +701,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
#endif
ssize_t get_line_buffer(char*& lineptr, size_t& n,
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
if (curr_char >= csv_data_size) {
@@ -701,7 +709,7 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
}
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
auto* new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
@@ -726,22 +734,18 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
}
}
if (line_used != 0) {
lineptr[line_used] = '\0';
return line_used;
}
return -1;
}
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
FILE* file,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
ssize_t ssize;
ssize_t ssize = 0;
if (file) {
ssize = get_line_file(buffer, buffer_size, file);
curr_char = std::ftell(file);
curr_char += ssize;
} else {
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
csv_data_size, curr_char);
@@ -757,7 +761,7 @@ std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
return {ssize, false};
}
} /* ss */
} /* namespace ss */
namespace ss {
@@ -1050,7 +1054,10 @@ private:
template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */
template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */
namespace ss {
@@ -1364,8 +1371,9 @@ private:
trim_left_if_enabled(begin_);
for (done_ = false; !done_; read(delim))
;
for (done_ = false; !done_;) {
read(delim);
}
return split_data_;
}
@@ -1521,7 +1529,7 @@ public:
friend class converter;
};
} /* ss */
} /* namespace ss */
#ifndef SSP_DISABLE_FAST_FLOAT
@@ -1557,16 +1565,17 @@ std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
"Conversion to long double is disabled");
constexpr static auto buff_max = 64;
char short_buff[buff_max];
std::array<char, buff_max> short_buff;
size_t string_range = std::distance(begin, end);
std::string long_buff;
char* buff;
char* buff = nullptr;
if (string_range > buff_max) {
long_buff = std::string{begin, end};
buff = long_buff.data();
} else {
buff = short_buff;
buff = short_buff.data();
buff[string_range] = '\0';
std::copy_n(begin, string_range, buff);
}
@@ -1604,6 +1613,8 @@ struct numeric_wrapper {
numeric_wrapper& operator=(numeric_wrapper&&) = default;
numeric_wrapper& operator=(const numeric_wrapper&) = default;
~numeric_wrapper() = default;
numeric_wrapper(T other) : value{other} {
}
@@ -1654,7 +1665,7 @@ template <typename T>
struct unsupported_type {
constexpr static bool value = false;
};
} /* namespace */
} /* namespace errors */
template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -1727,10 +1738,13 @@ inline bool extract(const char* begin, const char* end, bool& value) {
return false;
}
} else {
constexpr static auto true_size = 4;
constexpr static auto false_size = 5;
size_t size = end - begin;
if (size == 4 && std::strncmp(begin, "true", size) == 0) {
if (size == true_size && std::strncmp(begin, "true", size) == 0) {
value = true;
} else if (size == 5 && std::strncmp(begin, "false", size) == 0) {
} else if (size == false_size &&
std::strncmp(begin, "false", size) == 0) {
value = false;
} else {
return false;
@@ -1759,7 +1773,7 @@ inline bool extract(const char* begin, const char* end,
return true;
}
} /* ss */
} /* namespace ss */
namespace ss {
INIT_HAS_METHOD(tied)
@@ -1976,8 +1990,9 @@ private:
}
std::string error_sufix(const string_range msg, size_t pos) const {
constexpr static auto reserve_size = 32;
std::string error;
error.reserve(32);
error.reserve(reserve_size);
error.append("at column ")
.append(std::to_string(pos + 1))
.append(": \'")
@@ -2143,7 +2158,7 @@ private:
////////////////
bool columns_mapped() const {
return column_mappings_.size() != 0;
return !column_mappings_.empty();
}
size_t column_position(size_t tuple_position) const {
@@ -2156,7 +2171,7 @@ private:
// assumes positions are valid and the vector is not empty
void set_column_mapping(std::vector<size_t> positions,
size_t number_of_columns) {
column_mappings_ = positions;
column_mappings_ = std::move(positions);
number_of_columns_ = number_of_columns;
}
@@ -2242,10 +2257,10 @@ private:
friend class parser;
std::vector<size_t> column_mappings_;
size_t number_of_columns_;
size_t number_of_columns_{0};
};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -2268,10 +2283,12 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public:
parser(const std::string& file_name,
const std::string& delim = ss::default_delimiter)
: file_name_{file_name}, reader_{file_name_, delim} {
parser(std::string file_name, std::string delim = ss::default_delimiter)
: file_name_{std::move(file_name)}, reader_{file_name_, delim} {
if (reader_.file_) {
read_line();
if constexpr (ignore_header) {
@@ -2287,7 +2304,7 @@ public:
parser(const char* const csv_data_buffer, size_t csv_data_size,
const std::string& delim = ss::default_delimiter)
: file_name_{"buffer line"},
: file_name_{"CSV data buffer"},
reader_{csv_data_buffer, csv_data_size, delim} {
if (csv_data_buffer) {
read_line();
@@ -2304,6 +2321,7 @@ public:
parser(parser&& other) = default;
parser& operator=(parser&& other) = default;
~parser() = default;
parser() = delete;
parser(const parser& other) = delete;
@@ -2398,20 +2416,49 @@ public:
return value;
}
// Returns a copy of the stored header line (raw_header_) without splitting it.
// Compilation fails if the parser was set up with 'ignore_header', because
// assert_ignore_header_not_defined() contains a static_assert on that option.
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
std::vector<std::string> header() {
assert_ignore_header_not_defined();
clear_error();
header_splitter splitter;
std::string raw_header_copy = raw_header_;
if (!strict_split(splitter, raw_header_copy)) {
return {};
}
std::vector<std::string> split_header;
for (const auto& [begin, end] : splitter.split_data_) {
split_header.emplace_back(begin, end);
}
return split_header;
}
// Tells whether the header contains a field named `field`.
//
// Clears the previous error state, splits the header on first use (while
// header_ is still empty) and returns false if the parser is not valid
// afterwards; otherwise the result is whether header_index finds the field.
bool field_exists(const std::string& field) {
    assert_ignore_header_not_defined();
    clear_error();

    const bool header_not_split_yet = header_.empty();
    if (header_not_split_yet) {
        split_header_data();
    }

    return valid() && header_index(field).has_value();
}
template <typename... Ts>
void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) {
handle_error_header_ignored();
return;
}
assert_ignore_header_not_defined();
clear_error();
if (header_.empty() && !eof()) {
split_header_data();
@@ -2424,7 +2471,7 @@ public:
auto fields = std::vector<std::string>{fields_args...};
if (fields.empty()) {
handle_error_empty_mapping();
handle_error_invalid_use_fields_argument();
return;
}
@@ -2473,6 +2520,10 @@ public:
iterator(const iterator& other) = default;
iterator(iterator&& other) = default;
~iterator() = default;
iterator& operator=(const iterator& other) = delete;
iterator& operator=(iterator&& other) = delete;
value& operator*() {
return value_;
@@ -2497,8 +2548,10 @@ public:
return *this;
}
iterator& operator++(int) {
return ++*this;
iterator operator++(int) {
auto result = *this;
++*this;
return result;
}
friend bool operator==(const iterator& lhs, const iterator& rhs) {
@@ -2562,7 +2615,7 @@ public:
Fun&& fun = none{}) {
using Value = no_void_validator_tup_t<Us...>;
std::optional<Value> value;
try_convert_and_invoke<Value, Us...>(value, fun);
try_convert_and_invoke<Value, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value));
}
@@ -2571,7 +2624,7 @@ public:
template <typename U, typename... Us, typename Fun = none>
composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) {
std::optional<U> value;
try_convert_and_invoke<U, Us...>(value, fun);
try_convert_and_invoke<U, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value));
}
@@ -2679,7 +2732,8 @@ private:
using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun)));
constexpr bool returns_void = std::is_same_v<Ret, void>;
if constexpr (!returns_void) {
if (!try_invoke_impl(arg, std::forward<Fun>(fun))) {
if (!try_invoke_impl(std::forward<Arg>(arg),
std::forward<Fun>(fun))) {
handle_error_failed_check();
}
} else {
@@ -2714,22 +2768,58 @@ private:
if (valid()) {
try_invoke(*value, std::forward<Fun>(fun));
}
return {valid() ? std::move(value) : std::nullopt, *this};
return {valid() ? std::forward<T>(value) : std::nullopt, *this};
}
////////////////
// header
////////////////
// Compile-time guard for header-related methods: instantiating this function
// fails with the message below when 'ignore_header' is true for this parser.
// It performs no work at run time.
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
// Splits `header` with `splitter`, using the reader's delimiter.
//
// Returns false for an empty header line (no error is recorded for it).
// Without throw_on_error, an invalid split is reported through
// handle_error_invalid_header_split and false is returned. With
// throw_on_error, an ss::exception coming out of the split is passed to
// decorate_rethrow_invalid_header_split, which throws a decorated exception.
// Returns true when the split succeeded.
bool strict_split(header_splitter& splitter, std::string& header) {
    if (header.empty()) {
        return false;
    }

    if constexpr (!throw_on_error) {
        splitter.split(header.data(), reader_.delim_);
        if (splitter.valid()) {
            return true;
        }
        handle_error_invalid_header_split(splitter);
        return false;
    } else {
        try {
            splitter.split(header.data(), reader_.delim_);
        } catch (const ss::exception& e) {
            decorate_rethrow_invalid_header_split(e);
        }
        return true;
    }
}
void split_header_data() {
ss::splitter<Options...> splitter;
header_splitter splitter;
std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) {
handle_error_invalid_header(field);
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
@@ -2760,7 +2850,7 @@ private:
}
void handle_error_failed_check() {
constexpr static auto error_msg = " failed check";
constexpr static auto error_msg = ": failed check";
if constexpr (string_error) {
error_.clear();
@@ -2773,7 +2863,7 @@ private:
}
void handle_error_null_buffer() {
constexpr static auto error_msg = " received null data buffer";
constexpr static auto error_msg = ": received null data buffer";
if constexpr (string_error) {
error_.clear();
@@ -2786,7 +2876,7 @@ private:
}
void handle_error_file_not_open() {
constexpr static auto error_msg = " could not be opened";
constexpr static auto error_msg = ": could not be opened";
if constexpr (string_error) {
error_.clear();
@@ -2799,7 +2889,7 @@ private:
}
void handle_error_eof_reached() {
constexpr static auto error_msg = " read on end of file";
constexpr static auto error_msg = ": read on end of file";
if constexpr (string_error) {
error_.clear();
@@ -2824,20 +2914,6 @@ private:
}
}
void handle_error_header_ignored() {
constexpr static auto error_msg =
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg =
": header does not contain given field: ";
@@ -2865,8 +2941,9 @@ private:
}
}
void handle_error_empty_mapping() {
constexpr static auto error_msg = "received empty mapping";
void handle_error_invalid_use_fields_argument() {
constexpr static auto error_msg =
"received invalid argument for 'use_fields'";
if constexpr (string_error) {
error_.clear();
@@ -2878,19 +2955,53 @@ private:
}
}
void handle_error_invalid_header(const std::string& field) {
constexpr static auto error_msg = "header contains duplicates: ";
void handle_error_invalid_header_field() {
constexpr static auto error_msg = ": header contains empty field";
if constexpr (string_error) {
error_.clear();
error_.append(error_msg).append(error_msg);
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field};
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_duplicate_header_field(const std::string& field) {
constexpr static auto error_msg = ": header contains duplicate: ";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg).append(field);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg + field};
} else {
error_ = true;
}
}
// Reports a header line that the splitter could not split.
//
// With string_error the message is
// "<file_name_>: failed header parsing: <splitter error message>";
// in every other mode error_ is simply set to true.
void handle_error_invalid_header_split(const header_splitter& splitter) {
    constexpr static auto description = ": failed header parsing: ";

    if constexpr (!string_error) {
        error_ = true;
    } else {
        error_.clear();
        error_.append(file_name_);
        error_.append(description);
        error_.append(splitter.error_msg());
    }
}
// Throws a new ss::exception whose text is
// "<file_name_>: failed header parsing: <e.what()>".
// Only usable when throw_on_error is enabled (enforced at compile time).
void decorate_rethrow_invalid_header_split(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");

    std::string message{file_name_};
    message.append(": failed header parsing: ");
    message.append(e.what());
    throw ss::exception{message};
}
void decorate_rethrow(const ss::exception& e) const {
static_assert(throw_on_error,
"throw_on_error needs to be enabled to use this method");
@@ -2910,17 +3021,18 @@ private:
}
struct reader {
reader(const std::string& file_name_, const std::string& delim)
: delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} {
reader(const std::string& file_name_, std::string delim)
: delim_{std::move(delim)},
file_{std::fopen(file_name_.c_str(), "rb")} {
}
reader(const char* const buffer, size_t csv_data_size,
const std::string& delim)
: delim_{delim}, csv_data_buffer_{buffer},
std::string delim)
: delim_{std::move(delim)}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} {
}
reader(reader&& other)
reader(reader&& other) noexcept
: buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_},
@@ -2941,7 +3053,7 @@ private:
other.file_ = nullptr;
}
reader& operator=(reader&& other) {
reader& operator=(reader&& other) noexcept {
if (this != &other) {
buffer_ = other.buffer_;
next_line_buffer_ = other.next_line_buffer_;
@@ -3089,7 +3201,7 @@ private:
}
bool escaped_eol(size_t size) {
const char* curr;
const char* curr = nullptr;
for (curr = next_line_buffer_ + size - 1;
curr >= next_line_buffer_ &&
setup<Options...>::escape::match(*curr);
@@ -3135,7 +3247,7 @@ private:
size_t& buffer_size, const char* const second,
size_t second_size) {
buffer_size = first_size + second_size + 3;
auto new_first = static_cast<char*>(
auto* new_first = static_cast<char*>(
strict_realloc(static_cast<void*>(first), buffer_size));
first = new_first;
@@ -3164,7 +3276,7 @@ private:
}
std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_};
return std::string{next_line_buffer_, next_line_size_};
}
////////////////
@@ -3207,4 +3319,4 @@ private:
bool eof_{false};
};
} /* ss */
} /* namespace ss */

View File

@@ -23,7 +23,7 @@ struct is_unsigned : public std::is_unsigned<T> {};
template <>
struct is_unsigned<ss::uint8> : public std::true_type {};
} /* namespace */
} /* anonymous namespace */
static_assert(is_signed<ss::int8>::value);
static_assert(is_unsigned<ss::uint8>::value);

View File

@@ -19,7 +19,7 @@
namespace ss {
template <typename... Ts>
class parser;
} /* ss */
} /* namespace ss */
namespace {
@@ -145,6 +145,17 @@ struct unique_file_name {
CHECK_FALSE(std::string{e.what()}.empty()); \
}
// Checks that two indexable containers are equal, element by element.
// Each argument is evaluated once and bound to a const reference, so
// passing a temporary (e.g. a function call result) is fine. The sizes are
// compared first; elements are then compared only over the common prefix
// (std::min of both sizes), so a size mismatch never indexes out of range.
// Expands to a braced block and relies on CHECK_EQ being available.
#define CHECK_EQ_ARRAY(first, second) \
{ \
const auto& first_ = (first); \
const auto& second_ = (second); \
CHECK_EQ(first_.size(), second_.size()); \
for (size_t i_ = 0; i_ < std::min(first_.size(), second_.size()); \
++i_) { \
CHECK_EQ(first_[i_], second_[i_]); \
} \
}
template <typename T>
[[maybe_unused]] std::vector<std::vector<T>> vector_combinations(
const std::vector<T>& v, size_t n) {
@@ -166,6 +177,22 @@ template <typename T>
return ret;
}
// Joins the header fields into a single line, placing `delimiter` between
// consecutive fields (no leading or trailing delimiter). An empty `header`
// yields an empty string.
[[maybe_unused]] std::string merge_header(
    const std::vector<std::string>& header,
    const std::string& delimiter = ss::default_delimiter) {
    std::string merged;
    for (auto it = header.begin(); it != header.end(); ++it) {
        // The delimiter goes in front of every field except the first one,
        // so nothing has to be trimmed off the end afterwards.
        if (it != header.begin()) {
            merged.append(delimiter);
        }
        merged.append(*it);
    }
    return merged;
}
[[maybe_unused]] std::string make_buffer(const std::string& file_name) {
std::ifstream in{file_name, std::ios::binary};
std::string tmp;
@@ -224,4 +251,4 @@ make_parser(const std::string& file_name,
return make_parser_impl<buffer_mode, Ts...>(file_name, delim);
}
} /* namespace */
} /* anonymous namespace */

View File

@@ -28,6 +28,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
if (ss::setup<Ts...>::throw_on_error) {
try {
command();
FAIL("expected exception");
} catch (const std::exception& e) {
CHECK_FALSE(std::string{e.what()}.empty());
}
@@ -109,4 +110,4 @@ static void make_and_write(const std::string& file_name,
}
}
} /* namespace */
} /* anonymous namespace */

View File

@@ -9,6 +9,7 @@ struct has_type<T, std::tuple<Us...>>
template <typename T, typename... Ts>
static void test_fields(const std::string file_name, const std::vector<X>& data,
const std::vector<std::string>& header,
const std::vector<std::string>& fields) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
@@ -17,9 +18,14 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ",");
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(fields);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
std::vector<CaseType> i;
for (const auto& a : p.template iterate<CaseType>()) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.push_back(a);
}
@@ -40,12 +46,12 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
TEST_CASE_TEMPLATE("test various cases with header", T,
ParserOptionCombinations) {
unique_file_name f{"various_cases_with_header"};
using str = std::string;
constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double";
constexpr static auto Str = "String";
using str = std::string;
std::vector<std::string> header{Int, Dbl, Str};
const std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
@@ -59,6 +65,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a));
}
@@ -71,46 +79,22 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::string_error> p{f.name, ","};
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
p.use_fields(Int, Dbl);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{
auto [int_, double_] = p.get_next<int, double>();
@@ -119,6 +103,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
}
p.use_fields(Dbl, Int);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{
auto [double_, int_] = p.get_next<double, int>();
@@ -163,25 +149,25 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
template_params.append(type)
arg_params.append(header[type])
call = 'testFields<' + ', '.join(template_params) + \
'>(o, d, {' + ', '.join(arg_params) + '});'
'>(o, d, header, {' + ', '.join(arg_params) + '});'
print(call)
*/
test_fields<T, str>(o, d, {Str});
test_fields<T, int>(o, d, {Int});
test_fields<T, double>(o, d, {Dbl});
test_fields<T, str, int>(o, d, {Str, Int});
test_fields<T, str, double>(o, d, {Str, Dbl});
test_fields<T, int, str>(o, d, {Int, Str});
test_fields<T, int, double>(o, d, {Int, Dbl});
test_fields<T, double, str>(o, d, {Dbl, Str});
test_fields<T, double, int>(o, d, {Dbl, Int});
test_fields<T, str, int, double>(o, d, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, {Dbl, Int, Str});
test_fields<T, str>(o, d, header, {Str});
test_fields<T, int>(o, d, header, {Int});
test_fields<T, double>(o, d, header, {Dbl});
test_fields<T, str, int>(o, d, header, {Str, Int});
test_fields<T, str, double>(o, d, header, {Str, Dbl});
test_fields<T, int, str>(o, d, header, {Int, Str});
test_fields<T, int, double>(o, d, header, {Int, Dbl});
test_fields<T, double, str>(o, d, header, {Dbl, Str});
test_fields<T, double, int>(o, d, header, {Dbl, Int});
test_fields<T, str, int, double>(o, d, header, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, header, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, header, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, header, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, header, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, header, {Dbl, Int, Str});
}
template <typename T>
@@ -190,6 +176,17 @@ void test_invalid_fields(const std::vector<std::string>& lines,
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
auto check_header = [&lines](auto& p) {
if (lines.empty()) {
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
} else {
CHECK_EQ(lines[0], merge_header(p.header()));
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
}
CHECK(p.valid());
};
unique_file_name f{"invalid_fields"};
{
std::ofstream out{f.name};
@@ -203,6 +200,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields(); };
expect_error_on_command(p, command);
check_header(p);
}
{
@@ -210,6 +208,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields("Unknown"); };
expect_error_on_command(p, command);
check_header(p);
}
{
@@ -221,6 +220,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
if (!fields.empty()) {
expect_error_on_command(p, command);
}
check_header(p);
}
{
@@ -230,15 +230,19 @@ void test_invalid_fields(const std::vector<std::string>& lines,
p.use_fields(fields.at(0));
p.template get_next<std::string, std::string>();
};
check_header(p);
if (!fields.empty()) {
expect_error_on_command(p, command);
}
check_header(p);
}
{
// Invalid header
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p, &fields = fields] { p.use_fields(fields); };
check_header(p);
if (!fields.empty()) {
// Pass if there are no duplicates, fail otherwise
@@ -255,6 +259,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
}
}
}
check_header(p);
}
}
@@ -289,7 +294,7 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid rows with header"};
unique_file_name f{"invalid_rows_with_header"};
{
std::ofstream out{f.name};
out << "Int,String,Double" << std::endl;
@@ -301,8 +306,12 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
out << "six,line6,10.11" << std::endl;
}
std::vector<std::string> header = {"Int", "String", "Double"};
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Int", "String", "Double");
using data = std::tuple<int, std::string, double>;
@@ -325,10 +334,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{3, "line3", 67.8},
{5, "line5", 9.10}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Double", "Int");
using data = std::tuple<double, int>;
@@ -349,10 +362,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("String", "Double");
using data = std::tuple<std::string, double>;
@@ -376,6 +393,92 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{"line5", 9.10},
{"line6", 10.11}};
CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
}
// Exercises header() / raw_header() on header lines that are empty or cannot
// be split. The same file name is rewritten before each group of parsers, so
// every group sees only the header variant written just above it.
TEST_CASE_TEMPLATE("test invalid header", T, ParserOptionCombinations) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid_header"};
// Empty header
{
std::ofstream out{f.name};
out << "" << std::endl;
out << "1" << std::endl;
}
{
// An empty header line is expected to give an empty header() and to
// leave the parser valid (it is not treated as an error).
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header()), p.raw_header());
CHECK(p.valid());
}
// Unterminated quote in header
{
std::ofstream out{f.name};
out << "\"Int" << std::endl;
out << "1" << std::endl;
}
{
// Quote option only: header() is expected to fail, while raw_header()
// still returns the line as written.
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
// Same expectations with ss::multiline added to the options.
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>, ss::multiline>(
f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
// Same expectations with quote, escape and multiline combined.
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::quote<'"'>,
ss::escape<'\\'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Unterminated escape in header
{
std::ofstream out{f.name};
out << "Int\\" << std::endl;
out << "1" << std::endl;
}
{
// Escape option only: header() is expected to fail, while raw_header()
// still returns the line as written (including the trailing escape).
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
// Same expectations with ss::multiline added to the options.
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
// Same expectations with escape, quote and multiline combined.
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::quote<'"'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
}

View File

@@ -1,13 +1,7 @@
#include "test_helpers.hpp"
#include <algorithm>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <regex>
#include <ss/parser.hpp>
#include <sstream>
#include <unordered_map>
#include <unordered_set>
#ifndef SEGMENT_NAME
@@ -333,8 +327,10 @@ void test_data_combinations(const std::vector<column>& input_data,
field_header.push_back(field{el.header});
}
std::string header_line;
if (include_header) {
auto header_data = generate_csv_data<Ts...>(field_header, delim);
header_line = merge_header(header_data, delim);
if (input_data.size() == 0 && rand() % 10 == 0) {
write_to_file(header_data, delim, f.name, false);
} else {
@@ -403,7 +399,9 @@ void test_data_combinations(const std::vector<column>& input_data,
fields.push_back(header[index]);
}
if constexpr (!setup::ignore_header) {
p.use_fields(fields);
}
if (!p.valid()) {
if constexpr (setup::string_error) {
@@ -425,8 +423,19 @@ void test_data_combinations(const std::vector<column>& input_data,
}
};
auto check_header = [&p = p, &header = header, include_header,
header_line] {
if (include_header) {
if constexpr (!setup::ignore_header) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(header_line, p.raw_header());
}
}
};
int num_columns = layout.size();
for (size_t i = 0; i < n + 1; ++i) {
check_header();
try {
switch (num_columns) {
case 1: {
@@ -616,7 +625,7 @@ void test_option_combinations3() {
test_option_combinations2<Ts..., trim>();
}
} /* namespace */
} /* anonymous namespace */
// Tests split into multiple compilation units
#if 0

View File

@@ -3,12 +3,14 @@
set -x
set -e
python3 script/single_header_generator.py > ssp.cpp
TMP_HDR=test_single_header.hpp
TMP_SRC=test_single_header.cpp
TMP_BIN=test_single_header
echo 'int main(){ ss::parser p{""}; p.get_next<int, float>(); return 0; }' \
>> ssp.cpp
python3 script/single_header_generator.py > ${TMP_HDR}
cat ${TMP_HDR} test/test_single_header_main.txt > ${TMP_SRC}
g++ -std=c++17 ssp.cpp -o ssp.bin -Wall -Wextra
./ssp.bin
g++ -std=c++17 ${TMP_SRC} -o ${TMP_BIN} -Wall -Wextra
./${TMP_BIN}
rm ssp.cpp ssp.bin
rm ${TMP_HDR} ${TMP_SRC} ${TMP_BIN}

View File

@@ -0,0 +1,12 @@
// Minimal program that instantiates the parser on an in-memory buffer with
// quote, escape, trim and multiline options and converts one line.
// NOTE(review): presumably this is the snippet concatenated after the
// generated single header by the single-header test script - confirm.
int main() {
using quote = ss::quote<'"'>;
using escape = ss::escape<'\\'>;
using trim = ss::trim<' '>;
std::string data = "1,string,2.34,c";
// Buffer-mode constructor: pointer to the CSV data plus its size.
ss::parser<quote, escape, trim, ss::multiline> p{data.c_str(), data.size()};
// The result is intentionally unused; only compilation and a clean run matter.
auto tup = p.get_next<int, std::string, float, std::optional<char>>();
return 0;
}

View File

@@ -145,7 +145,7 @@ make_combinations(const std::vector<std::string>& input,
return {std::move(lines), std::move(expectations)};
}
} /* namespace */
} /* anonymous namespace */
/* ********************************** */
/* ********************************** */
@@ -548,7 +548,7 @@ public:
return splitter.size_shifted();
}
};
} /* ss */
} /* namespace ss */
TEST_CASE("splitter test resplit unterminated quote") {