15 Commits

Author SHA1 Message Date
ado
809939d0e2 Update parser error messages, fix parser tests 2024-03-13 19:39:07 +01:00
ado
b9f4afdd5f Add header and raw_header methods, update header usage methods error handling, write new and update existing unit tests 2024-03-13 17:15:31 +01:00
red0124
69875c238e Resolve clang-tidy warnings (#48)
* Resolve clang-tidy warnings, update single_header_generator.py

* Update single header test, resolve additional clang-tidy warnings
2024-03-12 18:31:24 +01:00
red0124
457defadaa Bugfix/odr violations (#47)
* Make common non-member functions inline, remove unreachable line from get_line_buffer

* [skip ci] Fix namespace comments
2024-03-12 10:22:10 +01:00
red0124
1b9a01f787 Feature/fuzz (#44)
* Add fuzzing ci, add bedge to README
2024-03-03 20:46:12 +01:00
red0124
f5b750dd93 Merge pull request #43 from red0124/bugfix/ftell_slowdown
Remove usage of ftell when updating cursor position value
2024-03-02 02:20:05 +01:00
ado
7f53b585f9 Remove usage of ftell when updating cursor position value 2024-03-02 00:34:19 +01:00
ado
67ef6651c1 Fix README typos 2024-03-01 17:23:26 +01:00
ado
fa4ec324de Update version 2024-03-01 16:22:45 +01:00
red0124
f229de61d6 Merge pull request #42 from red0124/dev
Merge with development
2024-03-01 16:17:16 +01:00
red0124
df2beab6c3 Fix buffer overflow on multiline restricted with unterminated quote and multiple empty lines (#41) 2024-03-01 15:46:34 +01:00
red0124
27bd60b5ce Fix bug with get_line_buffer when used with data buffer that is not null terminated and does not end with \n (#40) 2024-03-01 02:47:04 +01:00
red0124
c5b50f2b47 Fix compile issues for c++20 (#39) 2024-03-01 00:52:00 +01:00
red0124
d8dcce7f2a Fix buffer overflow on multiline csv data containing null characters (#38) 2024-02-29 22:03:20 +01:00
red0124
126329608c Add macOS ci (#36)
* Add macOS ci, update README
2024-02-28 22:20:26 +01:00
29 changed files with 1045 additions and 446 deletions

7
.github/fuzz/makefile vendored Normal file
View File

@@ -0,0 +1,7 @@
# Build/run helper for the fuzz harness in ssp_fuzz.cpp.
# EXE is the name of the binary produced by `all` and launched by `run`.
EXE=ssp_fuzz
# Default target: compile the harness with clang++ as C++17 with
# -fsanitize=fuzzer; additional compiler flags can be passed via CXXFLAGS.
all:
clang++ ${CXXFLAGS} ssp_fuzz.cpp -fsanitize=fuzzer -std=c++17 -o ${EXE}
# Launch the built fuzzer with -max_total_time=900
# (presumably a 900-second libFuzzer time budget -- confirm against libFuzzer docs).
run:
./${EXE} -max_total_time=900

81
.github/fuzz/ssp_fuzz.cpp vendored Normal file
View File

@@ -0,0 +1,81 @@
#include "../../ssp.hpp"
#include <filesystem>
#include <iostream>
#include <unistd.h>
template <typename... Ts>
void test_ssp_file_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
std::string file_name = std::filesystem::temp_directory_path().append(
"ss_fuzzer" + std::to_string(getpid()) + ".csv");
FILE* file = std::fopen(file_name.c_str(), "wb");
if (!file) {
std::exit(1);
}
std::fwrite(data, size, 1, file);
std::fclose(file);
ss::parser<Ts...> p{file_name.c_str(), delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
std::remove(file_name.c_str());
}
template <typename... Ts>
void test_ssp_buffer_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
ss::parser<Ts...> p{(const char*)data, size, delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
}
template <typename... Ts>
void test_ssp(const uint8_t* data, size_t size) {
test_ssp_file_mode<Ts...>(data, size);
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size);
test_ssp_file_mode<Ts...>(data, size, ":::");
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size, ":::");
test_ssp_buffer_mode<Ts...>(data, size);
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size);
test_ssp_buffer_mode<Ts...>(data, size, ":::");
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size, ":::");
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
using escape = ss::escape<'\\'>;
using quote = ss::quote<'"'>;
using trim = ss::trim<' ', '\t'>;
using multiline_r = ss::multiline_restricted<5>;
test_ssp<>(data, size);
test_ssp<escape>(data, size);
test_ssp<quote>(data, size);
test_ssp<trim>(data, size);
test_ssp<quote, escape>(data, size);
test_ssp<escape, quote, multiline_r, trim>(data, size);
test_ssp<escape, quote, multiline_r, trim, ss::ignore_empty>(data, size);
return 0;
}

43
.github/workflows/fuzz.yml vendored Normal file
View File

@@ -0,0 +1,43 @@
# CI workflow that builds and runs the fuzzer located in .github/fuzz.
name: fuzz-ci
# Runs on manual dispatch and on push / pull_request events for the branch
# patterns listed below.
on:
workflow_dispatch:
push:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
pull_request:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
jobs:
clang_tests:
# The job is skipped when a commit message in the event contains
# '[skip ci]' or '[skip github]'.
if: >-
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
! contains(toJSON(github.event.commits.*.message), '[skip github]')
runs-on: ubuntu-latest
name: "Fuzzing"
# The steps run inside the silkeh/clang:15 container image.
container:
image: silkeh/clang:15
options: -v /usr/local:/host_usr_local
# Check out the repository, then invoke the makefile in .github/fuzz:
# `make` to build the fuzzer and `make run` to execute it.
steps:
- uses: actions/checkout@v1
- name: Build
working-directory: .github/fuzz
run: make
- name: Run
working-directory: .github/fuzz
run: make run

55
.github/workflows/macos-apple-clang.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
# CI workflow that configures, builds and runs the test suite on macOS.
name: macos-apple-clang-ci
# Runs on manual dispatch and on push / pull_request events for the branch
# patterns listed below.
on:
workflow_dispatch:
push:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
pull_request:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
jobs:
clang_tests:
# The job is skipped when a commit message in the event contains
# '[skip ci]' or '[skip github]'.
if: >-
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
! contains(toJSON(github.event.commits.*.message), '[skip github]')
# One job per combination of Xcode version and CMake build type.
strategy:
matrix:
xcode: ['13.4.1', '14.1']
type: [Release, Debug]
runs-on: macos-12
# DEVELOPER_DIR points at the Xcode installation chosen by the matrix entry.
env:
DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer
name: "Xcode ${{matrix.xcode}}: ${{matrix.type}}"
# Check out, fetch the core count (used for the parallel build), install
# dependencies, then configure the `test` directory into `build`, build it
# and run ctest from the build directory.
steps:
- uses: actions/checkout@v3
- uses: friendlyanon/fetch-core-count@v1
id: cores
- name: Install dependencies
run: script/ci_install_deps.sh
- name: Configure
run: cmake -S test -B build -DCMAKE_BUILD_TYPE=${{matrix.type}}
- name: Build
run: cmake --build build -j ${{steps.cores.outputs.count}}
- name: Run
working-directory: build
run: ctest --output-on-failure

4
.gitignore vendored
View File

@@ -1,6 +1,8 @@
compile_commands.json compile_commands.json
.clang-format .clang-format
.ccls-cache/* .clang-tidy
.ccls-cache/
.cache/
experiment/ experiment/
build/ build/
hbuild/ hbuild/

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
project( project(
ssp ssp
VERSION 1.7.0 VERSION 1.7.2
DESCRIPTION "csv parser" DESCRIPTION "csv parser"
HOMEPAGE_URL "https://github.com/red0124/ssp" HOMEPAGE_URL "https://github.com/red0124/ssp"
LANGUAGES CXX LANGUAGES CXX

View File

@@ -9,6 +9,7 @@
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![coverage](https://coveralls.io/repos/github/red0124/ssp/badge.svg?branch=master)](https://coveralls.io/github/red0124/ssp?branch=master) [![coverage](https://coveralls.io/repos/github/red0124/ssp/badge.svg?branch=master)](https://coveralls.io/github/red0124/ssp?branch=master)
[![fuzz](https://github.com/red0124/ssp/workflows/fuzz-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/fuzz.yml)
[![single-header](https://github.com/red0124/ssp/workflows/single-header-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/single-header.yml) [![single-header](https://github.com/red0124/ssp/workflows/single-header-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/single-header.yml)
[![ubuntu-latest-gcc](https://github.com/red0124/ssp/workflows/ubuntu-latest-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-gcc.yml) [![ubuntu-latest-gcc](https://github.com/red0124/ssp/workflows/ubuntu-latest-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-gcc.yml)
[![ubuntu-latest-clang](https://github.com/red0124/ssp/workflows/ubuntu-latest-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-clang.yml) [![ubuntu-latest-clang](https://github.com/red0124/ssp/workflows/ubuntu-latest-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-clang.yml)
@@ -16,6 +17,7 @@
[![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-gcc.yml) [![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-gcc.yml)
[![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml) [![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml)
[![windows-msvc](https://github.com/red0124/ssp/workflows/win-msvc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml) [![windows-msvc](https://github.com/red0124/ssp/workflows/win-msvc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml)
[![macos-apple-clang](https://github.com/red0124/ssp/workflows/macos-apple-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/macos-apple-clang.yml)
A header only CSV parser which is fast and versatile with modern C++ API. Requires compiler with C++17 support. [Can also be used to efficiently convert strings to specific types.](#the-converter) A header only CSV parser which is fast and versatile with modern C++ API. Requires compiler with C++17 support. [Can also be used to efficiently convert strings to specific types.](#the-converter)
@@ -72,7 +74,7 @@ Bill (Heath) Gates 65 3.3
# Single header # Single header
The library can be used with a single header file **`ssp.hpp`**, but it sufferes a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file. The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
# Installation # Installation
@@ -114,11 +116,11 @@ James Bailey 2.5
Brian S. Wolfe 1.9 Brian S. Wolfe 1.9
Bill (Heath) Gates 3.3 Bill (Heath) Gates 3.3
``` ```
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** metod after the parser has been constructed. The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
```cpp ```cpp
ss::parser<ss::ignore_header> p{file_name}; ss::parser<ss::ignore_header> p{file_name};
``` ```
The fields with which the parser works with can be modified at any given time. The praser can also check if a field is present within the header by using the **`field_exists`** method. The fields with which the parser works with can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
```cpp ```cpp
// ... // ...
ss::parser<ss::throw_on_error> p{"students_with_header.csv"}; ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
@@ -248,7 +250,7 @@ By default, **`,`** is used as the delimiter, a custom delimiter can be specifie
```cpp ```cpp
ss::parser p{file_name, "--"}; ss::parser p{file_name, "--"};
``` ```
*Note, the delimiter can consist of multiple characters but the parser is slightliy faster when using single character delimiters.* *Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
### Empty lines ### Empty lines
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters: Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
@@ -397,7 +399,7 @@ if (std::holds_alternative<float>(grade)) {
// grade set as char // grade set as char
} }
``` ```
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers arround the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character. Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
```cpp ```cpp
// returns std::tuple<std::string, ss::uint8, float> // returns std::tuple<std::string, ss::uint8, float>
auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>(); auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>();

View File

@@ -1,11 +1,15 @@
#pragma once #pragma once
#include <cerrno> #include <cerrno>
#include <cstdint>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <vector> #include <vector>
#if !__unix__
#include <array>
#include <cstdint>
#endif
namespace ss { namespace ss {
struct none {}; struct none {};
@@ -17,13 +21,13 @@ constexpr inline auto default_delimiter = ",";
constexpr inline auto get_line_initial_buffer_size = 128; constexpr inline auto get_line_initial_buffer_size = 128;
template <bool StringError> template <bool StringError>
inline void assert_string_error_defined() { void assert_string_error_defined() {
static_assert(StringError, static_assert(StringError,
"'string_error' needs to be enabled to use 'error_msg'"); "'string_error' needs to be enabled to use 'error_msg'");
} }
template <bool ThrowOnError> template <bool ThrowOnError>
inline void assert_throw_on_error_not_defined() { void assert_throw_on_error_not_defined() {
static_assert(!ThrowOnError, "cannot handle errors manually if " static_assert(!ThrowOnError, "cannot handle errors manually if "
"'throw_on_error' is enabled"); "'throw_on_error' is enabled");
} }
@@ -38,45 +42,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
} }
#if __unix__ #if __unix__
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
return getline(lineptr, n, stream); return getline(&lineptr, &n, file);
} }
#else #else
using ssize_t = intptr_t; using ssize_t = intptr_t;
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
if (lineptr == nullptr || n == nullptr || fp == nullptr) { std::array<char, get_line_initial_buffer_size> buff;
errno = EINVAL;
return -1;
}
char buff[get_line_initial_buffer_size]; if (lineptr == nullptr || n < sizeof(buff)) {
if (*lineptr == nullptr || *n < sizeof(buff)) {
size_t new_n = sizeof(buff); size_t new_n = sizeof(buff);
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n)); lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
*n = new_n; n = new_n;
} }
(*lineptr)[0] = '\0'; lineptr[0] = '\0';
size_t line_used = 0; size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), fp) != nullptr) { while (std::fgets(buff.data(), sizeof(buff), file) != nullptr) {
line_used = std::strlen(*lineptr); line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff); size_t buff_used = std::strlen(buff.data());
if (*n <= buff_used + line_used) { if (n <= buff_used + line_used) {
size_t new_n = *n * 2; size_t new_n = n * 2;
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n)); lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
*n = new_n; n = new_n;
} }
std::memcpy(*lineptr + line_used, buff, buff_used); std::memcpy(lineptr + line_used, buff.data(), buff_used);
line_used += buff_used; line_used += buff_used;
(*lineptr)[line_used] = '\0'; lineptr[line_used] = '\0';
if ((*lineptr)[line_used - 1] == '\n') { if (lineptr[line_used - 1] == '\n') {
return line_used; return line_used;
} }
} }
@@ -86,4 +85,64 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
#endif #endif
} /* ss */ inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
// Copies the next line of csv_data_buffer, starting at index curr_char, into
// the growable buffer lineptr (capacity n) and null-terminates it.
// curr_char is advanced past every character that was copied.
// Returns the number of characters copied (a trailing '\n' is included when
// present), or -1 when curr_char is already at or past csv_data_size.
if (curr_char >= csv_data_size) {
return -1;
}
// Make sure the output buffer exists and has at least the initial capacity.
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto* new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char < csv_data_size) {
// Double the capacity while at most one slot is free, so that the next
// character and the terminating '\0' both fit.
if (line_used + 1 >= n) {
size_t new_n = n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
lineptr[line_used++] = c;
// A newline ends the line; it is kept in the output.
if (c == '\n') {
lineptr[line_used] = '\0';
return line_used;
}
}
// The data ended without a trailing newline: terminate what was copied.
lineptr[line_used] = '\0';
return line_used;
}
/**
 * Reads the next line from `file` when it is non-null, otherwise from the
 * in-memory buffer `csv_data_buffer`.
 *
 * @param buffer          growable output buffer receiving the line
 * @param buffer_size     capacity of `buffer`, updated when it grows
 * @param file            input stream, or nullptr to read from the data buffer
 * @param csv_data_buffer in-memory CSV data (used only when `file` is null)
 * @param csv_data_size   size of `csv_data_buffer`
 * @param curr_char       read cursor; advanced by the number of characters
 *                        consumed
 * @return {line length, false} on success, {-1, true} when no line was read
 * @throws std::bad_alloc when no line was read and errno equals ENOMEM
 */
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
                                          FILE* file,
                                          const char* const csv_data_buffer,
                                          size_t csv_data_size,
                                          size_t& curr_char) {
    ssize_t ssize = 0;
    if (file) {
        ssize = get_line_file(buffer, buffer_size, file);
        // Advance the cursor only when something was read: adding the -1
        // failure value to the unsigned cursor would wrap around and move it
        // back by one on every read attempted at end of file.
        if (ssize > 0) {
            curr_char += static_cast<size_t>(ssize);
        }
    } else {
        // The buffer reader advances curr_char itself.
        ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
                                csv_data_size, curr_char);
    }

    if (ssize == -1) {
        // NOTE(review): errno is not cleared before the read, so an ENOMEM
        // left over from an earlier call would also end up here -- confirm
        // whether that can happen in practice.
        if (errno == ENOMEM) {
            throw std::bad_alloc{};
        }
        return {ssize, true};
    }

    return {ssize, false};
}
} /* namespace ss */

View File

@@ -150,7 +150,7 @@ public:
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) { if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
return convert_impl(elems, static_cast<T*>(nullptr)); return convert_impl(elems, static_cast<T*>(nullptr));
} else if constexpr (tied_class_v<T, Ts...>) { } else if constexpr (tied_class_v<T, Ts...>) {
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>; using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>; using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
return to_object<T>( return to_object<T>(
@@ -224,8 +224,9 @@ private:
} }
std::string error_sufix(const string_range msg, size_t pos) const { std::string error_sufix(const string_range msg, size_t pos) const {
constexpr static auto reserve_size = 32;
std::string error; std::string error;
error.reserve(32); error.reserve(reserve_size);
error.append("at column ") error.append("at column ")
.append(std::to_string(pos + 1)) .append(std::to_string(pos + 1))
.append(": \'") .append(": \'")
@@ -269,6 +270,7 @@ private:
void handle_error_multiline_limit_reached() { void handle_error_multiline_limit_reached() {
constexpr static auto error_msg = "multiline limit reached"; constexpr static auto error_msg = "multiline limit reached";
splitter_.unterminated_quote_ = false;
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -390,7 +392,7 @@ private:
//////////////// ////////////////
bool columns_mapped() const { bool columns_mapped() const {
return column_mappings_.size() != 0; return !column_mappings_.empty();
} }
size_t column_position(size_t tuple_position) const { size_t column_position(size_t tuple_position) const {
@@ -403,7 +405,7 @@ private:
// assumes positions are valid and the vector is not empty // assumes positions are valid and the vector is not empty
void set_column_mapping(std::vector<size_t> positions, void set_column_mapping(std::vector<size_t> positions,
size_t number_of_columns) { size_t number_of_columns) {
column_mappings_ = positions; column_mappings_ = std::move(positions);
number_of_columns_ = number_of_columns; number_of_columns_ = number_of_columns;
} }
@@ -489,7 +491,7 @@ private:
friend class parser; friend class parser;
std::vector<size_t> column_mappings_; std::vector<size_t> column_mappings_;
size_t number_of_columns_; size_t number_of_columns_{0};
}; };
} /* ss */ } /* namespace ss */

View File

@@ -12,12 +12,12 @@ class exception : public std::exception {
std::string msg_; std::string msg_;
public: public:
exception(const std::string& msg): msg_{msg} { exception(std::string msg): msg_{std::move(msg)} {
} }
virtual char const* what() const noexcept { char const* what() const noexcept override {
return msg_.c_str(); return msg_.c_str();
} }
}; };
} /* ss */ } /* namespace ss */

View File

@@ -2,8 +2,8 @@
#include "type_traits.hpp" #include "type_traits.hpp"
#include <charconv> #include <charconv>
#include <cstdint>
#include <cstring> #include <cstring>
#include <functional>
#include <optional> #include <optional>
#include <string> #include <string>
#include <string_view> #include <string_view>
@@ -14,6 +14,7 @@
#else #else
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <array>
#endif #endif
namespace ss { namespace ss {
@@ -45,16 +46,17 @@ std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
"Conversion to long double is disabled"); "Conversion to long double is disabled");
constexpr static auto buff_max = 64; constexpr static auto buff_max = 64;
char short_buff[buff_max]; std::array<char, buff_max> short_buff;
size_t string_range = std::distance(begin, end); size_t string_range = std::distance(begin, end);
std::string long_buff; std::string long_buff;
char* buff; char* buff = nullptr;
if (string_range > buff_max) { if (string_range > buff_max) {
long_buff = std::string{begin, end}; long_buff = std::string{begin, end};
buff = long_buff.data(); buff = long_buff.data();
} else { } else {
buff = short_buff; buff = short_buff.data();
buff[string_range] = '\0'; buff[string_range] = '\0';
std::copy_n(begin, string_range, buff); std::copy_n(begin, string_range, buff);
} }
@@ -92,6 +94,8 @@ struct numeric_wrapper {
numeric_wrapper& operator=(numeric_wrapper&&) = default; numeric_wrapper& operator=(numeric_wrapper&&) = default;
numeric_wrapper& operator=(const numeric_wrapper&) = default; numeric_wrapper& operator=(const numeric_wrapper&) = default;
~numeric_wrapper() = default;
numeric_wrapper(T other) : value{other} { numeric_wrapper(T other) : value{other} {
} }
@@ -142,7 +146,7 @@ template <typename T>
struct unsupported_type { struct unsupported_type {
constexpr static bool value = false; constexpr static bool value = false;
}; };
} /* namespace */ } /* namespace errors */
template <typename T> template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> && std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -215,10 +219,13 @@ inline bool extract(const char* begin, const char* end, bool& value) {
return false; return false;
} }
} else { } else {
constexpr static auto true_size = 4;
constexpr static auto false_size = 5;
size_t size = end - begin; size_t size = end - begin;
if (size == 4 && std::strncmp(begin, "true", size) == 0) { if (size == true_size && std::strncmp(begin, "true", size) == 0) {
value = true; value = true;
} else if (size == 5 && std::strncmp(begin, "false", size) == 0) { } else if (size == false_size &&
std::strncmp(begin, "false", size) == 0) {
value = false; value = false;
} else { } else {
return false; return false;
@@ -247,4 +254,4 @@ inline bool extract(const char* begin, const char* end,
return true; return true;
} }
} /* ss */ } /* namespace ss */

View File

@@ -2,7 +2,6 @@
#include <cstdlib> #include <cstdlib>
#include <functional> #include <functional>
#include <tuple>
namespace ss { namespace ss {
@@ -77,4 +76,4 @@ struct member_wrapper<R T::*> {
template <typename T> \ template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value; constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */ } /* namespace ss */

View File

@@ -5,7 +5,6 @@
#include "exception.hpp" #include "exception.hpp"
#include "extract.hpp" #include "extract.hpp"
#include "restrictions.hpp" #include "restrictions.hpp"
#include <cerrno>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <optional> #include <optional>
@@ -32,10 +31,12 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty; constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public: public:
parser(const std::string& file_name, parser(std::string file_name, std::string delim = ss::default_delimiter)
const std::string& delim = ss::default_delimiter) : file_name_{std::move(file_name)}, reader_{file_name_, delim} {
: file_name_{file_name}, reader_{file_name_, delim} {
if (reader_.file_) { if (reader_.file_) {
read_line(); read_line();
if constexpr (ignore_header) { if constexpr (ignore_header) {
@@ -51,7 +52,7 @@ public:
parser(const char* const csv_data_buffer, size_t csv_data_size, parser(const char* const csv_data_buffer, size_t csv_data_size,
const std::string& delim = ss::default_delimiter) const std::string& delim = ss::default_delimiter)
: file_name_{"buffer line"}, : file_name_{"CSV data buffer"},
reader_{csv_data_buffer, csv_data_size, delim} { reader_{csv_data_buffer, csv_data_size, delim} {
if (csv_data_buffer) { if (csv_data_buffer) {
read_line(); read_line();
@@ -68,6 +69,7 @@ public:
parser(parser&& other) = default; parser(parser&& other) = default;
parser& operator=(parser&& other) = default; parser& operator=(parser&& other) = default;
~parser() = default;
parser() = delete; parser() = delete;
parser(const parser& other) = delete; parser(const parser& other) = delete;
@@ -162,20 +164,49 @@ public:
return value; return value;
} }
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
std::vector<std::string> header() {
assert_ignore_header_not_defined();
clear_error();
header_splitter splitter;
std::string raw_header_copy = raw_header_;
if (!strict_split(splitter, raw_header_copy)) {
return {};
}
std::vector<std::string> split_header;
for (const auto& [begin, end] : splitter.split_data_) {
split_header.emplace_back(begin, end);
}
return split_header;
}
bool field_exists(const std::string& field) { bool field_exists(const std::string& field) {
assert_ignore_header_not_defined();
clear_error();
if (header_.empty()) { if (header_.empty()) {
split_header_data(); split_header_data();
} }
if (!valid()) {
return false;
}
return header_index(field).has_value(); return header_index(field).has_value();
} }
template <typename... Ts> template <typename... Ts>
void use_fields(const Ts&... fields_args) { void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) { assert_ignore_header_not_defined();
handle_error_header_ignored(); clear_error();
return;
}
if (header_.empty() && !eof()) { if (header_.empty() && !eof()) {
split_header_data(); split_header_data();
@@ -188,7 +219,7 @@ public:
auto fields = std::vector<std::string>{fields_args...}; auto fields = std::vector<std::string>{fields_args...};
if (fields.empty()) { if (fields.empty()) {
handle_error_empty_mapping(); handle_error_invalid_use_fields_argument();
return; return;
} }
@@ -237,6 +268,10 @@ public:
iterator(const iterator& other) = default; iterator(const iterator& other) = default;
iterator(iterator&& other) = default; iterator(iterator&& other) = default;
~iterator() = default;
iterator& operator=(const iterator& other) = delete;
iterator& operator=(iterator&& other) = delete;
value& operator*() { value& operator*() {
return value_; return value_;
@@ -261,8 +296,10 @@ public:
return *this; return *this;
} }
iterator& operator++(int) { iterator operator++(int) {
return ++*this; auto result = *this;
++*this;
return result;
} }
friend bool operator==(const iterator& lhs, const iterator& rhs) { friend bool operator==(const iterator& lhs, const iterator& rhs) {
@@ -326,7 +363,7 @@ public:
Fun&& fun = none{}) { Fun&& fun = none{}) {
using Value = no_void_validator_tup_t<Us...>; using Value = no_void_validator_tup_t<Us...>;
std::optional<Value> value; std::optional<Value> value;
try_convert_and_invoke<Value, Us...>(value, fun); try_convert_and_invoke<Value, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value)); return composite_with(std::move(value));
} }
@@ -335,7 +372,7 @@ public:
template <typename U, typename... Us, typename Fun = none> template <typename U, typename... Us, typename Fun = none>
composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) { composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) {
std::optional<U> value; std::optional<U> value;
try_convert_and_invoke<U, Us...>(value, fun); try_convert_and_invoke<U, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value)); return composite_with(std::move(value));
} }
@@ -443,7 +480,8 @@ private:
using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun))); using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun)));
constexpr bool returns_void = std::is_same_v<Ret, void>; constexpr bool returns_void = std::is_same_v<Ret, void>;
if constexpr (!returns_void) { if constexpr (!returns_void) {
if (!try_invoke_impl(arg, std::forward<Fun>(fun))) { if (!try_invoke_impl(std::forward<Arg>(arg),
std::forward<Fun>(fun))) {
handle_error_failed_check(); handle_error_failed_check();
} }
} else { } else {
@@ -478,22 +516,58 @@ private:
if (valid()) { if (valid()) {
try_invoke(*value, std::forward<Fun>(fun)); try_invoke(*value, std::forward<Fun>(fun));
} }
return {valid() ? std::move(value) : std::nullopt, *this}; return {valid() ? std::forward<T>(value) : std::nullopt, *this};
} }
//////////////// ////////////////
// header // header
//////////////// ////////////////
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
bool strict_split(header_splitter& splitter, std::string& header) {
if (header.empty()) {
return false;
}
if constexpr (throw_on_error) {
try {
splitter.split(header.data(), reader_.delim_);
} catch (const ss::exception& e) {
decorate_rethrow_invalid_header_split(e);
}
} else {
splitter.split(header.data(), reader_.delim_);
if (!splitter.valid()) {
handle_error_invalid_header_split(splitter);
return false;
}
}
return true;
}
void split_header_data() { void split_header_data() {
ss::splitter<Options...> splitter; header_splitter splitter;
std::string raw_header_copy = raw_header_; std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) { for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end}; std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) != if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) { header_.end()) {
handle_error_invalid_header(field); handle_error_duplicate_header_field(field);
header_.clear(); header_.clear();
return; return;
} }
@@ -524,7 +598,7 @@ private:
} }
void handle_error_failed_check() { void handle_error_failed_check() {
constexpr static auto error_msg = " failed check"; constexpr static auto error_msg = ": failed check";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -537,7 +611,7 @@ private:
} }
void handle_error_null_buffer() { void handle_error_null_buffer() {
constexpr static auto error_msg = " received null data buffer"; constexpr static auto error_msg = ": received null data buffer";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -550,7 +624,7 @@ private:
} }
void handle_error_file_not_open() { void handle_error_file_not_open() {
constexpr static auto error_msg = " could not be opened"; constexpr static auto error_msg = ": could not be opened";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -563,7 +637,7 @@ private:
} }
void handle_error_eof_reached() { void handle_error_eof_reached() {
constexpr static auto error_msg = " read on end of file"; constexpr static auto error_msg = ": read on end of file";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -588,20 +662,6 @@ private:
} }
} }
void handle_error_header_ignored() {
constexpr static auto error_msg =
": the header row is ignored within the setup it cannot be used";
if constexpr (string_error) {
error_.clear();
error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) {
throw ss::exception{file_name_ + error_msg};
} else {
error_ = true;
}
}
void handle_error_invalid_field(const std::string& field) { void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg = constexpr static auto error_msg =
": header does not contain given field: "; ": header does not contain given field: ";
@@ -629,8 +689,9 @@ private:
} }
} }
void handle_error_empty_mapping() { void handle_error_invalid_use_fields_argument() {
constexpr static auto error_msg = "received empty mapping"; constexpr static auto error_msg =
"received invalid argument for 'use_fields'";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -642,19 +703,53 @@ private:
} }
} }
void handle_error_invalid_header(const std::string& field) { void handle_error_invalid_header_field() {
constexpr static auto error_msg = "header contains duplicates: "; constexpr static auto error_msg = ": header contains empty field";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
error_.append(error_msg).append(error_msg); error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) { } else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field}; throw ss::exception{file_name_ + error_msg};
} else { } else {
error_ = true; error_ = true;
} }
} }
// Records that `field` occurs more than once in the header.
// Depending on the error mode this stores "<file_name>: header contains
// duplicate: <field>" in error_, throws it as ss::exception, or sets the
// boolean error flag.
void handle_error_duplicate_header_field(const std::string& field) {
    constexpr static auto error_msg = ": header contains duplicate: ";

    if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(field);
    } else {
        if constexpr (throw_on_error) {
            throw ss::exception{file_name_ + error_msg + field};
        } else {
            error_ = true;
        }
    }
}
// Records a header split failure reported by `splitter`.
// With 'string_error' the stored message is "<file_name>: failed header
// parsing: <splitter error>"; otherwise only the boolean error flag is set.
void handle_error_invalid_header_split(const header_splitter& splitter) {
    constexpr static auto error_msg = ": failed header parsing: ";

    if constexpr (!string_error) {
        error_ = true;
    } else {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(splitter.error_msg());
    }
}
// Wraps an exception raised while splitting the header: throws a new
// ss::exception whose message is "<file_name>: failed header parsing: "
// followed by the original what(). Only usable with 'throw_on_error'.
void decorate_rethrow_invalid_header_split(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");
    throw ss::exception{file_name_ + ": failed header parsing: " + e.what()};
}
void decorate_rethrow(const ss::exception& e) const { void decorate_rethrow(const ss::exception& e) const {
static_assert(throw_on_error, static_assert(throw_on_error,
"throw_on_error needs to be enabled to use this method"); "throw_on_error needs to be enabled to use this method");
@@ -674,17 +769,18 @@ private:
} }
struct reader { struct reader {
reader(const std::string& file_name_, const std::string& delim) reader(const std::string& file_name_, std::string delim)
: delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} { : delim_{std::move(delim)},
file_{std::fopen(file_name_.c_str(), "rb")} {
} }
reader(const char* const buffer, size_t csv_data_size, reader(const char* const buffer, size_t csv_data_size,
const std::string& delim) std::string delim)
: delim_{delim}, csv_data_buffer_{buffer}, : delim_{std::move(delim)}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} { csv_data_size_{csv_data_size} {
} }
reader(reader&& other) reader(reader&& other) noexcept
: buffer_{other.buffer_}, : buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_}, next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_}, helper_buffer_{other.helper_buffer_},
@@ -705,7 +801,7 @@ private:
other.file_ = nullptr; other.file_ = nullptr;
} }
reader& operator=(reader&& other) { reader& operator=(reader&& other) noexcept {
if (this != &other) { if (this != &other) {
buffer_ = other.buffer_; buffer_ = other.buffer_;
next_line_buffer_ = other.next_line_buffer_; next_line_buffer_ = other.next_line_buffer_;
@@ -749,46 +845,9 @@ private:
reader(const reader& other) = delete; reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete; reader& operator=(const reader& other) = delete;
ssize_t get_line_buffer(char** lineptr, size_t* n,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
strict_realloc(*lineptr, get_line_initial_buffer_size));
*lineptr = new_lineptr;
*n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char <= csv_data_size) {
if (line_used + 1 >= *n) {
size_t new_n = *n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
*lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
(*lineptr)[line_used++] = c;
if (c == '\n') {
(*lineptr)[line_used] = '\0';
return line_used;
}
}
return (line_used != 0) ? line_used : -1;
}
// read next line each time in order to set eof_ // read next line each time in order to set eof_
bool read_next() { bool read_next() {
next_line_converter_.clear_error(); next_line_converter_.clear_error();
ssize_t ssize = 0;
size_t size = 0; size_t size = 0;
while (size == 0) { while (size == 0) {
++line_number_; ++line_number_;
@@ -797,21 +856,11 @@ private:
} }
chars_read_ = curr_char_; chars_read_ = curr_char_;
if (file_) { auto [ssize, eof] =
ssize = get_line_file(&next_line_buffer_, get_line(next_line_buffer_, next_line_buffer_size_, file_,
&next_line_buffer_size_, file_); csv_data_buffer_, csv_data_size_, curr_char_);
curr_char_ = std::ftell(file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (ssize == -1) { if (eof) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
return false; return false;
} }
@@ -836,7 +885,8 @@ private:
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) { next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_escape(); next_line_converter_.handle_error_unterminated_escape();
return; return;
} }
@@ -854,7 +904,8 @@ private:
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) { next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_quote(); next_line_converter_.handle_error_unterminated_quote();
return; return;
} }
@@ -865,8 +916,9 @@ private:
return; return;
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(
next_line_size_)) { next_line_buffer_, next_line_size_,
next_line_buffer_size_)) {
next_line_converter_ next_line_converter_
.handle_error_unterminated_escape(); .handle_error_unterminated_escape();
return; return;
@@ -897,7 +949,7 @@ private:
} }
bool escaped_eol(size_t size) { bool escaped_eol(size_t size) {
const char* curr; const char* curr = nullptr;
for (curr = next_line_buffer_ + size - 1; for (curr = next_line_buffer_ + size - 1;
curr >= next_line_buffer_ && curr >= next_line_buffer_ &&
setup<Options...>::escape::match(*curr); setup<Options...>::escape::match(*curr);
@@ -910,18 +962,20 @@ private:
return next_line_converter_.unterminated_quote(); return next_line_converter_.unterminated_quote();
} }
void undo_remove_eol(char* buffer, size_t& string_end) { void undo_remove_eol(char* buffer, size_t& line_size,
if (crlf_) { size_t buffer_size) {
std::copy_n("\r\n\0", 3, buffer + string_end); if (crlf_ && buffer_size >= line_size + 2) {
string_end += 2; std::copy_n("\r\n", 2, buffer + line_size);
} else { line_size += 2;
std::copy_n("\n\0", 2, buffer + string_end); } else if (buffer_size > line_size) {
string_end += 1; std::copy_n("\n", 1, buffer + line_size);
line_size += 1;
} }
} }
size_t remove_eol(char*& buffer, size_t ssize) { size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') { if (buffer[ssize - 1] != '\n') {
crlf_ = false;
return ssize; return ssize;
} }
@@ -941,7 +995,7 @@ private:
size_t& buffer_size, const char* const second, size_t& buffer_size, const char* const second,
size_t second_size) { size_t second_size) {
buffer_size = first_size + second_size + 3; buffer_size = first_size + second_size + 3;
auto new_first = static_cast<char*>( auto* new_first = static_cast<char*>(
strict_realloc(static_cast<void*>(first), buffer_size)); strict_realloc(static_cast<void*>(first), buffer_size));
first = new_first; first = new_first;
@@ -949,33 +1003,28 @@ private:
first_size += second_size; first_size += second_size;
} }
bool append_next_line_to_buffer(char*& buffer, size_t& size) { bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
undo_remove_eol(buffer, size); size_t buffer_size) {
undo_remove_eol(buffer, line_size, buffer_size);
ssize_t next_ssize; chars_read_ = curr_char_;
if (file_) { auto [next_ssize, eof] =
next_ssize = get_line(helper_buffer_, helper_buffer_size, file_,
get_line_file(&helper_buffer_, &helper_buffer_size, file_); csv_data_buffer_, csv_data_size_, curr_char_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (next_ssize == -1) { if (eof) {
return false; return false;
} }
++line_number_; ++line_number_;
size_t next_size = remove_eol(helper_buffer_, next_ssize); size_t next_size = remove_eol(helper_buffer_, next_ssize);
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_, realloc_concat(buffer, line_size, next_line_buffer_size_,
next_size); helper_buffer_, next_size);
return true; return true;
} }
std::string get_buffer() { std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_}; return std::string{next_line_buffer_, next_line_size_};
} }
//////////////// ////////////////
@@ -1018,4 +1067,4 @@ private:
bool eof_{false}; bool eof_{false};
}; };
} /* ss */ } /* namespace ss */

View File

@@ -124,4 +124,4 @@ struct ne {
} }
}; };
} /* ss */ } /* namespace ss */

View File

@@ -293,4 +293,7 @@ private:
template <typename... Options> template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {}; struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */ template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */

View File

@@ -2,11 +2,9 @@
#include "common.hpp" #include "common.hpp"
#include "exception.hpp" #include "exception.hpp"
#include "setup.hpp" #include "setup.hpp"
#include "type_traits.hpp"
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include <memory>
#include <string> #include <string>
#include <vector> #include <vector>
@@ -322,8 +320,9 @@ private:
trim_left_if_enabled(begin_); trim_left_if_enabled(begin_);
for (done_ = false; !done_; read(delim)) for (done_ = false; !done_;) {
; read(delim);
}
return split_data_; return split_data_;
} }
@@ -479,4 +478,4 @@ public:
friend class converter; friend class converter;
}; };
} /* ss */ } /* namespace ss */

View File

@@ -34,7 +34,11 @@ struct left_of_impl;
template <size_t N, typename T, typename... Ts> template <size_t N, typename T, typename... Ts>
struct left_of_impl { struct left_of_impl {
static_assert(N < 128, "recursion limit reached"); private:
constexpr static auto recursion_limit = 128;
public:
static_assert(N < recursion_limit, "recursion limit reached");
static_assert(N != 0, "cannot take the whole tuple"); static_assert(N != 0, "cannot take the whole tuple");
using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>; using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>;
}; };
@@ -378,4 +382,4 @@ T to_object(U&& data) {
} }
} }
} /* trait */ } /* namespace ss */

View File

@@ -6,7 +6,7 @@ project(
'cpp_std=c++17', 'cpp_std=c++17',
'buildtype=debugoptimized', 'buildtype=debugoptimized',
'wrap_mode=forcefallback'], 'wrap_mode=forcefallback'],
version: '1.7.0', version: '1.7.2',
meson_version:'>=0.54.0') meson_version:'>=0.54.0')
fast_float_dep = dependency('fast_float') fast_float_dep = dependency('fast_float')

View File

@@ -14,14 +14,21 @@ headers = ['type_traits.hpp',
combined_file = [] combined_file = []
includes = [] includes = []
in_pp_block = False
for header in headers: for header in headers:
with open(headers_dir + header) as f: with open(headers_dir + header) as f:
for line in f.read().splitlines(): for line in f.read().splitlines():
if '#if ' in line:
in_pp_block = True
if '#endif' in line:
in_pp_block = False
if '#include "' in line or '#include <fast_float' in line: if '#include "' in line or '#include <fast_float' in line:
continue continue
if '#include <' in line: if '#include <' in line and not in_pp_block:
includes.append(line) includes.append(line)
continue continue

485
ssp.hpp
View File

@@ -8,7 +8,6 @@
#include <cstring> #include <cstring>
#include <exception> #include <exception>
#include <functional> #include <functional>
#include <memory>
#include <optional> #include <optional>
#include <string> #include <string>
#include <string_view> #include <string_view>
@@ -50,7 +49,11 @@ struct left_of_impl;
template <size_t N, typename T, typename... Ts> template <size_t N, typename T, typename... Ts>
struct left_of_impl { struct left_of_impl {
static_assert(N < 128, "recursion limit reached"); private:
constexpr static auto recursion_limit = 128;
public:
static_assert(N < recursion_limit, "recursion limit reached");
static_assert(N != 0, "cannot take the whole tuple"); static_assert(N != 0, "cannot take the whole tuple");
using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>; using type = tup_cat_t<T, typename left_of_impl<N - 1, Ts...>::type>;
}; };
@@ -394,7 +397,7 @@ T to_object(U&& data) {
} }
} }
} /* trait */ } /* namespace ss */
namespace ss { namespace ss {
@@ -406,15 +409,15 @@ class exception : public std::exception {
std::string msg_; std::string msg_;
public: public:
exception(const std::string& msg): msg_{msg} { exception(std::string msg): msg_{std::move(msg)} {
} }
virtual char const* what() const noexcept { char const* what() const noexcept override {
return msg_.c_str(); return msg_.c_str();
} }
}; };
} /* ss */ } /* namespace ss */
namespace ss { namespace ss {
@@ -490,7 +493,7 @@ struct member_wrapper<R T::*> {
template <typename T> \ template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value; constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */ } /* namespace ss */
namespace ss { namespace ss {
@@ -616,7 +619,12 @@ struct ne {
} }
}; };
} /* ss */ } /* namespace ss */
#if !__unix__
#include <array>
#include <cstdint>
#endif
namespace ss { namespace ss {
@@ -629,13 +637,13 @@ constexpr inline auto default_delimiter = ",";
constexpr inline auto get_line_initial_buffer_size = 128; constexpr inline auto get_line_initial_buffer_size = 128;
template <bool StringError> template <bool StringError>
inline void assert_string_error_defined() { void assert_string_error_defined() {
static_assert(StringError, static_assert(StringError,
"'string_error' needs to be enabled to use 'error_msg'"); "'string_error' needs to be enabled to use 'error_msg'");
} }
template <bool ThrowOnError> template <bool ThrowOnError>
inline void assert_throw_on_error_not_defined() { void assert_throw_on_error_not_defined() {
static_assert(!ThrowOnError, "cannot handle errors manually if " static_assert(!ThrowOnError, "cannot handle errors manually if "
"'throw_on_error' is enabled"); "'throw_on_error' is enabled");
} }
@@ -650,45 +658,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
} }
#if __unix__ #if __unix__
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) { inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
return getline(lineptr, n, stream); return getline(&lineptr, &n, file);
} }
#else #else
using ssize_t = intptr_t; using ssize_t = intptr_t;
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) { inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
if (lineptr == nullptr || n == nullptr || fp == nullptr) { std::array<char, get_line_initial_buffer_size> buff;
errno = EINVAL;
return -1;
}
char buff[get_line_initial_buffer_size]; if (lineptr == nullptr || n < sizeof(buff)) {
if (*lineptr == nullptr || *n < sizeof(buff)) {
size_t new_n = sizeof(buff); size_t new_n = sizeof(buff);
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n)); lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
*n = new_n; n = new_n;
} }
(*lineptr)[0] = '\0'; lineptr[0] = '\0';
size_t line_used = 0; size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), fp) != nullptr) { while (std::fgets(buff.data(), sizeof(buff), file) != nullptr) {
line_used = std::strlen(*lineptr); line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff); size_t buff_used = std::strlen(buff.data());
if (*n <= buff_used + line_used) { if (n <= buff_used + line_used) {
size_t new_n = *n * 2; size_t new_n = n * 2;
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n)); lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
*n = new_n; n = new_n;
} }
std::memcpy(*lineptr + line_used, buff, buff_used); std::memcpy(lineptr + line_used, buff.data(), buff_used);
line_used += buff_used; line_used += buff_used;
(*lineptr)[line_used] = '\0'; lineptr[line_used] = '\0';
if ((*lineptr)[line_used - 1] == '\n') { if (lineptr[line_used - 1] == '\n') {
return line_used; return line_used;
} }
} }
@@ -698,7 +701,67 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
#endif #endif
} /* ss */ inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto* new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char < csv_data_size) {
if (line_used + 1 >= n) {
size_t new_n = n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
lineptr[line_used++] = c;
if (c == '\n') {
lineptr[line_used] = '\0';
return line_used;
}
}
lineptr[line_used] = '\0';
return line_used;
}
// Reads the next line either from `file` (when non-null) or from the in-memory
// CSV buffer, storing it in `buffer` (which may be reallocated; its capacity
// is tracked in `buffer_size`).
//
// `curr_char` is the running read position. For file input it is advanced
// here by the number of bytes read; for buffer input get_line_buffer receives
// it by reference.
//
// Returns {line length, eof flag}. The flag is true when the underlying read
// returned -1. Throws std::bad_alloc if errno is ENOMEM in that case.
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
                                          FILE* file,
                                          const char* const csv_data_buffer,
                                          size_t csv_data_size,
                                          size_t& curr_char) {
    ssize_t ssize = 0;
    if (file) {
        ssize = get_line_file(buffer, buffer_size, file);
        // Only count bytes that were actually read: adding the -1 failure
        // value would move the unsigned cursor backwards (or wrap it to
        // SIZE_MAX when nothing has been read yet).
        if (ssize > 0) {
            curr_char += static_cast<size_t>(ssize);
        }
    } else {
        ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
                                csv_data_size, curr_char);
    }

    if (ssize == -1) {
        if (errno == ENOMEM) {
            throw std::bad_alloc{};
        }
        return {ssize, true};
    }

    return {ssize, false};
}
} /* namespace ss */
namespace ss { namespace ss {
@@ -991,7 +1054,10 @@ private:
template <typename... Options> template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {}; struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */ template <typename... Options>
struct setup<std::tuple<Options...>> : setup<Options...> {};
} /* namespace ss */
namespace ss { namespace ss {
@@ -1305,8 +1371,9 @@ private:
trim_left_if_enabled(begin_); trim_left_if_enabled(begin_);
for (done_ = false; !done_; read(delim)) for (done_ = false; !done_;) {
; read(delim);
}
return split_data_; return split_data_;
} }
@@ -1462,7 +1529,7 @@ public:
friend class converter; friend class converter;
}; };
} /* ss */ } /* namespace ss */
#ifndef SSP_DISABLE_FAST_FLOAT #ifndef SSP_DISABLE_FAST_FLOAT
@@ -1498,16 +1565,17 @@ std::enable_if_t<std::is_floating_point_v<T>, std::optional<T>> to_num(
"Conversion to long double is disabled"); "Conversion to long double is disabled");
constexpr static auto buff_max = 64; constexpr static auto buff_max = 64;
char short_buff[buff_max]; std::array<char, buff_max> short_buff;
size_t string_range = std::distance(begin, end); size_t string_range = std::distance(begin, end);
std::string long_buff; std::string long_buff;
char* buff; char* buff = nullptr;
if (string_range > buff_max) { if (string_range > buff_max) {
long_buff = std::string{begin, end}; long_buff = std::string{begin, end};
buff = long_buff.data(); buff = long_buff.data();
} else { } else {
buff = short_buff; buff = short_buff.data();
buff[string_range] = '\0'; buff[string_range] = '\0';
std::copy_n(begin, string_range, buff); std::copy_n(begin, string_range, buff);
} }
@@ -1545,6 +1613,8 @@ struct numeric_wrapper {
numeric_wrapper& operator=(numeric_wrapper&&) = default; numeric_wrapper& operator=(numeric_wrapper&&) = default;
numeric_wrapper& operator=(const numeric_wrapper&) = default; numeric_wrapper& operator=(const numeric_wrapper&) = default;
~numeric_wrapper() = default;
numeric_wrapper(T other) : value{other} { numeric_wrapper(T other) : value{other} {
} }
@@ -1595,7 +1665,7 @@ template <typename T>
struct unsupported_type { struct unsupported_type {
constexpr static bool value = false; constexpr static bool value = false;
}; };
} /* namespace */ } /* namespace errors */
template <typename T> template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> && std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -1668,10 +1738,13 @@ inline bool extract(const char* begin, const char* end, bool& value) {
return false; return false;
} }
} else { } else {
constexpr static auto true_size = 4;
constexpr static auto false_size = 5;
size_t size = end - begin; size_t size = end - begin;
if (size == 4 && std::strncmp(begin, "true", size) == 0) { if (size == true_size && std::strncmp(begin, "true", size) == 0) {
value = true; value = true;
} else if (size == 5 && std::strncmp(begin, "false", size) == 0) { } else if (size == false_size &&
std::strncmp(begin, "false", size) == 0) {
value = false; value = false;
} else { } else {
return false; return false;
@@ -1700,7 +1773,7 @@ inline bool extract(const char* begin, const char* end,
return true; return true;
} }
} /* ss */ } /* namespace ss */
namespace ss { namespace ss {
INIT_HAS_METHOD(tied) INIT_HAS_METHOD(tied)
@@ -1843,7 +1916,7 @@ public:
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) { if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
return convert_impl(elems, static_cast<T*>(nullptr)); return convert_impl(elems, static_cast<T*>(nullptr));
} else if constexpr (tied_class_v<T, Ts...>) { } else if constexpr (tied_class_v<T, Ts...>) {
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>; using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>; using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
return to_object<T>( return to_object<T>(
@@ -1917,8 +1990,9 @@ private:
} }
std::string error_sufix(const string_range msg, size_t pos) const { std::string error_sufix(const string_range msg, size_t pos) const {
constexpr static auto reserve_size = 32;
std::string error; std::string error;
error.reserve(32); error.reserve(reserve_size);
error.append("at column ") error.append("at column ")
.append(std::to_string(pos + 1)) .append(std::to_string(pos + 1))
.append(": \'") .append(": \'")
@@ -1962,6 +2036,7 @@ private:
void handle_error_multiline_limit_reached() { void handle_error_multiline_limit_reached() {
constexpr static auto error_msg = "multiline limit reached"; constexpr static auto error_msg = "multiline limit reached";
splitter_.unterminated_quote_ = false;
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2083,7 +2158,7 @@ private:
//////////////// ////////////////
bool columns_mapped() const { bool columns_mapped() const {
return column_mappings_.size() != 0; return !column_mappings_.empty();
} }
size_t column_position(size_t tuple_position) const { size_t column_position(size_t tuple_position) const {
@@ -2096,7 +2171,7 @@ private:
// assumes positions are valid and the vector is not empty // assumes positions are valid and the vector is not empty
void set_column_mapping(std::vector<size_t> positions, void set_column_mapping(std::vector<size_t> positions,
size_t number_of_columns) { size_t number_of_columns) {
column_mappings_ = positions; column_mappings_ = std::move(positions);
number_of_columns_ = number_of_columns; number_of_columns_ = number_of_columns;
} }
@@ -2182,10 +2257,10 @@ private:
friend class parser; friend class parser;
std::vector<size_t> column_mappings_; std::vector<size_t> column_mappings_;
size_t number_of_columns_; size_t number_of_columns_{0};
}; };
} /* ss */ } /* namespace ss */
namespace ss { namespace ss {
@@ -2208,10 +2283,12 @@ class parser {
constexpr static bool ignore_empty = setup<Options...>::ignore_empty; constexpr static bool ignore_empty = setup<Options...>::ignore_empty;
using header_splitter = ss::splitter<
ss::filter_not_t<ss::is_instance_of_multiline, Options...>>;
public: public:
parser(const std::string& file_name, parser(std::string file_name, std::string delim = ss::default_delimiter)
const std::string& delim = ss::default_delimiter) : file_name_{std::move(file_name)}, reader_{file_name_, delim} {
: file_name_{file_name}, reader_{file_name_, delim} {
if (reader_.file_) { if (reader_.file_) {
read_line(); read_line();
if constexpr (ignore_header) { if constexpr (ignore_header) {
@@ -2227,7 +2304,7 @@ public:
parser(const char* const csv_data_buffer, size_t csv_data_size, parser(const char* const csv_data_buffer, size_t csv_data_size,
const std::string& delim = ss::default_delimiter) const std::string& delim = ss::default_delimiter)
: file_name_{"buffer line"}, : file_name_{"CSV data buffer"},
reader_{csv_data_buffer, csv_data_size, delim} { reader_{csv_data_buffer, csv_data_size, delim} {
if (csv_data_buffer) { if (csv_data_buffer) {
read_line(); read_line();
@@ -2244,6 +2321,7 @@ public:
parser(parser&& other) = default; parser(parser&& other) = default;
parser& operator=(parser&& other) = default; parser& operator=(parser&& other) = default;
~parser() = default;
parser() = delete; parser() = delete;
parser(const parser& other) = delete; parser(const parser& other) = delete;
@@ -2338,20 +2416,49 @@ public:
return value; return value;
} }
// Returns a copy of the stored, unsplit header line (raw_header_).
// Does not compile when 'ignore_header' is defined.
std::string raw_header() const {
assert_ignore_header_not_defined();
return raw_header_;
}
std::vector<std::string> header() {
assert_ignore_header_not_defined();
clear_error();
header_splitter splitter;
std::string raw_header_copy = raw_header_;
if (!strict_split(splitter, raw_header_copy)) {
return {};
}
std::vector<std::string> split_header;
for (const auto& [begin, end] : splitter.split_data_) {
split_header.emplace_back(begin, end);
}
return split_header;
}
bool field_exists(const std::string& field) { bool field_exists(const std::string& field) {
assert_ignore_header_not_defined();
clear_error();
if (header_.empty()) { if (header_.empty()) {
split_header_data(); split_header_data();
} }
if (!valid()) {
return false;
}
return header_index(field).has_value(); return header_index(field).has_value();
} }
template <typename... Ts> template <typename... Ts>
void use_fields(const Ts&... fields_args) { void use_fields(const Ts&... fields_args) {
if constexpr (ignore_header) { assert_ignore_header_not_defined();
handle_error_header_ignored(); clear_error();
return;
}
if (header_.empty() && !eof()) { if (header_.empty() && !eof()) {
split_header_data(); split_header_data();
@@ -2364,7 +2471,7 @@ public:
auto fields = std::vector<std::string>{fields_args...}; auto fields = std::vector<std::string>{fields_args...};
if (fields.empty()) { if (fields.empty()) {
handle_error_empty_mapping(); handle_error_invalid_use_fields_argument();
return; return;
} }
@@ -2413,6 +2520,10 @@ public:
iterator(const iterator& other) = default; iterator(const iterator& other) = default;
iterator(iterator&& other) = default; iterator(iterator&& other) = default;
~iterator() = default;
iterator& operator=(const iterator& other) = delete;
iterator& operator=(iterator&& other) = delete;
value& operator*() { value& operator*() {
return value_; return value_;
@@ -2437,8 +2548,10 @@ public:
return *this; return *this;
} }
iterator& operator++(int) { iterator operator++(int) {
return ++*this; auto result = *this;
++*this;
return result;
} }
friend bool operator==(const iterator& lhs, const iterator& rhs) { friend bool operator==(const iterator& lhs, const iterator& rhs) {
@@ -2502,7 +2615,7 @@ public:
Fun&& fun = none{}) { Fun&& fun = none{}) {
using Value = no_void_validator_tup_t<Us...>; using Value = no_void_validator_tup_t<Us...>;
std::optional<Value> value; std::optional<Value> value;
try_convert_and_invoke<Value, Us...>(value, fun); try_convert_and_invoke<Value, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value)); return composite_with(std::move(value));
} }
@@ -2511,7 +2624,7 @@ public:
template <typename U, typename... Us, typename Fun = none> template <typename U, typename... Us, typename Fun = none>
composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) { composite<Ts..., std::optional<U>> or_object(Fun&& fun = none{}) {
std::optional<U> value; std::optional<U> value;
try_convert_and_invoke<U, Us...>(value, fun); try_convert_and_invoke<U, Us...>(value, std::forward<Fun>(fun));
return composite_with(std::move(value)); return composite_with(std::move(value));
} }
@@ -2619,7 +2732,8 @@ private:
using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun))); using Ret = decltype(try_invoke_impl(arg, std::forward<Fun>(fun)));
constexpr bool returns_void = std::is_same_v<Ret, void>; constexpr bool returns_void = std::is_same_v<Ret, void>;
if constexpr (!returns_void) { if constexpr (!returns_void) {
if (!try_invoke_impl(arg, std::forward<Fun>(fun))) { if (!try_invoke_impl(std::forward<Arg>(arg),
std::forward<Fun>(fun))) {
handle_error_failed_check(); handle_error_failed_check();
} }
} else { } else {
@@ -2654,22 +2768,58 @@ private:
if (valid()) { if (valid()) {
try_invoke(*value, std::forward<Fun>(fun)); try_invoke(*value, std::forward<Fun>(fun));
} }
return {valid() ? std::move(value) : std::nullopt, *this}; return {valid() ? std::forward<T>(value) : std::nullopt, *this};
} }
//////////////// ////////////////
// header // header
//////////////// ////////////////
// Compile-time guard for header-related methods: the static_assert below
// rejects compilation, with the quoted message, when 'ignore_header' is true.
// The body contains no runtime statements.
// NOTE(review): 'ignore_header' is presumably a compile-time option of the
// parser setup; confirm where it is declared.
void assert_ignore_header_not_defined() const {
static_assert(!ignore_header,
"cannot use this method when 'ignore_header' is defined");
}
// Runs `splitter` over the character data of `header`, using the reader's
// delimiter, and reports whether the header could be split.
//
// Returns false when `header` is empty. In non-throwing mode it also returns
// false when the splitter is not valid afterwards, after recording the
// failure through handle_error_invalid_header_split. In throw_on_error mode
// an ss::exception raised by the split is passed to
// decorate_rethrow_invalid_header_split.
bool strict_split(header_splitter& splitter, std::string& header) {
    if (header.empty()) {
        return false;
    }

    if constexpr (!throw_on_error) {
        splitter.split(header.data(), reader_.delim_);
        if (!splitter.valid()) {
            handle_error_invalid_header_split(splitter);
            return false;
        }
    } else {
        try {
            splitter.split(header.data(), reader_.delim_);
        } catch (const ss::exception& e) {
            decorate_rethrow_invalid_header_split(e);
        }
    }

    return true;
}
void split_header_data() { void split_header_data() {
ss::splitter<Options...> splitter; header_splitter splitter;
std::string raw_header_copy = raw_header_; std::string raw_header_copy = raw_header_;
splitter.split(raw_header_copy.data(), reader_.delim_);
if (!strict_split(splitter, raw_header_copy)) {
return;
}
for (const auto& [begin, end] : splitter.split_data_) { for (const auto& [begin, end] : splitter.split_data_) {
std::string field{begin, end}; std::string field{begin, end};
if (field.empty()) {
handle_error_duplicate_header_field(field);
header_.clear();
return;
}
if (std::find(header_.begin(), header_.end(), field) != if (std::find(header_.begin(), header_.end(), field) !=
header_.end()) { header_.end()) {
handle_error_invalid_header(field); handle_error_duplicate_header_field(field);
header_.clear(); header_.clear();
return; return;
} }
@@ -2700,7 +2850,7 @@ private:
} }
void handle_error_failed_check() { void handle_error_failed_check() {
constexpr static auto error_msg = " failed check"; constexpr static auto error_msg = ": failed check";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2713,7 +2863,7 @@ private:
} }
void handle_error_null_buffer() { void handle_error_null_buffer() {
constexpr static auto error_msg = " received null data buffer"; constexpr static auto error_msg = ": received null data buffer";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2726,7 +2876,7 @@ private:
} }
void handle_error_file_not_open() { void handle_error_file_not_open() {
constexpr static auto error_msg = " could not be opened"; constexpr static auto error_msg = ": could not be opened";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2739,7 +2889,7 @@ private:
} }
void handle_error_eof_reached() { void handle_error_eof_reached() {
constexpr static auto error_msg = " read on end of file"; constexpr static auto error_msg = ": read on end of file";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2764,20 +2914,6 @@ private:
} }
} }
// Records the "header row is ignored" error in whichever form the active
// error mode uses: an error string (file name followed by the message), a
// thrown ss::exception carrying the same text, or a plain boolean flag.
void handle_error_header_ignored() {
    constexpr static auto error_msg =
        ": the header row is ignored within the setup it cannot be used";

    if constexpr (!string_error && !throw_on_error) {
        // Neither string nor exception mode: only flag the failure.
        error_ = true;
    } else if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
    } else {
        throw ss::exception{file_name_ + error_msg};
    }
}
void handle_error_invalid_field(const std::string& field) { void handle_error_invalid_field(const std::string& field) {
constexpr static auto error_msg = constexpr static auto error_msg =
": header does not contain given field: "; ": header does not contain given field: ";
@@ -2805,8 +2941,9 @@ private:
} }
} }
void handle_error_empty_mapping() { void handle_error_invalid_use_fields_argument() {
constexpr static auto error_msg = "received empty mapping"; constexpr static auto error_msg =
"received invalid argument for 'use_fields'";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
@@ -2818,19 +2955,53 @@ private:
} }
} }
void handle_error_invalid_header(const std::string& field) { void handle_error_invalid_header_field() {
constexpr static auto error_msg = "header contains duplicates: "; constexpr static auto error_msg = ": header contains empty field";
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();
error_.append(error_msg).append(error_msg); error_.append(file_name_).append(error_msg);
} else if constexpr (throw_on_error) { } else if constexpr (throw_on_error) {
throw ss::exception{error_msg + field}; throw ss::exception{file_name_ + error_msg};
} else { } else {
error_ = true; error_ = true;
} }
} }
// Records a "header contains duplicate" error for `field` in whichever form
// the active error mode uses: an error string (file name, message, then the
// field), a thrown ss::exception carrying the same text, or a boolean flag.
void handle_error_duplicate_header_field(const std::string& field) {
    constexpr static auto error_msg = ": header contains duplicate: ";

    if constexpr (!string_error && !throw_on_error) {
        // Neither string nor exception mode: only flag the failure.
        error_ = true;
    } else if constexpr (string_error) {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(field);
    } else {
        throw ss::exception{file_name_ + error_msg + field};
    }
}
// Records a header-parsing failure reported by `splitter`. In string_error
// mode the error text is the file name, a fixed message and the splitter's
// own error message; otherwise only the boolean error flag is set.
void handle_error_invalid_header_split(const header_splitter& splitter) {
    constexpr static auto error_msg = ": failed header parsing: ";

    if constexpr (!string_error) {
        error_ = true;
    } else {
        error_.clear();
        error_.append(file_name_);
        error_.append(error_msg);
        error_.append(splitter.error_msg());
    }
}
// Always throws: wraps the message of `e` in a new ss::exception whose text
// is prefixed with the file name and ": failed header parsing: ".
// Only usable when throw_on_error is enabled (enforced by the static_assert).
void decorate_rethrow_invalid_header_split(const ss::exception& e) const {
    static_assert(throw_on_error,
                  "throw_on_error needs to be enabled to use this method");

    std::string message{file_name_};
    message.append(": failed header parsing: ");
    message.append(e.what());
    throw ss::exception{message};
}
void decorate_rethrow(const ss::exception& e) const { void decorate_rethrow(const ss::exception& e) const {
static_assert(throw_on_error, static_assert(throw_on_error,
"throw_on_error needs to be enabled to use this method"); "throw_on_error needs to be enabled to use this method");
@@ -2850,17 +3021,18 @@ private:
} }
struct reader { struct reader {
reader(const std::string& file_name_, const std::string& delim) reader(const std::string& file_name_, std::string delim)
: delim_{delim}, file_{std::fopen(file_name_.c_str(), "rb")} { : delim_{std::move(delim)},
file_{std::fopen(file_name_.c_str(), "rb")} {
} }
reader(const char* const buffer, size_t csv_data_size, reader(const char* const buffer, size_t csv_data_size,
const std::string& delim) std::string delim)
: delim_{delim}, csv_data_buffer_{buffer}, : delim_{std::move(delim)}, csv_data_buffer_{buffer},
csv_data_size_{csv_data_size} { csv_data_size_{csv_data_size} {
} }
reader(reader&& other) reader(reader&& other) noexcept
: buffer_{other.buffer_}, : buffer_{other.buffer_},
next_line_buffer_{other.next_line_buffer_}, next_line_buffer_{other.next_line_buffer_},
helper_buffer_{other.helper_buffer_}, helper_buffer_{other.helper_buffer_},
@@ -2881,7 +3053,7 @@ private:
other.file_ = nullptr; other.file_ = nullptr;
} }
reader& operator=(reader&& other) { reader& operator=(reader&& other) noexcept {
if (this != &other) { if (this != &other) {
buffer_ = other.buffer_; buffer_ = other.buffer_;
next_line_buffer_ = other.next_line_buffer_; next_line_buffer_ = other.next_line_buffer_;
@@ -2925,46 +3097,9 @@ private:
reader(const reader& other) = delete; reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete; reader& operator=(const reader& other) = delete;
// Copies the next line of an in-memory CSV buffer into *lineptr, in the
// manner of POSIX getline().
//
// lineptr / n      - destination buffer and its capacity; the buffer is
//                    (re)allocated through strict_realloc and both values are
//                    updated when it is created or grown.
// csv_data_buffer  - source data; it does not need to be null terminated.
// csv_data_size    - number of valid bytes in csv_data_buffer.
// curr_char        - read position, advanced past the consumed bytes.
//
// Returns the number of bytes stored (including the '\n' when present), or
// -1 when curr_char is already at or past the end of the data. The stored
// line is always null terminated.
ssize_t get_line_buffer(char** lineptr, size_t* n,
                        const char* const csv_data_buffer,
                        size_t csv_data_size, size_t& curr_char) {
    if (curr_char >= csv_data_size) {
        return -1;
    }

    if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
        *lineptr = static_cast<char*>(
            strict_realloc(*lineptr, get_line_initial_buffer_size));
        *n = get_line_initial_buffer_size;
    }

    size_t line_used = 0;
    // Strictly less than: index csv_data_size is one past the last valid
    // byte and must never be read.
    while (curr_char < csv_data_size) {
        // Keep one spare byte so the terminator below always fits.
        if (line_used + 1 >= *n) {
            const size_t new_n = *n * 2;
            *lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
            *n = new_n;
        }

        const char c = csv_data_buffer[curr_char++];
        (*lineptr)[line_used++] = c;
        if (c == '\n') {
            break;
        }
    }

    // Terminate both complete lines and a final line that lacks '\n'.
    (*lineptr)[line_used] = '\0';
    return static_cast<ssize_t>(line_used);
}
// read next line each time in order to set eof_ // read next line each time in order to set eof_
bool read_next() { bool read_next() {
next_line_converter_.clear_error(); next_line_converter_.clear_error();
ssize_t ssize = 0;
size_t size = 0; size_t size = 0;
while (size == 0) { while (size == 0) {
++line_number_; ++line_number_;
@@ -2973,21 +3108,11 @@ private:
} }
chars_read_ = curr_char_; chars_read_ = curr_char_;
if (file_) { auto [ssize, eof] =
ssize = get_line_file(&next_line_buffer_, get_line(next_line_buffer_, next_line_buffer_size_, file_,
&next_line_buffer_size_, file_); csv_data_buffer_, csv_data_size_, curr_char_);
curr_char_ = std::ftell(file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (ssize == -1) { if (eof) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
return false; return false;
} }
@@ -3012,7 +3137,8 @@ private:
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) { next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_escape(); next_line_converter_.handle_error_unterminated_escape();
return; return;
} }
@@ -3030,7 +3156,8 @@ private:
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) { next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_quote(); next_line_converter_.handle_error_unterminated_quote();
return; return;
} }
@@ -3041,8 +3168,9 @@ private:
return; return;
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(
next_line_size_)) { next_line_buffer_, next_line_size_,
next_line_buffer_size_)) {
next_line_converter_ next_line_converter_
.handle_error_unterminated_escape(); .handle_error_unterminated_escape();
return; return;
@@ -3073,7 +3201,7 @@ private:
} }
bool escaped_eol(size_t size) { bool escaped_eol(size_t size) {
const char* curr; const char* curr = nullptr;
for (curr = next_line_buffer_ + size - 1; for (curr = next_line_buffer_ + size - 1;
curr >= next_line_buffer_ && curr >= next_line_buffer_ &&
setup<Options...>::escape::match(*curr); setup<Options...>::escape::match(*curr);
@@ -3086,18 +3214,20 @@ private:
return next_line_converter_.unterminated_quote(); return next_line_converter_.unterminated_quote();
} }
void undo_remove_eol(char* buffer, size_t& string_end) { void undo_remove_eol(char* buffer, size_t& line_size,
if (crlf_) { size_t buffer_size) {
std::copy_n("\r\n\0", 3, buffer + string_end); if (crlf_ && buffer_size >= line_size + 2) {
string_end += 2; std::copy_n("\r\n", 2, buffer + line_size);
} else { line_size += 2;
std::copy_n("\n\0", 2, buffer + string_end); } else if (buffer_size > line_size) {
string_end += 1; std::copy_n("\n", 1, buffer + line_size);
line_size += 1;
} }
} }
size_t remove_eol(char*& buffer, size_t ssize) { size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') { if (buffer[ssize - 1] != '\n') {
crlf_ = false;
return ssize; return ssize;
} }
@@ -3117,7 +3247,7 @@ private:
size_t& buffer_size, const char* const second, size_t& buffer_size, const char* const second,
size_t second_size) { size_t second_size) {
buffer_size = first_size + second_size + 3; buffer_size = first_size + second_size + 3;
auto new_first = static_cast<char*>( auto* new_first = static_cast<char*>(
strict_realloc(static_cast<void*>(first), buffer_size)); strict_realloc(static_cast<void*>(first), buffer_size));
first = new_first; first = new_first;
@@ -3125,33 +3255,28 @@ private:
first_size += second_size; first_size += second_size;
} }
bool append_next_line_to_buffer(char*& buffer, size_t& size) { bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
undo_remove_eol(buffer, size); size_t buffer_size) {
undo_remove_eol(buffer, line_size, buffer_size);
ssize_t next_ssize; chars_read_ = curr_char_;
if (file_) { auto [next_ssize, eof] =
next_ssize = get_line(helper_buffer_, helper_buffer_size, file_,
get_line_file(&helper_buffer_, &helper_buffer_size, file_); csv_data_buffer_, csv_data_size_, curr_char_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
if (next_ssize == -1) { if (eof) {
return false; return false;
} }
++line_number_; ++line_number_;
size_t next_size = remove_eol(helper_buffer_, next_ssize); size_t next_size = remove_eol(helper_buffer_, next_ssize);
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_, realloc_concat(buffer, line_size, next_line_buffer_size_,
next_size); helper_buffer_, next_size);
return true; return true;
} }
std::string get_buffer() { std::string get_buffer() {
return std::string{next_line_buffer_, next_line_buffer_size_}; return std::string{next_line_buffer_, next_line_size_};
} }
//////////////// ////////////////
@@ -3194,4 +3319,4 @@ private:
bool eof_{false}; bool eof_{false};
}; };
} /* ss */ } /* namespace ss */

View File

@@ -23,7 +23,7 @@ struct is_unsigned : public std::is_unsigned<T> {};
template <> template <>
struct is_unsigned<ss::uint8> : public std::true_type {}; struct is_unsigned<ss::uint8> : public std::true_type {};
} /* namespace */ } /* anonymous namespace */
static_assert(is_signed<ss::int8>::value); static_assert(is_signed<ss::int8>::value);
static_assert(is_unsigned<ss::uint8>::value); static_assert(is_unsigned<ss::uint8>::value);

View File

@@ -19,7 +19,7 @@
namespace ss { namespace ss {
template <typename... Ts> template <typename... Ts>
class parser; class parser;
} /* ss */ } /* namespace ss */
namespace { namespace {
@@ -145,6 +145,17 @@ struct unique_file_name {
CHECK_FALSE(std::string{e.what()}.empty()); \ CHECK_FALSE(std::string{e.what()}.empty()); \
} }
// Test helper: compares two indexable containers.
// Each argument is evaluated once and bound to a const reference. The sizes
// are checked with CHECK_EQ, then elements are compared pairwise with
// CHECK_EQ up to the smaller of the two sizes, so a size mismatch never
// indexes past the end of the shorter container.
// NOTE(review): relies on std::min and on the test framework's CHECK_EQ
// being in scope at the point of use.
#define CHECK_EQ_ARRAY(first, second) \
{ \
const auto& first_ = (first); \
const auto& second_ = (second); \
CHECK_EQ(first_.size(), second_.size()); \
for (size_t i_ = 0; i_ < std::min(first_.size(), second_.size()); \
++i_) { \
CHECK_EQ(first_[i_], second_[i_]); \
} \
}
template <typename T> template <typename T>
[[maybe_unused]] std::vector<std::vector<T>> vector_combinations( [[maybe_unused]] std::vector<std::vector<T>> vector_combinations(
const std::vector<T>& v, size_t n) { const std::vector<T>& v, size_t n) {
@@ -166,6 +177,22 @@ template <typename T>
return ret; return ret;
} }
// Joins the header fields into a single line, placing `delimiter` between
// consecutive fields (no leading or trailing delimiter).
// Returns an empty string for an empty header.
[[maybe_unused]] std::string merge_header(
    const std::vector<std::string>& header,
    const std::string& delimiter = ss::default_delimiter) {
    std::string merged;
    for (size_t i = 0; i < header.size(); ++i) {
        // The separator goes before every field except the first, so nothing
        // has to be trimmed off afterwards.
        if (i != 0) {
            merged.append(delimiter);
        }
        merged.append(header[i]);
    }
    return merged;
}
[[maybe_unused]] std::string make_buffer(const std::string& file_name) { [[maybe_unused]] std::string make_buffer(const std::string& file_name) {
std::ifstream in{file_name, std::ios::binary}; std::ifstream in{file_name, std::ios::binary};
std::string tmp; std::string tmp;
@@ -224,4 +251,4 @@ make_parser(const std::string& file_name,
return make_parser_impl<buffer_mode, Ts...>(file_name, delim); return make_parser_impl<buffer_mode, Ts...>(file_name, delim);
} }
} /* namespace */ } /* anonymous namespace */

View File

@@ -28,6 +28,7 @@ void expect_error_on_command(ss::parser<Ts...>& p,
if (ss::setup<Ts...>::throw_on_error) { if (ss::setup<Ts...>::throw_on_error) {
try { try {
command(); command();
FAIL("expected exception");
} catch (const std::exception& e) { } catch (const std::exception& e) {
CHECK_FALSE(std::string{e.what()}.empty()); CHECK_FALSE(std::string{e.what()}.empty());
} }
@@ -109,4 +110,4 @@ static void make_and_write(const std::string& file_name,
} }
} }
} /* namespace */ } /* anonymous namespace */

View File

@@ -16,13 +16,14 @@ TEST_CASE_TEMPLATE("test multiline restricted", T, ParserOptionCombinations) {
out << "5,6,just\\\n\\\nstrings" << std::endl; out << "5,6,just\\\n\\\nstrings" << std::endl;
#endif #endif
out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
out << "99,100,\"\n\n\n\n" << std::endl;
out << "9,10,\"just\\\n\nstrings\"" << std::endl; out << "9,10,\"just\\\n\nstrings\"" << std::endl;
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl; out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl; out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
out << "19,20,just strings" << std::endl; out << "19,20,just strings" << std::endl;
} }
auto bad_lines = 15; auto bad_lines = 20;
auto num_errors = 0; auto num_errors = 0;
auto [p, _] = auto [p, _] =

View File

@@ -9,6 +9,7 @@ struct has_type<T, std::tuple<Us...>>
template <typename T, typename... Ts> template <typename T, typename... Ts>
static void test_fields(const std::string file_name, const std::vector<X>& data, static void test_fields(const std::string file_name, const std::vector<X>& data,
const std::vector<std::string>& header,
const std::vector<std::string>& fields) { const std::vector<std::string>& fields) {
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
@@ -17,9 +18,14 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(file_name, ",");
CHECK_FALSE(p.field_exists("Unknown")); CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(fields); p.use_fields(fields);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
std::vector<CaseType> i; std::vector<CaseType> i;
for (const auto& a : p.template iterate<CaseType>()) { for (const auto& a : p.template iterate<CaseType>()) {
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.push_back(a); i.push_back(a);
} }
@@ -40,12 +46,12 @@ static void test_fields(const std::string file_name, const std::vector<X>& data,
TEST_CASE_TEMPLATE("test various cases with header", T, TEST_CASE_TEMPLATE("test various cases with header", T,
ParserOptionCombinations) { ParserOptionCombinations) {
unique_file_name f{"various_cases_with_header"}; unique_file_name f{"various_cases_with_header"};
using str = std::string;
constexpr static auto Int = "Int"; constexpr static auto Int = "Int";
constexpr static auto Dbl = "Double"; constexpr static auto Dbl = "Double";
constexpr static auto Str = "String"; constexpr static auto Str = "String";
using str = std::string; const std::vector<std::string> header{Int, Dbl, Str};
std::vector<std::string> header{Int, Dbl, Str};
std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, std::vector<X> data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"},
{7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}};
@@ -59,6 +65,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
std::vector<X> i; std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) { for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
@@ -71,46 +79,22 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
p.ignore_next(); p.ignore_next();
for (const auto& a : p.iterate<int, double, std::string>()) { for (const auto& a : p.iterate<int, double, std::string>()) {
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
i.emplace_back(ss::to_object<X>(a)); i.emplace_back(ss::to_object<X>(a));
} }
CHECK_EQ(i, data); CHECK_EQ(i, data);
} }
{
ss::parser<ss::ignore_header> p{f.name, ","};
std::vector<X> i;
for (const auto& a : p.iterate<int, double, std::string>()) {
i.emplace_back(ss::to_object<X>(a));
}
CHECK_EQ(i, data);
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Dbl, Str);
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
CHECK_FALSE(p.field_exists("Unknown"));
p.use_fields(Int, "Unknown");
CHECK_FALSE(p.valid());
}
{
ss::parser<ss::ignore_header, ss::string_error> p{f.name, ","};
p.use_fields(Int, Int);
CHECK_FALSE(p.valid());
}
{ {
ss::parser<ss::string_error> p{f.name, ","}; ss::parser<ss::string_error> p{f.name, ","};
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
p.use_fields(Int, Dbl); p.use_fields(Int, Dbl);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{ {
auto [int_, double_] = p.get_next<int, double>(); auto [int_, double_] = p.get_next<int, double>();
@@ -119,6 +103,8 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
} }
p.use_fields(Dbl, Int); p.use_fields(Dbl, Int);
CHECK_EQ(header, p.header());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
{ {
auto [double_, int_] = p.get_next<double, int>(); auto [double_, int_] = p.get_next<double, int>();
@@ -163,25 +149,25 @@ TEST_CASE_TEMPLATE("test various cases with header", T,
template_params.append(type) template_params.append(type)
arg_params.append(header[type]) arg_params.append(header[type])
call = 'testFields<' + ', '.join(template_params) + \ call = 'testFields<' + ', '.join(template_params) + \
'>(o, d, {' + ', '.join(arg_params) + '});' '>(o, d, header, {' + ', '.join(arg_params) + '});'
print(call) print(call)
*/ */
test_fields<T, str>(o, d, {Str}); test_fields<T, str>(o, d, header, {Str});
test_fields<T, int>(o, d, {Int}); test_fields<T, int>(o, d, header, {Int});
test_fields<T, double>(o, d, {Dbl}); test_fields<T, double>(o, d, header, {Dbl});
test_fields<T, str, int>(o, d, {Str, Int}); test_fields<T, str, int>(o, d, header, {Str, Int});
test_fields<T, str, double>(o, d, {Str, Dbl}); test_fields<T, str, double>(o, d, header, {Str, Dbl});
test_fields<T, int, str>(o, d, {Int, Str}); test_fields<T, int, str>(o, d, header, {Int, Str});
test_fields<T, int, double>(o, d, {Int, Dbl}); test_fields<T, int, double>(o, d, header, {Int, Dbl});
test_fields<T, double, str>(o, d, {Dbl, Str}); test_fields<T, double, str>(o, d, header, {Dbl, Str});
test_fields<T, double, int>(o, d, {Dbl, Int}); test_fields<T, double, int>(o, d, header, {Dbl, Int});
test_fields<T, str, int, double>(o, d, {Str, Int, Dbl}); test_fields<T, str, int, double>(o, d, header, {Str, Int, Dbl});
test_fields<T, str, double, int>(o, d, {Str, Dbl, Int}); test_fields<T, str, double, int>(o, d, header, {Str, Dbl, Int});
test_fields<T, int, str, double>(o, d, {Int, Str, Dbl}); test_fields<T, int, str, double>(o, d, header, {Int, Str, Dbl});
test_fields<T, int, double, str>(o, d, {Int, Dbl, Str}); test_fields<T, int, double, str>(o, d, header, {Int, Dbl, Str});
test_fields<T, double, str, int>(o, d, {Dbl, Str, Int}); test_fields<T, double, str, int>(o, d, header, {Dbl, Str, Int});
test_fields<T, double, int, str>(o, d, {Dbl, Int, Str}); test_fields<T, double, int, str>(o, d, header, {Dbl, Int, Str});
} }
template <typename T> template <typename T>
@@ -190,6 +176,17 @@ void test_invalid_fields(const std::vector<std::string>& lines,
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
auto check_header = [&lines](auto& p) {
if (lines.empty()) {
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
} else {
CHECK_EQ(lines[0], merge_header(p.header()));
CHECK_EQ(merge_header(p.header(), ","), p.raw_header());
}
CHECK(p.valid());
};
unique_file_name f{"invalid_fields"}; unique_file_name f{"invalid_fields"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
@@ -203,6 +200,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields(); }; auto command = [&p = p] { p.use_fields(); };
expect_error_on_command(p, command); expect_error_on_command(p, command);
check_header(p);
} }
{ {
@@ -210,6 +208,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p] { p.use_fields("Unknown"); }; auto command = [&p = p] { p.use_fields("Unknown"); };
expect_error_on_command(p, command); expect_error_on_command(p, command);
check_header(p);
} }
{ {
@@ -221,6 +220,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
if (!fields.empty()) { if (!fields.empty()) {
expect_error_on_command(p, command); expect_error_on_command(p, command);
} }
check_header(p);
} }
{ {
@@ -230,15 +230,19 @@ void test_invalid_fields(const std::vector<std::string>& lines,
p.use_fields(fields.at(0)); p.use_fields(fields.at(0));
p.template get_next<std::string, std::string>(); p.template get_next<std::string, std::string>();
}; };
check_header(p);
if (!fields.empty()) { if (!fields.empty()) {
expect_error_on_command(p, command); expect_error_on_command(p, command);
} }
check_header(p);
} }
{ {
// Invalid header // Invalid header
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ","); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name, ",");
auto command = [&p = p, &fields = fields] { p.use_fields(fields); }; auto command = [&p = p, &fields = fields] { p.use_fields(fields); };
check_header(p);
if (!fields.empty()) { if (!fields.empty()) {
// Pass if there are no duplicates, fail otherwise // Pass if there are no duplicates, fail otherwise
@@ -255,6 +259,7 @@ void test_invalid_fields(const std::vector<std::string>& lines,
} }
} }
} }
check_header(p);
} }
} }
@@ -289,7 +294,7 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
constexpr auto buffer_mode = T::BufferMode::value; constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode; using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid rows with header"}; unique_file_name f{"invalid_rows_with_header"};
{ {
std::ofstream out{f.name}; std::ofstream out{f.name};
out << "Int,String,Double" << std::endl; out << "Int,String,Double" << std::endl;
@@ -301,8 +306,12 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
out << "six,line6,10.11" << std::endl; out << "six,line6,10.11" << std::endl;
} }
std::vector<std::string> header = {"Int", "String", "Double"};
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Int", "String", "Double"); p.use_fields("Int", "String", "Double");
using data = std::tuple<int, std::string, double>; using data = std::tuple<int, std::string, double>;
@@ -325,10 +334,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{3, "line3", 67.8}, {3, "line3", 67.8},
{5, "line5", 9.10}}; {5, "line5", 9.10}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
} }
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("Double", "Int"); p.use_fields("Double", "Int");
using data = std::tuple<double, int>; using data = std::tuple<double, int>;
@@ -349,10 +362,14 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}}; std::vector<data> expected = {{2.34, 1}, {67.8, 3}, {9.10, 5}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
} }
{ {
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name); auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
p.use_fields("String", "Double"); p.use_fields("String", "Double");
using data = std::tuple<std::string, double>; using data = std::tuple<std::string, double>;
@@ -376,6 +393,92 @@ TEST_CASE_TEMPLATE("test invalid rows with header", T,
{"line5", 9.10}, {"line5", 9.10},
{"line6", 10.11}}; {"line6", 10.11}};
CHECK_EQ(i, expected); CHECK_EQ(i, expected);
CHECK_EQ_ARRAY(header, p.header());
CHECK_EQ(merge_header(p.header()), p.raw_header());
}
}
TEST_CASE_TEMPLATE("test invalid header", T, ParserOptionCombinations) {
constexpr auto buffer_mode = T::BufferMode::value;
using ErrorMode = typename T::ErrorMode;
unique_file_name f{"invalid_header"};
// Empty header
{
std::ofstream out{f.name};
out << "" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode>(f.name);
CHECK(p.header().empty());
CHECK_EQ(merge_header(p.header()), p.raw_header());
CHECK(p.valid());
}
// Unterminated quote in header
{
std::ofstream out{f.name};
out << "\"Int" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::quote<'"'>, ss::multiline>(
f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::quote<'"'>,
ss::escape<'\\'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "\"Int");
}
// Unterminated escape in header
{
std::ofstream out{f.name};
out << "Int\\" << std::endl;
out << "1" << std::endl;
}
{
auto [p, _] =
make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
}
{
auto [p, _] = make_parser<buffer_mode, ErrorMode, ss::escape<'\\'>,
ss::quote<'"'>, ss::multiline>(f.name);
auto command = [&p = p] { p.header(); };
expect_error_on_command(p, command);
CHECK_EQ(p.raw_header(), "Int\\");
} }
} }

View File

@@ -1,13 +1,7 @@
#include "test_helpers.hpp" #include "test_helpers.hpp"
#include <algorithm>
#include <filesystem>
#include <fstream> #include <fstream>
#include <iomanip>
#include <iostream> #include <iostream>
#include <regex>
#include <ss/parser.hpp> #include <ss/parser.hpp>
#include <sstream>
#include <unordered_map>
#include <unordered_set> #include <unordered_set>
#ifndef SEGMENT_NAME #ifndef SEGMENT_NAME
@@ -333,8 +327,10 @@ void test_data_combinations(const std::vector<column>& input_data,
field_header.push_back(field{el.header}); field_header.push_back(field{el.header});
} }
std::string header_line;
if (include_header) { if (include_header) {
auto header_data = generate_csv_data<Ts...>(field_header, delim); auto header_data = generate_csv_data<Ts...>(field_header, delim);
header_line = merge_header(header_data, delim);
if (input_data.size() == 0 && rand() % 10 == 0) { if (input_data.size() == 0 && rand() % 10 == 0) {
write_to_file(header_data, delim, f.name, false); write_to_file(header_data, delim, f.name, false);
} else { } else {
@@ -403,7 +399,9 @@ void test_data_combinations(const std::vector<column>& input_data,
fields.push_back(header[index]); fields.push_back(header[index]);
} }
if constexpr (!setup::ignore_header) {
p.use_fields(fields); p.use_fields(fields);
}
if (!p.valid()) { if (!p.valid()) {
if constexpr (setup::string_error) { if constexpr (setup::string_error) {
@@ -425,8 +423,19 @@ void test_data_combinations(const std::vector<column>& input_data,
} }
}; };
// Checks the header the parser reports against the expected header.
// `p` and `header` are captured by reference; `include_header` and
// `header_line` are captured by copy.
auto check_header = [&p = p, &header = header, include_header,
header_line] {
// Nothing is checked unless include_header is set.
if (include_header) {
// Setups that ignore the header are excluded at compile time.
if constexpr (!setup::ignore_header) {
// Element-wise comparison of the expected header with p.header().
CHECK_EQ_ARRAY(header, p.header());
// The unsplit header line must match what p.raw_header() returns.
CHECK_EQ(header_line, p.raw_header());
}
}
};
int num_columns = layout.size(); int num_columns = layout.size();
for (size_t i = 0; i < n + 1; ++i) { for (size_t i = 0; i < n + 1; ++i) {
check_header();
try { try {
switch (num_columns) { switch (num_columns) {
case 1: { case 1: {
@@ -616,7 +625,7 @@ void test_option_combinations3() {
test_option_combinations2<Ts..., trim>(); test_option_combinations2<Ts..., trim>();
} }
} /* namespace */ } /* anonymous namespace */
// Tests split into multiple compilation units // Tests split into multiple compilation units
#if 0 #if 0

View File

@@ -3,12 +3,14 @@
set -x set -x
set -e set -e
python3 script/single_header_generator.py > ssp.cpp TMP_HDR=test_single_header.hpp
TMP_SRC=test_single_header.cpp
TMP_BIN=test_single_header
echo 'int main(){ ss::parser p{""}; p.get_next<int, float>(); return 0; }' \ python3 script/single_header_generator.py > ${TMP_HDR}
>> ssp.cpp cat ${TMP_HDR} test/test_single_header_main.txt > ${TMP_SRC}
g++ -std=c++17 ssp.cpp -o ssp.bin -Wall -Wextra g++ -std=c++17 ${TMP_SRC} -o ${TMP_BIN} -Wall -Wextra
./ssp.bin ./${TMP_BIN}
rm ssp.cpp ssp.bin rm ${TMP_HDR} ${TMP_SRC} ${TMP_BIN}

View File

@@ -0,0 +1,12 @@
int main() {
using quote = ss::quote<'"'>;
using escape = ss::escape<'\\'>;
using trim = ss::trim<' '>;
std::string data = "1,string,2.34,c";
ss::parser<quote, escape, trim, ss::multiline> p{data.c_str(), data.size()};
auto tup = p.get_next<int, std::string, float, std::optional<char>>();
return 0;
}

View File

@@ -145,7 +145,7 @@ make_combinations(const std::vector<std::string>& input,
return {std::move(lines), std::move(expectations)}; return {std::move(lines), std::move(expectations)};
} }
} /* namespace */ } /* anonymous namespace */
/* ********************************** */ /* ********************************** */
/* ********************************** */ /* ********************************** */
@@ -548,7 +548,7 @@ public:
return splitter.size_shifted(); return splitter.size_shifted();
} }
}; };
} /* ss */ } /* namespace ss */
TEST_CASE("splitter test resplit unterminated quote") { TEST_CASE("splitter test resplit unterminated quote") {