12 Commits

Author SHA1 Message Date
red0124
457defadaa Bugfix/odr violations (#47)
* Make common non-member functions inline, remove unreachable line from get_line_buffer

* [skip ci] Fix namespace comments
2024-03-12 10:22:10 +01:00
red0124
1b9a01f787 Feature/fuzz (#44)
* Add fuzzing ci, add bedge to README
2024-03-03 20:46:12 +01:00
red0124
f5b750dd93 Merge pull request #43 from red0124/bugfix/ftell_slowdown
Remove usage of ftell when updating cursor position value
2024-03-02 02:20:05 +01:00
ado
7f53b585f9 Remove usage of ftell when updating cursor position value 2024-03-02 00:34:19 +01:00
ado
67ef6651c1 Fix README typos 2024-03-01 17:23:26 +01:00
ado
fa4ec324de Update version 2024-03-01 16:22:45 +01:00
red0124
f229de61d6 Merge pull request #42 from red0124/dev
Merge with development
2024-03-01 16:17:16 +01:00
red0124
df2beab6c3 Fix buffer overflow on multiline restricted with unterminated quote and multiple empty lines (#41) 2024-03-01 15:46:34 +01:00
red0124
27bd60b5ce Fix bug with get_line_buffer when used with data buffer that is not null terminated and does not end with \n (#40) 2024-03-01 02:47:04 +01:00
red0124
c5b50f2b47 Fix compile issues for c++20 (#39) 2024-03-01 00:52:00 +01:00
red0124
d8dcce7f2a Fix buffer overflow on multiline csv data containing null characters (#38) 2024-02-29 22:03:20 +01:00
red0124
126329608c Add macOS ci (#36)
* Add macOS ci, update README
2024-02-28 22:20:26 +01:00
25 changed files with 444 additions and 237 deletions

7
.github/fuzz/makefile vendored Normal file
View File

@@ -0,0 +1,7 @@
EXE=ssp_fuzz
all:
clang++ ${CXXFLAGS} ssp_fuzz.cpp -fsanitize=fuzzer -std=c++17 -o ${EXE}
run:
./${EXE} -max_total_time=900

81
.github/fuzz/ssp_fuzz.cpp vendored Normal file
View File

@@ -0,0 +1,81 @@
#include "../../ssp.hpp"
#include <filesystem>
#include <iostream>
#include <unistd.h>
template <typename... Ts>
void test_ssp_file_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
std::string file_name = std::filesystem::temp_directory_path().append(
"ss_fuzzer" + std::to_string(getpid()) + ".csv");
FILE* file = std::fopen(file_name.c_str(), "wb");
if (!file) {
std::exit(1);
}
std::fwrite(data, size, 1, file);
std::fclose(file);
ss::parser<Ts...> p{file_name.c_str(), delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
std::remove(file_name.c_str());
}
template <typename... Ts>
void test_ssp_buffer_mode(const uint8_t* data, size_t size,
std::string delim = ss::default_delimiter) {
ss::parser<Ts...> p{(const char*)data, size, delim};
while (!p.eof()) {
try {
const auto& [s0, s1] =
p.template get_next<std::string, std::string>();
if (s0.size() == 10000) {
std::cout << s0.size() << std::endl;
}
} catch (ss::exception& e) {
continue;
}
}
}
template <typename... Ts>
void test_ssp(const uint8_t* data, size_t size) {
test_ssp_file_mode<Ts...>(data, size);
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size);
test_ssp_file_mode<Ts...>(data, size, ":::");
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size, ":::");
test_ssp_buffer_mode<Ts...>(data, size);
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size);
test_ssp_buffer_mode<Ts...>(data, size, ":::");
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size, ":::");
}
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
using escape = ss::escape<'\\'>;
using quote = ss::quote<'"'>;
using trim = ss::trim<' ', '\t'>;
using multiline_r = ss::multiline_restricted<5>;
test_ssp<>(data, size);
test_ssp<escape>(data, size);
test_ssp<quote>(data, size);
test_ssp<trim>(data, size);
test_ssp<quote, escape>(data, size);
test_ssp<escape, quote, multiline_r, trim>(data, size);
test_ssp<escape, quote, multiline_r, trim, ss::ignore_empty>(data, size);
return 0;
}

43
.github/workflows/fuzz.yml vendored Normal file
View File

@@ -0,0 +1,43 @@
name: fuzz-ci
on:
workflow_dispatch:
push:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
pull_request:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
jobs:
clang_tests:
if: >-
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
! contains(toJSON(github.event.commits.*.message), '[skip github]')
runs-on: ubuntu-latest
name: "Fuzzing"
container:
image: silkeh/clang:15
options: -v /usr/local:/host_usr_local
steps:
- uses: actions/checkout@v1
- name: Build
working-directory: .github/fuzz
run: make
- name: Run
working-directory: .github/fuzz
run: make run

55
.github/workflows/macos-apple-clang.yml vendored Normal file
View File

@@ -0,0 +1,55 @@
name: macos-apple-clang-ci
on:
workflow_dispatch:
push:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
pull_request:
branches:
- master
- feature/**
- improvement/**
- bugfix/**
jobs:
clang_tests:
if: >-
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
! contains(toJSON(github.event.commits.*.message), '[skip github]')
strategy:
matrix:
xcode: ['13.4.1', '14.1']
type: [Release, Debug]
runs-on: macos-12
env:
DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer
name: "Xcode ${{matrix.xcode}}: ${{matrix.type}}"
steps:
- uses: actions/checkout@v3
- uses: friendlyanon/fetch-core-count@v1
id: cores
- name: Install dependencies
run: script/ci_install_deps.sh
- name: Configure
run: cmake -S test -B build -DCMAKE_BUILD_TYPE=${{matrix.type}}
- name: Build
run: cmake --build build -j ${{steps.cores.outputs.count}}
- name: Run
working-directory: build
run: ctest --output-on-failure

2
.gitignore vendored
View File

@@ -1,6 +1,8 @@
compile_commands.json
.clang-format
.clang-tidy
.ccls-cache/*
.cache/
experiment/
build/
hbuild/

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
project(
ssp
VERSION 1.7.0
VERSION 1.7.2
DESCRIPTION "csv parser"
HOMEPAGE_URL "https://github.com/red0124/ssp"
LANGUAGES CXX

View File

@@ -9,6 +9,7 @@
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![coverage](https://coveralls.io/repos/github/red0124/ssp/badge.svg?branch=master)](https://coveralls.io/github/red0124/ssp?branch=master)
[![fuzz](https://github.com/red0124/ssp/workflows/fuzz-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/fuzz.yml)
[![single-header](https://github.com/red0124/ssp/workflows/single-header-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/single-header.yml)
[![ubuntu-latest-gcc](https://github.com/red0124/ssp/workflows/ubuntu-latest-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-gcc.yml)
[![ubuntu-latest-clang](https://github.com/red0124/ssp/workflows/ubuntu-latest-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-clang.yml)
@@ -16,6 +17,7 @@
[![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-gcc.yml)
[![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml)
[![windows-msvc](https://github.com/red0124/ssp/workflows/win-msvc-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml)
[![macos-apple-clang](https://github.com/red0124/ssp/workflows/macos-apple-clang-ci/badge.svg)](https://github.com/red0124/ssp/actions/workflows/macos-apple-clang.yml)
A header only CSV parser which is fast and versatile with modern C++ API. Requires compiler with C++17 support. [Can also be used to efficiently convert strings to specific types.](#the-converter)
@@ -72,7 +74,7 @@ Bill (Heath) Gates 65 3.3
# Single header
The library can be used with a single header file **`ssp.hpp`**, but it sufferes a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
# Installation
@@ -114,11 +116,11 @@ James Bailey 2.5
Brian S. Wolfe 1.9
Bill (Heath) Gates 3.3
```
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** metod after the parser has been constructed.
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
```cpp
ss::parser<ss::ignore_header> p{file_name};
```
The fields with which the parser works with can be modified at any given time. The praser can also check if a field is present within the header by using the **`field_exists`** method.
The fields with which the parser works with can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
```cpp
// ...
ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
@@ -248,7 +250,7 @@ By default, **`,`** is used as the delimiter, a custom delimiter can be specifie
```cpp
ss::parser p{file_name, "--"};
```
*Note, the delimiter can consist of multiple characters but the parser is slightliy faster when using single character delimiters.*
*Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
### Empty lines
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
@@ -397,7 +399,7 @@ if (std::holds_alternative<float>(grade)) {
// grade set as char
}
```
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers arround the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
```cpp
// returns std::tuple<std::string, ss::uint8, float>
auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>();

View File

@@ -38,45 +38,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
}
#if __unix__
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
return getline(lineptr, n, stream);
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
return getline(&lineptr, &n, file);
}
#else
using ssize_t = intptr_t;
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
errno = EINVAL;
return -1;
}
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
char buff[get_line_initial_buffer_size];
if (*lineptr == nullptr || *n < sizeof(buff)) {
if (lineptr == nullptr || n < sizeof(buff)) {
size_t new_n = sizeof(buff);
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
}
(*lineptr)[0] = '\0';
lineptr[0] = '\0';
size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
line_used = std::strlen(*lineptr);
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff);
if (*n <= buff_used + line_used) {
size_t new_n = *n * 2;
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
if (n <= buff_used + line_used) {
size_t new_n = n * 2;
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
}
std::memcpy(*lineptr + line_used, buff, buff_used);
std::memcpy(lineptr + line_used, buff, buff_used);
line_used += buff_used;
(*lineptr)[line_used] = '\0';
lineptr[line_used] = '\0';
if ((*lineptr)[line_used - 1] == '\n') {
if (lineptr[line_used - 1] == '\n') {
return line_used;
}
}
@@ -86,4 +81,64 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
#endif
} /* ss */
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char < csv_data_size) {
if (line_used + 1 >= n) {
size_t new_n = n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
lineptr[line_used++] = c;
if (c == '\n') {
lineptr[line_used] = '\0';
return line_used;
}
}
lineptr[line_used] = '\0';
return line_used;
}
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
FILE* file,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
ssize_t ssize;
if (file) {
ssize = get_line_file(buffer, buffer_size, file);
curr_char += ssize;
} else {
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
csv_data_size, curr_char);
}
if (ssize == -1) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
return {ssize, true};
}
return {ssize, false};
}
} /* namespace ss */

View File

@@ -150,7 +150,7 @@ public:
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
return convert_impl(elems, static_cast<T*>(nullptr));
} else if constexpr (tied_class_v<T, Ts...>) {
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
return to_object<T>(
@@ -269,6 +269,7 @@ private:
void handle_error_multiline_limit_reached() {
constexpr static auto error_msg = "multiline limit reached";
splitter_.unterminated_quote_ = false;
if constexpr (string_error) {
error_.clear();
@@ -492,4 +493,4 @@ private:
size_t number_of_columns_;
};
} /* ss */
} /* namespace ss */

View File

@@ -20,4 +20,4 @@ public:
}
};
} /* ss */
} /* namespace ss */

View File

@@ -142,7 +142,7 @@ template <typename T>
struct unsupported_type {
constexpr static bool value = false;
};
} /* namespace */
} /* namespace errors */
template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -247,4 +247,4 @@ inline bool extract(const char* begin, const char* end,
return true;
}
} /* ss */
} /* namespace ss */

View File

@@ -77,4 +77,4 @@ struct member_wrapper<R T::*> {
template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */
} /* namespace ss */

View File

@@ -749,46 +749,9 @@ private:
reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete;
ssize_t get_line_buffer(char** lineptr, size_t* n,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
strict_realloc(*lineptr, get_line_initial_buffer_size));
*lineptr = new_lineptr;
*n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char <= csv_data_size) {
if (line_used + 1 >= *n) {
size_t new_n = *n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
*lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
(*lineptr)[line_used++] = c;
if (c == '\n') {
(*lineptr)[line_used] = '\0';
return line_used;
}
}
return (line_used != 0) ? line_used : -1;
}
// read next line each time in order to set eof_
bool read_next() {
next_line_converter_.clear_error();
ssize_t ssize = 0;
size_t size = 0;
while (size == 0) {
++line_number_;
@@ -797,21 +760,11 @@ private:
}
chars_read_ = curr_char_;
if (file_) {
ssize = get_line_file(&next_line_buffer_,
&next_line_buffer_size_, file_);
curr_char_ = std::ftell(file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
auto [ssize, eof] =
get_line(next_line_buffer_, next_line_buffer_size_, file_,
csv_data_buffer_, csv_data_size_, curr_char_);
if (ssize == -1) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
if (eof) {
return false;
}
@@ -836,7 +789,8 @@ private:
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_escape();
return;
}
@@ -854,7 +808,8 @@ private:
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_quote();
return;
}
@@ -865,8 +820,9 @@ private:
return;
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
if (!append_next_line_to_buffer(
next_line_buffer_, next_line_size_,
next_line_buffer_size_)) {
next_line_converter_
.handle_error_unterminated_escape();
return;
@@ -910,18 +866,20 @@ private:
return next_line_converter_.unterminated_quote();
}
void undo_remove_eol(char* buffer, size_t& string_end) {
if (crlf_) {
std::copy_n("\r\n\0", 3, buffer + string_end);
string_end += 2;
} else {
std::copy_n("\n\0", 2, buffer + string_end);
string_end += 1;
void undo_remove_eol(char* buffer, size_t& line_size,
size_t buffer_size) {
if (crlf_ && buffer_size >= line_size + 2) {
std::copy_n("\r\n", 2, buffer + line_size);
line_size += 2;
} else if (buffer_size > line_size) {
std::copy_n("\n", 1, buffer + line_size);
line_size += 1;
}
}
size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') {
crlf_ = false;
return ssize;
}
@@ -949,28 +907,23 @@ private:
first_size += second_size;
}
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
undo_remove_eol(buffer, size);
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
size_t buffer_size) {
undo_remove_eol(buffer, line_size, buffer_size);
ssize_t next_ssize;
if (file_) {
next_ssize =
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
chars_read_ = curr_char_;
auto [next_ssize, eof] =
get_line(helper_buffer_, helper_buffer_size, file_,
csv_data_buffer_, csv_data_size_, curr_char_);
if (next_ssize == -1) {
if (eof) {
return false;
}
++line_number_;
size_t next_size = remove_eol(helper_buffer_, next_ssize);
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
next_size);
realloc_concat(buffer, line_size, next_line_buffer_size_,
helper_buffer_, next_size);
return true;
}
@@ -1018,4 +971,4 @@ private:
bool eof_{false};
};
} /* ss */
} /* namespace ss */

View File

@@ -124,4 +124,4 @@ struct ne {
}
};
} /* ss */
} /* namespace ss */

View File

@@ -293,4 +293,4 @@ private:
template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */
} /* namespace ss */

View File

@@ -6,7 +6,6 @@
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
@@ -479,4 +478,4 @@ public:
friend class converter;
};
} /* ss */
} /* namespace ss */

View File

@@ -378,4 +378,4 @@ T to_object(U&& data) {
}
}
} /* trait */
} /* namespace ss */

View File

@@ -6,7 +6,7 @@ project(
'cpp_std=c++17',
'buildtype=debugoptimized',
'wrap_mode=forcefallback'],
version: '1.7.0',
version: '1.7.2',
meson_version:'>=0.54.0')
fast_float_dep = dependency('fast_float')

230
ssp.hpp
View File

@@ -8,7 +8,6 @@
#include <cstring>
#include <exception>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <string_view>
@@ -394,7 +393,7 @@ T to_object(U&& data) {
}
}
} /* trait */
} /* namespace ss */
namespace ss {
@@ -414,7 +413,7 @@ public:
}
};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -490,7 +489,7 @@ struct member_wrapper<R T::*> {
template <typename T> \
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
} /* trait */
} /* namespace ss */
namespace ss {
@@ -616,7 +615,7 @@ struct ne {
}
};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -650,45 +649,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
}
#if __unix__
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
return getline(lineptr, n, stream);
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
return getline(&lineptr, &n, file);
}
#else
using ssize_t = intptr_t;
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
errno = EINVAL;
return -1;
}
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
char buff[get_line_initial_buffer_size];
if (*lineptr == nullptr || *n < sizeof(buff)) {
if (lineptr == nullptr || n < sizeof(buff)) {
size_t new_n = sizeof(buff);
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
}
(*lineptr)[0] = '\0';
lineptr[0] = '\0';
size_t line_used = 0;
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
line_used = std::strlen(*lineptr);
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
line_used = std::strlen(lineptr);
size_t buff_used = std::strlen(buff);
if (*n <= buff_used + line_used) {
size_t new_n = *n * 2;
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
if (n <= buff_used + line_used) {
size_t new_n = n * 2;
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
}
std::memcpy(*lineptr + line_used, buff, buff_used);
std::memcpy(lineptr + line_used, buff, buff_used);
line_used += buff_used;
(*lineptr)[line_used] = '\0';
lineptr[line_used] = '\0';
if ((*lineptr)[line_used - 1] == '\n') {
if (lineptr[line_used - 1] == '\n') {
return line_used;
}
}
@@ -698,7 +692,67 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
#endif
} /* ss */
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
const char* const csv_data_buffer, size_t csv_data_size,
size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
strict_realloc(lineptr, get_line_initial_buffer_size));
lineptr = new_lineptr;
n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char < csv_data_size) {
if (line_used + 1 >= n) {
size_t new_n = n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(lineptr, new_n));
n = new_n;
lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
lineptr[line_used++] = c;
if (c == '\n') {
lineptr[line_used] = '\0';
return line_used;
}
}
lineptr[line_used] = '\0';
return line_used;
}
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
FILE* file,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
ssize_t ssize;
if (file) {
ssize = get_line_file(buffer, buffer_size, file);
curr_char += ssize;
} else {
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
csv_data_size, curr_char);
}
if (ssize == -1) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
return {ssize, true};
}
return {ssize, false};
}
} /* namespace ss */
namespace ss {
@@ -991,7 +1045,7 @@ private:
template <typename... Options>
struct setup<setup<Options...>> : setup<Options...> {};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -1462,7 +1516,7 @@ public:
friend class converter;
};
} /* ss */
} /* namespace ss */
#ifndef SSP_DISABLE_FAST_FLOAT
@@ -1595,7 +1649,7 @@ template <typename T>
struct unsupported_type {
constexpr static bool value = false;
};
} /* namespace */
} /* namespace error */
template <typename T>
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
@@ -1700,7 +1754,7 @@ inline bool extract(const char* begin, const char* end,
return true;
}
} /* ss */
} /* namespace ss */
namespace ss {
INIT_HAS_METHOD(tied)
@@ -1843,7 +1897,7 @@ public:
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
return convert_impl(elems, static_cast<T*>(nullptr));
} else if constexpr (tied_class_v<T, Ts...>) {
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
return to_object<T>(
@@ -1962,6 +2016,7 @@ private:
void handle_error_multiline_limit_reached() {
constexpr static auto error_msg = "multiline limit reached";
splitter_.unterminated_quote_ = false;
if constexpr (string_error) {
error_.clear();
@@ -2185,7 +2240,7 @@ private:
size_t number_of_columns_;
};
} /* ss */
} /* namespace ss */
namespace ss {
@@ -2925,46 +2980,9 @@ private:
reader(const reader& other) = delete;
reader& operator=(const reader& other) = delete;
ssize_t get_line_buffer(char** lineptr, size_t* n,
const char* const csv_data_buffer,
size_t csv_data_size, size_t& curr_char) {
if (curr_char >= csv_data_size) {
return -1;
}
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
auto new_lineptr = static_cast<char*>(
strict_realloc(*lineptr, get_line_initial_buffer_size));
*lineptr = new_lineptr;
*n = get_line_initial_buffer_size;
}
size_t line_used = 0;
while (curr_char <= csv_data_size) {
if (line_used + 1 >= *n) {
size_t new_n = *n * 2;
char* new_lineptr =
static_cast<char*>(strict_realloc(*lineptr, new_n));
*n = new_n;
*lineptr = new_lineptr;
}
auto c = csv_data_buffer[curr_char++];
(*lineptr)[line_used++] = c;
if (c == '\n') {
(*lineptr)[line_used] = '\0';
return line_used;
}
}
return (line_used != 0) ? line_used : -1;
}
// read next line each time in order to set eof_
bool read_next() {
next_line_converter_.clear_error();
ssize_t ssize = 0;
size_t size = 0;
while (size == 0) {
++line_number_;
@@ -2973,21 +2991,11 @@ private:
}
chars_read_ = curr_char_;
if (file_) {
ssize = get_line_file(&next_line_buffer_,
&next_line_buffer_size_, file_);
curr_char_ = std::ftell(file_);
} else {
ssize = get_line_buffer(&next_line_buffer_,
&next_line_buffer_size_,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
auto [ssize, eof] =
get_line(next_line_buffer_, next_line_buffer_size_, file_,
csv_data_buffer_, csv_data_size_, curr_char_);
if (ssize == -1) {
if (errno == ENOMEM) {
throw std::bad_alloc{};
}
if (eof) {
return false;
}
@@ -3012,7 +3020,8 @@ private:
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_escape();
return;
}
@@ -3030,7 +3039,8 @@ private:
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
next_line_size_,
next_line_buffer_size_)) {
next_line_converter_.handle_error_unterminated_quote();
return;
}
@@ -3041,8 +3051,9 @@ private:
return;
}
if (!append_next_line_to_buffer(next_line_buffer_,
next_line_size_)) {
if (!append_next_line_to_buffer(
next_line_buffer_, next_line_size_,
next_line_buffer_size_)) {
next_line_converter_
.handle_error_unterminated_escape();
return;
@@ -3086,18 +3097,20 @@ private:
return next_line_converter_.unterminated_quote();
}
void undo_remove_eol(char* buffer, size_t& string_end) {
if (crlf_) {
std::copy_n("\r\n\0", 3, buffer + string_end);
string_end += 2;
} else {
std::copy_n("\n\0", 2, buffer + string_end);
string_end += 1;
void undo_remove_eol(char* buffer, size_t& line_size,
size_t buffer_size) {
if (crlf_ && buffer_size >= line_size + 2) {
std::copy_n("\r\n", 2, buffer + line_size);
line_size += 2;
} else if (buffer_size > line_size) {
std::copy_n("\n", 1, buffer + line_size);
line_size += 1;
}
}
size_t remove_eol(char*& buffer, size_t ssize) {
if (buffer[ssize - 1] != '\n') {
crlf_ = false;
return ssize;
}
@@ -3125,28 +3138,23 @@ private:
first_size += second_size;
}
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
undo_remove_eol(buffer, size);
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
size_t buffer_size) {
undo_remove_eol(buffer, line_size, buffer_size);
ssize_t next_ssize;
if (file_) {
next_ssize =
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
} else {
next_ssize =
get_line_buffer(&helper_buffer_, &helper_buffer_size,
csv_data_buffer_, csv_data_size_,
curr_char_);
}
chars_read_ = curr_char_;
auto [next_ssize, eof] =
get_line(helper_buffer_, helper_buffer_size, file_,
csv_data_buffer_, csv_data_size_, curr_char_);
if (next_ssize == -1) {
if (eof) {
return false;
}
++line_number_;
size_t next_size = remove_eol(helper_buffer_, next_ssize);
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
next_size);
realloc_concat(buffer, line_size, next_line_buffer_size_,
helper_buffer_, next_size);
return true;
}
@@ -3194,4 +3202,4 @@ private:
bool eof_{false};
};
} /* ss */
} /* namespace ss */

View File

@@ -23,7 +23,7 @@ struct is_unsigned : public std::is_unsigned<T> {};
template <>
struct is_unsigned<ss::uint8> : public std::true_type {};
} /* namespace */
} /* anonymous namespace */
static_assert(is_signed<ss::int8>::value);
static_assert(is_unsigned<ss::uint8>::value);

View File

@@ -19,7 +19,7 @@
namespace ss {
template <typename... Ts>
class parser;
} /* ss */
} /* namespace ss */
namespace {
@@ -224,4 +224,4 @@ make_parser(const std::string& file_name,
return make_parser_impl<buffer_mode, Ts...>(file_name, delim);
}
} /* namespace */
} /* anonymous namespace */

View File

@@ -109,4 +109,4 @@ static void make_and_write(const std::string& file_name,
}
}
} /* namespace */
} /* anonymous namespace */

View File

@@ -16,13 +16,14 @@ TEST_CASE_TEMPLATE("test multiline restricted", T, ParserOptionCombinations) {
out << "5,6,just\\\n\\\nstrings" << std::endl;
#endif
out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
out << "99,100,\"\n\n\n\n" << std::endl;
out << "9,10,\"just\\\n\nstrings\"" << std::endl;
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
out << "19,20,just strings" << std::endl;
}
auto bad_lines = 15;
auto bad_lines = 20;
auto num_errors = 0;
auto [p, _] =

View File

@@ -616,7 +616,7 @@ void test_option_combinations3() {
test_option_combinations2<Ts..., trim>();
}
} /* namespace */
} /* anonymous namespace */
// Tests split into multiple compilation units
#if 0

View File

@@ -145,7 +145,7 @@ make_combinations(const std::vector<std::string>& input,
return {std::move(lines), std::move(expectations)};
}
} /* namespace */
} /* anonymous namespace */
/* ********************************** */
/* ********************************** */
@@ -548,7 +548,7 @@ public:
return splitter.size_shifted();
}
};
} /* ss */
} /* namespace ss */
TEST_CASE("splitter test resplit unterminated quote") {