mirror of
https://github.com/red0124/ssp.git
synced 2025-12-16 14:49:56 +01:00
Compare commits
19 Commits
b660310acf
...
v1.7.2
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f5b750dd93 | ||
| 7f53b585f9 | |||
| 67ef6651c1 | |||
| fa4ec324de | |||
|
|
f229de61d6 | ||
|
|
df2beab6c3 | ||
|
|
27bd60b5ce | ||
|
|
c5b50f2b47 | ||
|
|
d8dcce7f2a | ||
|
|
126329608c | ||
| ddaa446819 | |||
|
|
8bad2d72ea | ||
| 899a6e6f5e | |||
| 0d3d8fa83e | |||
| 7bbe2879cd | |||
| 063d56fad9 | |||
| df78865f04 | |||
| 852481d233 | |||
| c516a6f826 |
55
.github/workflows/macos-apple-clang.yml
vendored
Normal file
55
.github/workflows/macos-apple-clang.yml
vendored
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
name: macos-apple-clang-ci
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- feature/**
|
||||||
|
- improvement/**
|
||||||
|
- bugfix/**
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
- feature/**
|
||||||
|
- improvement/**
|
||||||
|
- bugfix/**
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
clang_tests:
|
||||||
|
if: >-
|
||||||
|
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
|
||||||
|
! contains(toJSON(github.event.commits.*.message), '[skip github]')
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
xcode: ['13.4.1', '14.1']
|
||||||
|
type: [Release, Debug]
|
||||||
|
|
||||||
|
runs-on: macos-12
|
||||||
|
|
||||||
|
env:
|
||||||
|
DEVELOPER_DIR: /Applications/Xcode_${{matrix.xcode}}.app/Contents/Developer
|
||||||
|
|
||||||
|
name: "Xcode ${{matrix.xcode}}: ${{matrix.type}}"
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- uses: friendlyanon/fetch-core-count@v1
|
||||||
|
id: cores
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: script/ci_install_deps.sh
|
||||||
|
|
||||||
|
- name: Configure
|
||||||
|
run: cmake -S test -B build -DCMAKE_BUILD_TYPE=${{matrix.type}}
|
||||||
|
|
||||||
|
- name: Build
|
||||||
|
run: cmake --build build -j ${{steps.cores.outputs.count}}
|
||||||
|
|
||||||
|
- name: Run
|
||||||
|
working-directory: build
|
||||||
|
run: ctest --output-on-failure
|
||||||
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
|
|||||||
|
|
||||||
project(
|
project(
|
||||||
ssp
|
ssp
|
||||||
VERSION 1.6.2
|
VERSION 1.7.2
|
||||||
DESCRIPTION "csv parser"
|
DESCRIPTION "csv parser"
|
||||||
HOMEPAGE_URL "https://github.com/red0124/ssp"
|
HOMEPAGE_URL "https://github.com/red0124/ssp"
|
||||||
LANGUAGES CXX
|
LANGUAGES CXX
|
||||||
|
|||||||
58
README.md
58
README.md
@@ -16,14 +16,15 @@
|
|||||||
[](https://github.com/red0124/ssp/actions/workflows/win-msys2-gcc.yml)
|
[](https://github.com/red0124/ssp/actions/workflows/win-msys2-gcc.yml)
|
||||||
[](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml)
|
[](https://github.com/red0124/ssp/actions/workflows/win-msys2-clang.yml)
|
||||||
[](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml)
|
[](https://github.com/red0124/ssp/actions/workflows/win-msvc.yml)
|
||||||
|
[](https://github.com/red0124/ssp/actions/workflows/macos-apple-clang.yml)
|
||||||
|
|
||||||
A header only "csv" parser which is fast and versatile with modern C++ api. Requires compiler with C++17 support. [Can also be used to convert strings to specific types.](#the-converter)
|
A header-only CSV parser which is fast and versatile, with a modern C++ API. Requires a compiler with C++17 support. [Can also be used to efficiently convert strings to specific types.](#the-converter)
|
||||||
|
|
||||||
Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) . \
|
Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) . \
|
||||||
Function traits taken from *qt-creator* .
|
Function traits taken from *qt-creator* .
|
||||||
|
|
||||||
# Example
|
# Example
|
||||||
Lets say we have a csv file containing students in a given format \<Id,Age,Grade\> and we want to parse and print all the valid values:
|
Let's say we have a CSV file containing students in a given format (Id,Age,Grade) and we want to parse and print all the valid values:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ cat students.csv
|
$ cat students.csv
|
||||||
@@ -58,6 +59,7 @@ Bill (Heath) Gates 65 3.3
|
|||||||
* Can work without exceptions
|
* Can work without exceptions
|
||||||
* [Works with headers](#headers)
|
* [Works with headers](#headers)
|
||||||
* [Works with quotes, escapes and spacings](#setup)
|
* [Works with quotes, escapes and spacings](#setup)
|
||||||
|
* [Works with CSV data stored in buffers](#buffer-mode)
|
||||||
* [Works with values containing new lines](#multiline)
|
* [Works with values containing new lines](#multiline)
|
||||||
* [Columns and rows can be ignored](#special-types)
|
* [Columns and rows can be ignored](#special-types)
|
||||||
* [Works with any type of delimiter](#delimiter)
|
* [Works with any type of delimiter](#delimiter)
|
||||||
@@ -71,7 +73,7 @@ Bill (Heath) Gates 65 3.3
|
|||||||
|
|
||||||
# Single header
|
# Single header
|
||||||
|
|
||||||
The library can be used with a single header file **`ssp.hpp`**, but it sufferes a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
||||||
|
|
||||||
# Installation
|
# Installation
|
||||||
|
|
||||||
@@ -113,11 +115,11 @@ James Bailey 2.5
|
|||||||
Brian S. Wolfe 1.9
|
Brian S. Wolfe 1.9
|
||||||
Bill (Heath) Gates 3.3
|
Bill (Heath) Gates 3.3
|
||||||
```
|
```
|
||||||
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** metod after the parser has been constructed.
|
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
|
||||||
```cpp
|
```cpp
|
||||||
ss::parser<ss::ignore_header> p{file_name};
|
ss::parser<ss::ignore_header> p{file_name};
|
||||||
```
|
```
|
||||||
The fields with which the parser works with can be modified at any given time. The praser can also check if a field is present within the header by using the **`field_exists`** method.
|
The fields with which the parser works can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
|
||||||
```cpp
|
```cpp
|
||||||
// ...
|
// ...
|
||||||
ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
|
ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
|
||||||
@@ -158,7 +160,7 @@ while (!p.eof()) {
|
|||||||
|
|
||||||
The alternate example with exceptions disabled will be used to show some of the features of the library. The **`get_next`** method returns a tuple of objects specified inside the template type list.
|
The alternate example with exceptions disabled will be used to show some of the features of the library. The **`get_next`** method returns a tuple of objects specified inside the template type list.
|
||||||
|
|
||||||
If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **`valid`** method would return **`false`**, for example if the third (grade) column in our csv could not be converted to a float the conversion would fail.
|
If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **`valid`** method would return **`false`**, for example if the third (grade) column in our CSV could not be converted to a float the conversion would fail.
|
||||||
|
|
||||||
If **`get_next`** is called with a **`tuple`** as template parameter it would behave identically to passing the same tuple parameters to **`get_next`**:
|
If **`get_next`** is called with a **`tuple`** as template parameter it would behave identically to passing the same tuple parameters to **`get_next`**:
|
||||||
```cpp
|
```cpp
|
||||||
@@ -202,14 +204,27 @@ struct student {
|
|||||||
auto tied() { return std::tie(id, age, grade); }
|
auto tied() { return std::tie(id, age, grade); }
|
||||||
};
|
};
|
||||||
```
|
```
|
||||||
The method can be used to compare the object, serialize it, deserialize it, etc. Now **`get_next`** can accept such a struct and deduce the types to which to convert the csv.
|
The method can be used to compare the object, serialize it, deserialize it, etc. Now **`get_next`** can accept such a struct and deduce the types to which to convert the CSV.
|
||||||
```cpp
|
```cpp
|
||||||
// returns student
|
// returns student
|
||||||
auto s = p.get_next<student>();
|
auto s = p.get_next<student>();
|
||||||
```
|
```
|
||||||
This works with the iteration loop too.
|
This works with the iteration loop too.
|
||||||
*Note, the order in which the members of the tied method are returned must match the order of the elements in the csv*.
|
*Note, the order in which the members of the tied method are returned must match the order of the elements in the CSV*.
|
||||||
|
|
||||||
|
## Buffer mode
|
||||||
|
The parser also works with buffers containing CSV data instead of files. To parse buffer data with the parser simply create the parser by giving it the buffer, as **`const char*`**, and its size. The initial example using a buffer instead of a file would look similar to this:
|
||||||
|
```cpp
|
||||||
|
std::string buffer = "James Bailey,65,2.5\nBrian S. Wolfe,40,1.9\n";
|
||||||
|
|
||||||
|
ss::parser<ss::throw_on_error> p{buffer.c_str(), buffer.size()};
|
||||||
|
|
||||||
|
for (const auto& [id, age, grade] : p.iterate<std::string, int, float>()) {
|
||||||
|
std::cout << id << ' ' << age << ' ' << grade << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
```
|
||||||
## Setup
|
## Setup
|
||||||
By default, many of the features supported by the parser are disabled. They can be enabled within the template parameters of the parser. For example, to enable quoting and escaping the parser would look like:
|
By default, many of the features supported by the parser are disabled. They can be enabled within the template parameters of the parser. For example, to enable quoting and escaping the parser would look like:
|
||||||
```cpp
|
```cpp
|
||||||
@@ -234,14 +249,14 @@ By default, **`,`** is used as the delimiter, a custom delimiter can be specifie
|
|||||||
```cpp
|
```cpp
|
||||||
ss::parser p{file_name, "--"};
|
ss::parser p{file_name, "--"};
|
||||||
```
|
```
|
||||||
*Note, the delimiter can consist of multiple characters but the parser is slightliy faster when using single character delimiters.*
|
*Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
|
||||||
|
|
||||||
### Empty lines
|
### Empty lines
|
||||||
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
|
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
|
||||||
```cpp
|
```cpp
|
||||||
ss::parser<ss::ignore_empty> p{file_name};
|
ss::parser<ss::ignore_empty> p{file_name};
|
||||||
```
|
```
|
||||||
If this setup option is not set then reading an empty line will result in an error (unless only one column is present within the csv).
|
If this setup option is not set then reading an empty line will result in an error (unless only one column is present within the CSV).
|
||||||
|
|
||||||
### Quoting
|
### Quoting
|
||||||
Quoting can be enabled by defining **`ss::quote`** within the setup parameters. A single character can be defined as the quoting character, for example to use **`"`** as a quoting character:
|
Quoting can be enabled by defining **`ss::quote`** within the setup parameters. A single character can be defined as the quoting character, for example to use **`"`** as a quoting character:
|
||||||
@@ -290,7 +305,7 @@ Escaping and quoting can be used to leave the space if needed.
|
|||||||
```
|
```
|
||||||
|
|
||||||
### Multiline
|
### Multiline
|
||||||
Multiline can be enabled by defining **`ss::multilne`** within the setup parameters. It enables the possibility to have the new line characters within rows. The new line character needs to be either escaped or within quotes so either **`ss::escape`** or **`ss::quote`** need to be enabled. There is a specific problem when using multiline, for example, if a row had an unterminated quote, the parser would assume it to be a new line within the row, so until another quote is found, it will treat it as one line which is fine usually, but it can cause the whole csv file to be treated as a single line by mistake. To prevent this **`ss::multiline_restricted`** can be used which accepts an unsigned number representing the maximum number of lines which can be allowed as a single multiline. Examples:
|
Multiline can be enabled by defining **`ss::multiline`** within the setup parameters. It enables the possibility to have the new line characters within rows. The new line character needs to be either escaped or within quotes so either **`ss::escape`** or **`ss::quote`** need to be enabled. There is a specific problem when using multiline, for example, if a row had an unterminated quote, the parser would assume it to be a new line within the row, so until another quote is found, it will treat it as one line which is fine usually, but it can cause the whole CSV file to be treated as a single line by mistake. To prevent this **`ss::multiline_restricted`** can be used which accepts an unsigned number representing the maximum number of lines which can be allowed as a single multiline. Examples:
|
||||||
|
|
||||||
```cpp
|
```cpp
|
||||||
ss::parser<ss::multiline, ss::quote<'\"'>, ss::escape<'\\'>> p{file_name};
|
ss::parser<ss::multiline, ss::quote<'\"'>, ss::escape<'\\'>> p{file_name};
|
||||||
@@ -341,7 +356,7 @@ Gates 65 3.3'
|
|||||||
```
|
```
|
||||||
## Special types
|
## Special types
|
||||||
|
|
||||||
Passing **`void`** makes the parser ignore a column. In the initial example **`void`** could be given as the second template parameter to ignore the second (age) column in the csv, a tuple of only 2 parameters would be retuned:
|
Passing **`void`** makes the parser ignore a column. In the initial example **`void`** could be given as the second template parameter to ignore the second (age) column in the CSV; a tuple of only 2 parameters would be returned:
|
||||||
```cpp
|
```cpp
|
||||||
// returns std::tuple<std::string, float>
|
// returns std::tuple<std::string, float>
|
||||||
auto [id, grade] = p.get_next<std::string, void, float>();
|
auto [id, grade] = p.get_next<std::string, void, float>();
|
||||||
@@ -383,6 +398,12 @@ if (std::holds_alternative<float>(grade)) {
|
|||||||
// grade set as char
|
// grade set as char
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** makes the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser it will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
|
||||||
|
```cpp
|
||||||
|
// returns std::tuple<std::string, ss::uint8, float>
|
||||||
|
auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>();
|
||||||
|
uint8_t age_copy = age;
|
||||||
|
```
|
||||||
## Restrictions
|
## Restrictions
|
||||||
|
|
||||||
Custom **`restrictions`** can be used to narrow down the conversions of unwanted values. **`ss::ir`** (in range) and **`ss::ne`** (none empty) are some of those:
|
Custom **`restrictions`** can be used to narrow down the conversions of unwanted values. **`ss::ir`** (in range) and **`ss::ne`** (none empty) are some of those:
|
||||||
@@ -454,12 +475,13 @@ The **`eof`** method can be used to detect if the end of the file was reached.
|
|||||||
Detailed error messages can be accessed via the **`error_msg`** method, and to enable them **`ss::string_error`** needs to be included in the setup. If **`ss::string_error`** is not defined, the **`error_msg`** method will not be defined either.
|
Detailed error messages can be accessed via the **`error_msg`** method, and to enable them **`ss::string_error`** needs to be included in the setup. If **`ss::string_error`** is not defined, the **`error_msg`** method will not be defined either.
|
||||||
|
|
||||||
The line number can be fetched using the **`line`** method.
|
The line number can be fetched using the **`line`** method.
|
||||||
|
The cursor position can be fetched using the **`position`** method.
|
||||||
```cpp
|
```cpp
|
||||||
const std::string& parser::error_msg();
|
const std::string& parser::error_msg() const;
|
||||||
bool parser::valid();
|
bool parser::valid() const;
|
||||||
bool parser::eof();
|
bool parser::eof() const;
|
||||||
size_t parser::line();
|
size_t parser::line() const;
|
||||||
|
size_t parser::position() const;
|
||||||
|
|
||||||
// ...
|
// ...
|
||||||
ss::parser<ss::string_error> parser;
|
ss::parser<ss::string_error> parser;
|
||||||
@@ -474,7 +496,7 @@ ss::parser<ss::throw_on_error> parser;
|
|||||||
|
|
||||||
## Substitute conversions
|
## Substitute conversions
|
||||||
|
|
||||||
The parser can also be used to effectively parse files whose rows are not always in the same format (not a classical csv but still csv-like). A more complicated example would be the best way to demonstrate such a scenario.\
|
The parser can also be used to effectively parse files whose rows are not always in the same format (not a classical CSV but still CSV-like). A more complicated example would be the best way to demonstrate such a scenario.\
|
||||||
***Important, substitute conversions do not work when throw_on_error is enabled.***
|
***Important, substitute conversions do not work when throw_on_error is enabled.***
|
||||||
|
|
||||||
Supposing we have a file containing different shapes in given formats:
|
Supposing we have a file containing different shapes in given formats:
|
||||||
|
|||||||
@@ -38,45 +38,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if __unix__
|
#if __unix__
|
||||||
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
|
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||||
return getline(lineptr, n, stream);
|
return getline(&lineptr, &n, file);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using ssize_t = intptr_t;
|
using ssize_t = intptr_t;
|
||||||
|
|
||||||
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||||
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
char buff[get_line_initial_buffer_size];
|
char buff[get_line_initial_buffer_size];
|
||||||
|
|
||||||
if (*lineptr == nullptr || *n < sizeof(buff)) {
|
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||||
size_t new_n = sizeof(buff);
|
size_t new_n = sizeof(buff);
|
||||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
*n = new_n;
|
n = new_n;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*lineptr)[0] = '\0';
|
lineptr[0] = '\0';
|
||||||
|
|
||||||
size_t line_used = 0;
|
size_t line_used = 0;
|
||||||
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
|
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
|
||||||
line_used = std::strlen(*lineptr);
|
line_used = std::strlen(lineptr);
|
||||||
size_t buff_used = std::strlen(buff);
|
size_t buff_used = std::strlen(buff);
|
||||||
|
|
||||||
if (*n <= buff_used + line_used) {
|
if (n <= buff_used + line_used) {
|
||||||
size_t new_n = *n * 2;
|
size_t new_n = n * 2;
|
||||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
*n = new_n;
|
n = new_n;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(*lineptr + line_used, buff, buff_used);
|
std::memcpy(lineptr + line_used, buff, buff_used);
|
||||||
line_used += buff_used;
|
line_used += buff_used;
|
||||||
(*lineptr)[line_used] = '\0';
|
lineptr[line_used] = '\0';
|
||||||
|
|
||||||
if ((*lineptr)[line_used - 1] == '\n') {
|
if (lineptr[line_used - 1] == '\n') {
|
||||||
return line_used;
|
return line_used;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -86,4 +81,68 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||||
|
const char* const csv_data_buffer, size_t csv_data_size,
|
||||||
|
size_t& curr_char) {
|
||||||
|
if (curr_char >= csv_data_size) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
|
||||||
|
auto new_lineptr = static_cast<char*>(
|
||||||
|
strict_realloc(lineptr, get_line_initial_buffer_size));
|
||||||
|
lineptr = new_lineptr;
|
||||||
|
n = get_line_initial_buffer_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t line_used = 0;
|
||||||
|
while (curr_char < csv_data_size) {
|
||||||
|
if (line_used + 1 >= n) {
|
||||||
|
size_t new_n = n * 2;
|
||||||
|
|
||||||
|
char* new_lineptr =
|
||||||
|
static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
|
n = new_n;
|
||||||
|
lineptr = new_lineptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto c = csv_data_buffer[curr_char++];
|
||||||
|
lineptr[line_used++] = c;
|
||||||
|
if (c == '\n') {
|
||||||
|
lineptr[line_used] = '\0';
|
||||||
|
return line_used;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (line_used != 0) {
|
||||||
|
lineptr[line_used] = '\0';
|
||||||
|
return line_used;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||||
|
FILE* file,
|
||||||
|
const char* const csv_data_buffer,
|
||||||
|
size_t csv_data_size, size_t& curr_char) {
|
||||||
|
ssize_t ssize;
|
||||||
|
if (file) {
|
||||||
|
ssize = get_line_file(buffer, buffer_size, file);
|
||||||
|
curr_char += ssize;
|
||||||
|
} else {
|
||||||
|
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||||
|
csv_data_size, curr_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ssize == -1) {
|
||||||
|
if (errno == ENOMEM) {
|
||||||
|
throw std::bad_alloc{};
|
||||||
|
}
|
||||||
|
return {ssize, true};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {ssize, false};
|
||||||
|
}
|
||||||
|
|
||||||
} /* ss */
|
} /* ss */
|
||||||
|
|||||||
@@ -150,7 +150,7 @@ public:
|
|||||||
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
||||||
return convert_impl(elems, static_cast<T*>(nullptr));
|
return convert_impl(elems, static_cast<T*>(nullptr));
|
||||||
} else if constexpr (tied_class_v<T, Ts...>) {
|
} else if constexpr (tied_class_v<T, Ts...>) {
|
||||||
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
|
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
|
||||||
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
||||||
|
|
||||||
return to_object<T>(
|
return to_object<T>(
|
||||||
@@ -269,6 +269,7 @@ private:
|
|||||||
|
|
||||||
void handle_error_multiline_limit_reached() {
|
void handle_error_multiline_limit_reached() {
|
||||||
constexpr static auto error_msg = "multiline limit reached";
|
constexpr static auto error_msg = "multiline limit reached";
|
||||||
|
splitter_.unterminated_quote_ = false;
|
||||||
|
|
||||||
if constexpr (string_error) {
|
if constexpr (string_error) {
|
||||||
error_.clear();
|
error_.clear();
|
||||||
|
|||||||
@@ -749,46 +749,9 @@ private:
|
|||||||
reader(const reader& other) = delete;
|
reader(const reader& other) = delete;
|
||||||
reader& operator=(const reader& other) = delete;
|
reader& operator=(const reader& other) = delete;
|
||||||
|
|
||||||
ssize_t get_line_buffer(char** lineptr, size_t* n,
|
|
||||||
const char* const csv_data_buffer,
|
|
||||||
size_t csv_data_size, size_t& curr_char) {
|
|
||||||
if (curr_char >= csv_data_size) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
|
|
||||||
auto new_lineptr = static_cast<char*>(
|
|
||||||
strict_realloc(*lineptr, get_line_initial_buffer_size));
|
|
||||||
*lineptr = new_lineptr;
|
|
||||||
*n = get_line_initial_buffer_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t line_used = 0;
|
|
||||||
while (curr_char <= csv_data_size) {
|
|
||||||
if (line_used + 1 >= *n) {
|
|
||||||
size_t new_n = *n * 2;
|
|
||||||
|
|
||||||
char* new_lineptr =
|
|
||||||
static_cast<char*>(strict_realloc(*lineptr, new_n));
|
|
||||||
*n = new_n;
|
|
||||||
*lineptr = new_lineptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto c = csv_data_buffer[curr_char++];
|
|
||||||
(*lineptr)[line_used++] = c;
|
|
||||||
if (c == '\n') {
|
|
||||||
(*lineptr)[line_used] = '\0';
|
|
||||||
return line_used;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (line_used != 0) ? line_used : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// read next line each time in order to set eof_
|
// read next line each time in order to set eof_
|
||||||
bool read_next() {
|
bool read_next() {
|
||||||
next_line_converter_.clear_error();
|
next_line_converter_.clear_error();
|
||||||
ssize_t ssize = 0;
|
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
while (size == 0) {
|
while (size == 0) {
|
||||||
++line_number_;
|
++line_number_;
|
||||||
@@ -797,21 +760,11 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
chars_read_ = curr_char_;
|
chars_read_ = curr_char_;
|
||||||
if (file_) {
|
auto [ssize, eof] =
|
||||||
ssize = get_line_file(&next_line_buffer_,
|
get_line(next_line_buffer_, next_line_buffer_size_, file_,
|
||||||
&next_line_buffer_size_, file_);
|
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||||
curr_char_ = std::ftell(file_);
|
|
||||||
} else {
|
|
||||||
ssize = get_line_buffer(&next_line_buffer_,
|
|
||||||
&next_line_buffer_size_,
|
|
||||||
csv_data_buffer_, csv_data_size_,
|
|
||||||
curr_char_);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ssize == -1) {
|
if (eof) {
|
||||||
if (errno == ENOMEM) {
|
|
||||||
throw std::bad_alloc{};
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -836,7 +789,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||||
next_line_size_)) {
|
next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_.handle_error_unterminated_escape();
|
next_line_converter_.handle_error_unterminated_escape();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -854,7 +808,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||||
next_line_size_)) {
|
next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_.handle_error_unterminated_quote();
|
next_line_converter_.handle_error_unterminated_quote();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -865,8 +820,9 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(
|
||||||
next_line_size_)) {
|
next_line_buffer_, next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_
|
next_line_converter_
|
||||||
.handle_error_unterminated_escape();
|
.handle_error_unterminated_escape();
|
||||||
return;
|
return;
|
||||||
@@ -910,18 +866,20 @@ private:
|
|||||||
return next_line_converter_.unterminated_quote();
|
return next_line_converter_.unterminated_quote();
|
||||||
}
|
}
|
||||||
|
|
||||||
void undo_remove_eol(char* buffer, size_t& string_end) {
|
void undo_remove_eol(char* buffer, size_t& line_size,
|
||||||
if (crlf_) {
|
size_t buffer_size) {
|
||||||
std::copy_n("\r\n\0", 3, buffer + string_end);
|
if (crlf_ && buffer_size >= line_size + 2) {
|
||||||
string_end += 2;
|
std::copy_n("\r\n", 2, buffer + line_size);
|
||||||
} else {
|
line_size += 2;
|
||||||
std::copy_n("\n\0", 2, buffer + string_end);
|
} else if (buffer_size > line_size) {
|
||||||
string_end += 1;
|
std::copy_n("\n", 1, buffer + line_size);
|
||||||
|
line_size += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t remove_eol(char*& buffer, size_t ssize) {
|
size_t remove_eol(char*& buffer, size_t ssize) {
|
||||||
if (buffer[ssize - 1] != '\n') {
|
if (buffer[ssize - 1] != '\n') {
|
||||||
|
crlf_ = false;
|
||||||
return ssize;
|
return ssize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -949,28 +907,23 @@ private:
|
|||||||
first_size += second_size;
|
first_size += second_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
|
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
|
||||||
undo_remove_eol(buffer, size);
|
size_t buffer_size) {
|
||||||
|
undo_remove_eol(buffer, line_size, buffer_size);
|
||||||
|
|
||||||
ssize_t next_ssize;
|
chars_read_ = curr_char_;
|
||||||
if (file_) {
|
auto [next_ssize, eof] =
|
||||||
next_ssize =
|
get_line(helper_buffer_, helper_buffer_size, file_,
|
||||||
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
|
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||||
} else {
|
|
||||||
next_ssize =
|
|
||||||
get_line_buffer(&helper_buffer_, &helper_buffer_size,
|
|
||||||
csv_data_buffer_, csv_data_size_,
|
|
||||||
curr_char_);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (next_ssize == -1) {
|
if (eof) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
++line_number_;
|
++line_number_;
|
||||||
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
||||||
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
|
realloc_concat(buffer, line_size, next_line_buffer_size_,
|
||||||
next_size);
|
helper_buffer_, next_size);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ project(
|
|||||||
'cpp_std=c++17',
|
'cpp_std=c++17',
|
||||||
'buildtype=debugoptimized',
|
'buildtype=debugoptimized',
|
||||||
'wrap_mode=forcefallback'],
|
'wrap_mode=forcefallback'],
|
||||||
version: '1.6.2',
|
version: '1.7.2',
|
||||||
meson_version:'>=0.54.0')
|
meson_version:'>=0.54.0')
|
||||||
|
|
||||||
fast_float_dep = dependency('fast_float')
|
fast_float_dep = dependency('fast_float')
|
||||||
|
|||||||
211
ssp.hpp
211
ssp.hpp
@@ -650,45 +650,40 @@ inline void* strict_realloc(void* ptr, size_t size) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if __unix__
|
#if __unix__
|
||||||
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
|
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||||
return getline(lineptr, n, stream);
|
return getline(&lineptr, &n, file);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using ssize_t = intptr_t;
|
using ssize_t = intptr_t;
|
||||||
|
|
||||||
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||||
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
|
|
||||||
errno = EINVAL;
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
char buff[get_line_initial_buffer_size];
|
char buff[get_line_initial_buffer_size];
|
||||||
|
|
||||||
if (*lineptr == nullptr || *n < sizeof(buff)) {
|
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||||
size_t new_n = sizeof(buff);
|
size_t new_n = sizeof(buff);
|
||||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
*n = new_n;
|
n = new_n;
|
||||||
}
|
}
|
||||||
|
|
||||||
(*lineptr)[0] = '\0';
|
lineptr[0] = '\0';
|
||||||
|
|
||||||
size_t line_used = 0;
|
size_t line_used = 0;
|
||||||
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
|
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
|
||||||
line_used = std::strlen(*lineptr);
|
line_used = std::strlen(lineptr);
|
||||||
size_t buff_used = std::strlen(buff);
|
size_t buff_used = std::strlen(buff);
|
||||||
|
|
||||||
if (*n <= buff_used + line_used) {
|
if (n <= buff_used + line_used) {
|
||||||
size_t new_n = *n * 2;
|
size_t new_n = n * 2;
|
||||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
*n = new_n;
|
n = new_n;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::memcpy(*lineptr + line_used, buff, buff_used);
|
std::memcpy(lineptr + line_used, buff, buff_used);
|
||||||
line_used += buff_used;
|
line_used += buff_used;
|
||||||
(*lineptr)[line_used] = '\0';
|
lineptr[line_used] = '\0';
|
||||||
|
|
||||||
if ((*lineptr)[line_used - 1] == '\n') {
|
if (lineptr[line_used - 1] == '\n') {
|
||||||
return line_used;
|
return line_used;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -698,6 +693,70 @@ ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
|||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||||
|
const char* const csv_data_buffer, size_t csv_data_size,
|
||||||
|
size_t& curr_char) {
|
||||||
|
if (curr_char >= csv_data_size) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
|
||||||
|
auto new_lineptr = static_cast<char*>(
|
||||||
|
strict_realloc(lineptr, get_line_initial_buffer_size));
|
||||||
|
lineptr = new_lineptr;
|
||||||
|
n = get_line_initial_buffer_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t line_used = 0;
|
||||||
|
while (curr_char < csv_data_size) {
|
||||||
|
if (line_used + 1 >= n) {
|
||||||
|
size_t new_n = n * 2;
|
||||||
|
|
||||||
|
char* new_lineptr =
|
||||||
|
static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||||
|
n = new_n;
|
||||||
|
lineptr = new_lineptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto c = csv_data_buffer[curr_char++];
|
||||||
|
lineptr[line_used++] = c;
|
||||||
|
if (c == '\n') {
|
||||||
|
lineptr[line_used] = '\0';
|
||||||
|
return line_used;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (line_used != 0) {
|
||||||
|
lineptr[line_used] = '\0';
|
||||||
|
return line_used;
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||||
|
FILE* file,
|
||||||
|
const char* const csv_data_buffer,
|
||||||
|
size_t csv_data_size, size_t& curr_char) {
|
||||||
|
ssize_t ssize;
|
||||||
|
if (file) {
|
||||||
|
ssize = get_line_file(buffer, buffer_size, file);
|
||||||
|
curr_char += ssize;
|
||||||
|
} else {
|
||||||
|
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||||
|
csv_data_size, curr_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ssize == -1) {
|
||||||
|
if (errno == ENOMEM) {
|
||||||
|
throw std::bad_alloc{};
|
||||||
|
}
|
||||||
|
return {ssize, true};
|
||||||
|
}
|
||||||
|
|
||||||
|
return {ssize, false};
|
||||||
|
}
|
||||||
|
|
||||||
} /* ss */
|
} /* ss */
|
||||||
|
|
||||||
namespace ss {
|
namespace ss {
|
||||||
@@ -1843,7 +1902,7 @@ public:
|
|||||||
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
||||||
return convert_impl(elems, static_cast<T*>(nullptr));
|
return convert_impl(elems, static_cast<T*>(nullptr));
|
||||||
} else if constexpr (tied_class_v<T, Ts...>) {
|
} else if constexpr (tied_class_v<T, Ts...>) {
|
||||||
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
|
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
|
||||||
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
||||||
|
|
||||||
return to_object<T>(
|
return to_object<T>(
|
||||||
@@ -1962,6 +2021,7 @@ private:
|
|||||||
|
|
||||||
void handle_error_multiline_limit_reached() {
|
void handle_error_multiline_limit_reached() {
|
||||||
constexpr static auto error_msg = "multiline limit reached";
|
constexpr static auto error_msg = "multiline limit reached";
|
||||||
|
splitter_.unterminated_quote_ = false;
|
||||||
|
|
||||||
if constexpr (string_error) {
|
if constexpr (string_error) {
|
||||||
error_.clear();
|
error_.clear();
|
||||||
@@ -2925,46 +2985,9 @@ private:
|
|||||||
reader(const reader& other) = delete;
|
reader(const reader& other) = delete;
|
||||||
reader& operator=(const reader& other) = delete;
|
reader& operator=(const reader& other) = delete;
|
||||||
|
|
||||||
ssize_t get_line_buffer(char** lineptr, size_t* n,
|
|
||||||
const char* const csv_data_buffer,
|
|
||||||
size_t csv_data_size, size_t& curr_char) {
|
|
||||||
if (curr_char >= csv_data_size) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
|
|
||||||
auto new_lineptr = static_cast<char*>(
|
|
||||||
strict_realloc(*lineptr, get_line_initial_buffer_size));
|
|
||||||
*lineptr = new_lineptr;
|
|
||||||
*n = get_line_initial_buffer_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t line_used = 0;
|
|
||||||
while (curr_char <= csv_data_size) {
|
|
||||||
if (line_used + 1 >= *n) {
|
|
||||||
size_t new_n = *n * 2;
|
|
||||||
|
|
||||||
char* new_lineptr =
|
|
||||||
static_cast<char*>(strict_realloc(*lineptr, new_n));
|
|
||||||
*n = new_n;
|
|
||||||
*lineptr = new_lineptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto c = csv_data_buffer[curr_char++];
|
|
||||||
(*lineptr)[line_used++] = c;
|
|
||||||
if (c == '\n') {
|
|
||||||
(*lineptr)[line_used] = '\0';
|
|
||||||
return line_used;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (line_used != 0) ? line_used : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// read next line each time in order to set eof_
|
// read next line each time in order to set eof_
|
||||||
bool read_next() {
|
bool read_next() {
|
||||||
next_line_converter_.clear_error();
|
next_line_converter_.clear_error();
|
||||||
ssize_t ssize = 0;
|
|
||||||
size_t size = 0;
|
size_t size = 0;
|
||||||
while (size == 0) {
|
while (size == 0) {
|
||||||
++line_number_;
|
++line_number_;
|
||||||
@@ -2973,21 +2996,11 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
chars_read_ = curr_char_;
|
chars_read_ = curr_char_;
|
||||||
if (file_) {
|
auto [ssize, eof] =
|
||||||
ssize = get_line_file(&next_line_buffer_,
|
get_line(next_line_buffer_, next_line_buffer_size_, file_,
|
||||||
&next_line_buffer_size_, file_);
|
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||||
curr_char_ = std::ftell(file_);
|
|
||||||
} else {
|
|
||||||
ssize = get_line_buffer(&next_line_buffer_,
|
|
||||||
&next_line_buffer_size_,
|
|
||||||
csv_data_buffer_, csv_data_size_,
|
|
||||||
curr_char_);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ssize == -1) {
|
if (eof) {
|
||||||
if (errno == ENOMEM) {
|
|
||||||
throw std::bad_alloc{};
|
|
||||||
}
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3012,7 +3025,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||||
next_line_size_)) {
|
next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_.handle_error_unterminated_escape();
|
next_line_converter_.handle_error_unterminated_escape();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -3030,7 +3044,8 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||||
next_line_size_)) {
|
next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_.handle_error_unterminated_quote();
|
next_line_converter_.handle_error_unterminated_quote();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -3041,8 +3056,9 @@ private:
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
if (!append_next_line_to_buffer(
|
||||||
next_line_size_)) {
|
next_line_buffer_, next_line_size_,
|
||||||
|
next_line_buffer_size_)) {
|
||||||
next_line_converter_
|
next_line_converter_
|
||||||
.handle_error_unterminated_escape();
|
.handle_error_unterminated_escape();
|
||||||
return;
|
return;
|
||||||
@@ -3086,18 +3102,20 @@ private:
|
|||||||
return next_line_converter_.unterminated_quote();
|
return next_line_converter_.unterminated_quote();
|
||||||
}
|
}
|
||||||
|
|
||||||
void undo_remove_eol(char* buffer, size_t& string_end) {
|
void undo_remove_eol(char* buffer, size_t& line_size,
|
||||||
if (crlf_) {
|
size_t buffer_size) {
|
||||||
std::copy_n("\r\n\0", 3, buffer + string_end);
|
if (crlf_ && buffer_size >= line_size + 2) {
|
||||||
string_end += 2;
|
std::copy_n("\r\n", 2, buffer + line_size);
|
||||||
} else {
|
line_size += 2;
|
||||||
std::copy_n("\n\0", 2, buffer + string_end);
|
} else if (buffer_size > line_size) {
|
||||||
string_end += 1;
|
std::copy_n("\n", 1, buffer + line_size);
|
||||||
|
line_size += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t remove_eol(char*& buffer, size_t ssize) {
|
size_t remove_eol(char*& buffer, size_t ssize) {
|
||||||
if (buffer[ssize - 1] != '\n') {
|
if (buffer[ssize - 1] != '\n') {
|
||||||
|
crlf_ = false;
|
||||||
return ssize;
|
return ssize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3125,28 +3143,23 @@ private:
|
|||||||
first_size += second_size;
|
first_size += second_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
|
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
|
||||||
undo_remove_eol(buffer, size);
|
size_t buffer_size) {
|
||||||
|
undo_remove_eol(buffer, line_size, buffer_size);
|
||||||
|
|
||||||
ssize_t next_ssize;
|
chars_read_ = curr_char_;
|
||||||
if (file_) {
|
auto [next_ssize, eof] =
|
||||||
next_ssize =
|
get_line(helper_buffer_, helper_buffer_size, file_,
|
||||||
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
|
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||||
} else {
|
|
||||||
next_ssize =
|
|
||||||
get_line_buffer(&helper_buffer_, &helper_buffer_size,
|
|
||||||
csv_data_buffer_, csv_data_size_,
|
|
||||||
curr_char_);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (next_ssize == -1) {
|
if (eof) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
++line_number_;
|
++line_number_;
|
||||||
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
||||||
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
|
realloc_concat(buffer, line_size, next_line_buffer_size_,
|
||||||
next_size);
|
helper_buffer_, next_size);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -119,7 +119,7 @@ TEST_CASE_TEMPLATE("converter test valid conversions", T, int, ss::uint8) {
|
|||||||
c.convert<void, std::variant<T, double>, double>("junk;5;6.6", ";");
|
c.convert<void, std::variant<T, double>, double>("junk;5;6.6", ";");
|
||||||
REQUIRE(c.valid());
|
REQUIRE(c.valid());
|
||||||
REQUIRE(std::holds_alternative<T>(std::get<0>(tup)));
|
REQUIRE(std::holds_alternative<T>(std::get<0>(tup)));
|
||||||
CHECK_EQ(tup, std::make_tuple(std::variant<T, double>{5}, 6.6));
|
CHECK_EQ(tup, std::make_tuple(std::variant<T, double>{T(5)}, 6.6));
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto tup =
|
auto tup =
|
||||||
@@ -248,7 +248,7 @@ TEST_CASE_TEMPLATE("converter test valid conversions with exceptions", T, int,
|
|||||||
c.convert<void, std::variant<T, double>, double>("junk;5;6.6", ";");
|
c.convert<void, std::variant<T, double>, double>("junk;5;6.6", ";");
|
||||||
REQUIRE(c.valid());
|
REQUIRE(c.valid());
|
||||||
REQUIRE(std::holds_alternative<T>(std::get<0>(tup)));
|
REQUIRE(std::holds_alternative<T>(std::get<0>(tup)));
|
||||||
CHECK_EQ(tup, std::make_tuple(std::variant<T, double>{5}, 6.6));
|
CHECK_EQ(tup, std::make_tuple(std::variant<T, double>{T(5)}, 6.6));
|
||||||
} catch (ss::exception& e) {
|
} catch (ss::exception& e) {
|
||||||
FAIL(std::string{e.what()});
|
FAIL(std::string{e.what()});
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,15 +2,31 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <ss/extract.hpp>
|
#include <ss/extract.hpp>
|
||||||
|
|
||||||
template <typename T>
|
namespace {
|
||||||
struct std::numeric_limits<ss::numeric_wrapper<T>>
|
|
||||||
: public std::numeric_limits<T> {};
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct std::is_signed<ss::numeric_wrapper<T>> : public std::is_signed<T> {};
|
struct numeric_limits : public std::numeric_limits<T> {};
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
struct std::is_unsigned<ss::numeric_wrapper<T>> : public std::is_unsigned<T> {};
|
struct numeric_limits<ss::numeric_wrapper<T>> : public std::numeric_limits<T> {
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct is_signed : public std::is_signed<T> {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_signed<ss::int8> : public std::true_type {};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
struct is_unsigned : public std::is_unsigned<T> {};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct is_unsigned<ss::uint8> : public std::true_type {};
|
||||||
|
|
||||||
|
} /* namespace */
|
||||||
|
|
||||||
|
static_assert(is_signed<ss::int8>::value);
|
||||||
|
static_assert(is_unsigned<ss::uint8>::value);
|
||||||
|
|
||||||
TEST_CASE("testing extract functions for floating point values") {
|
TEST_CASE("testing extract functions for floating point values") {
|
||||||
CHECK_FLOATING_CONVERSION(123.456, float);
|
CHECK_FLOATING_CONVERSION(123.456, float);
|
||||||
@@ -38,7 +54,7 @@ TEST_CASE("testing extract functions for floating point values") {
|
|||||||
CHECK_EQ(value, type(input)); \
|
CHECK_EQ(value, type(input)); \
|
||||||
} \
|
} \
|
||||||
/* check negative too */ \
|
/* check negative too */ \
|
||||||
if (std::is_signed_v<type>) { \
|
if (is_signed<type>::value) { \
|
||||||
std::string s = std::string("-") + #input; \
|
std::string s = std::string("-") + #input; \
|
||||||
type value; \
|
type value; \
|
||||||
bool valid = ss::extract(s.c_str(), s.c_str() + s.size(), value); \
|
bool valid = ss::extract(s.c_str(), s.c_str() + s.size(), value); \
|
||||||
@@ -89,7 +105,7 @@ TEST_CASE_TEMPLATE(
|
|||||||
"extract test functions for numbers with out of range inputs", T, short, us,
|
"extract test functions for numbers with out of range inputs", T, short, us,
|
||||||
int, ui, long, ul, ll, ull, ss::uint8) {
|
int, ui, long, ul, ll, ull, ss::uint8) {
|
||||||
{
|
{
|
||||||
std::string s = std::to_string(std::numeric_limits<T>::max());
|
std::string s = std::to_string(numeric_limits<T>::max());
|
||||||
auto t = ss::to_num<T>(s.c_str(), s.c_str() + s.size());
|
auto t = ss::to_num<T>(s.c_str(), s.c_str() + s.size());
|
||||||
CHECK(t.has_value());
|
CHECK(t.has_value());
|
||||||
for (auto& i : s) {
|
for (auto& i : s) {
|
||||||
@@ -102,14 +118,14 @@ TEST_CASE_TEMPLATE(
|
|||||||
CHECK_FALSE(t.has_value());
|
CHECK_FALSE(t.has_value());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
std::string s = std::to_string(std::numeric_limits<T>::min());
|
std::string s = std::to_string(numeric_limits<T>::min());
|
||||||
auto t = ss::to_num<T>(s.c_str(), s.c_str() + s.size());
|
auto t = ss::to_num<T>(s.c_str(), s.c_str() + s.size());
|
||||||
CHECK(t.has_value());
|
CHECK(t.has_value());
|
||||||
for (auto& i : s) {
|
for (auto& i : s) {
|
||||||
if (std::is_signed_v<T> && i != '9' && i != '.') {
|
if (is_signed<T>::value && i != '9' && i != '.') {
|
||||||
i = '9';
|
i = '9';
|
||||||
break;
|
break;
|
||||||
} else if (std::is_unsigned_v<T>) {
|
} else if (is_unsigned<T>::value) {
|
||||||
s = "-1";
|
s = "-1";
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -16,13 +16,14 @@ TEST_CASE_TEMPLATE("test multiline restricted", T, ParserOptionCombinations) {
|
|||||||
out << "5,6,just\\\n\\\nstrings" << std::endl;
|
out << "5,6,just\\\n\\\nstrings" << std::endl;
|
||||||
#endif
|
#endif
|
||||||
out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
|
out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
|
||||||
|
out << "99,100,\"\n\n\n\n" << std::endl;
|
||||||
out << "9,10,\"just\\\n\nstrings\"" << std::endl;
|
out << "9,10,\"just\\\n\nstrings\"" << std::endl;
|
||||||
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
|
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
|
||||||
out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
|
out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
|
||||||
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
|
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
|
||||||
out << "19,20,just strings" << std::endl;
|
out << "19,20,just strings" << std::endl;
|
||||||
}
|
}
|
||||||
auto bad_lines = 15;
|
auto bad_lines = 20;
|
||||||
auto num_errors = 0;
|
auto num_errors = 0;
|
||||||
|
|
||||||
auto [p, _] =
|
auto [p, _] =
|
||||||
|
|||||||
Reference in New Issue
Block a user