mirror of
https://github.com/red0124/ssp.git
synced 2025-06-08 05:12:30 +02:00
Compare commits
No commits in common. "67ef6651c156a04574be5408de760b730d2ed607" and "126329608cda1e026f4c267205180d6e725955bc" have entirely different histories.
67ef6651c1
...
126329608c
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
|
||||
|
||||
project(
|
||||
ssp
|
||||
VERSION 1.7.1
|
||||
VERSION 1.7.0
|
||||
DESCRIPTION "csv parser"
|
||||
HOMEPAGE_URL "https://github.com/red0124/ssp"
|
||||
LANGUAGES CXX
|
||||
|
10
README.md
10
README.md
@ -73,7 +73,7 @@ Bill (Heath) Gates 65 3.3
|
||||
|
||||
# Single header
|
||||
|
||||
The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
||||
The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
||||
|
||||
# Installation
|
||||
|
||||
@ -115,11 +115,11 @@ James Bailey 2.5
|
||||
Brian S. Wolfe 1.9
|
||||
Bill (Heath) Gates 3.3
|
||||
```
|
||||
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
|
||||
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
|
||||
```cpp
|
||||
ss::parser<ss::ignore_header> p{file_name};
|
||||
```
|
||||
The fields with which the parser works with can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
|
||||
The fields with which the parser works with can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
|
||||
```cpp
|
||||
// ...
|
||||
ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
|
||||
@ -249,7 +249,7 @@ By default, **`,`** is used as the delimiter, a custom delimiter can be specifie
|
||||
```cpp
|
||||
ss::parser p{file_name, "--"};
|
||||
```
|
||||
*Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
|
||||
*Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
|
||||
|
||||
### Empty lines
|
||||
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
|
||||
@ -398,7 +398,7 @@ if (std::holds_alternative<float>(grade)) {
|
||||
// grade set as char
|
||||
}
|
||||
```
|
||||
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
|
||||
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
|
||||
```cpp
|
||||
// returns std::tuple<std::string, ss::uint8, float>
|
||||
auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>();
|
||||
|
@ -38,40 +38,45 @@ inline void* strict_realloc(void* ptr, size_t size) {
|
||||
}
|
||||
|
||||
#if __unix__
|
||||
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
return getline(&lineptr, &n, file);
|
||||
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
|
||||
return getline(lineptr, n, stream);
|
||||
}
|
||||
#else
|
||||
|
||||
using ssize_t = intptr_t;
|
||||
|
||||
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||
size_t new_n = sizeof(buff);
|
||||
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
||||
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
lineptr[0] = '\0';
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (*lineptr == nullptr || *n < sizeof(buff)) {
|
||||
size_t new_n = sizeof(buff);
|
||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
}
|
||||
|
||||
(*lineptr)[0] = '\0';
|
||||
|
||||
size_t line_used = 0;
|
||||
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
|
||||
line_used = std::strlen(lineptr);
|
||||
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
|
||||
line_used = std::strlen(*lineptr);
|
||||
size_t buff_used = std::strlen(buff);
|
||||
|
||||
if (n <= buff_used + line_used) {
|
||||
size_t new_n = n * 2;
|
||||
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
if (*n <= buff_used + line_used) {
|
||||
size_t new_n = *n * 2;
|
||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
}
|
||||
|
||||
std::memcpy(lineptr + line_used, buff, buff_used);
|
||||
std::memcpy(*lineptr + line_used, buff, buff_used);
|
||||
line_used += buff_used;
|
||||
lineptr[line_used] = '\0';
|
||||
(*lineptr)[line_used] = '\0';
|
||||
|
||||
if (lineptr[line_used - 1] == '\n') {
|
||||
if ((*lineptr)[line_used - 1] == '\n') {
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
@ -81,68 +86,4 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
#endif
|
||||
|
||||
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
const char* const csv_data_buffer, size_t csv_data_size,
|
||||
size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
|
||||
auto new_lineptr = static_cast<char*>(
|
||||
strict_realloc(lineptr, get_line_initial_buffer_size));
|
||||
lineptr = new_lineptr;
|
||||
n = get_line_initial_buffer_size;
|
||||
}
|
||||
|
||||
size_t line_used = 0;
|
||||
while (curr_char < csv_data_size) {
|
||||
if (line_used + 1 >= n) {
|
||||
size_t new_n = n * 2;
|
||||
|
||||
char* new_lineptr =
|
||||
static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
lineptr = new_lineptr;
|
||||
}
|
||||
|
||||
auto c = csv_data_buffer[curr_char++];
|
||||
lineptr[line_used++] = c;
|
||||
if (c == '\n') {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
|
||||
if (line_used != 0) {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
FILE* file,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
ssize_t ssize;
|
||||
if (file) {
|
||||
ssize = get_line_file(buffer, buffer_size, file);
|
||||
curr_char = std::ftell(file);
|
||||
} else {
|
||||
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||
csv_data_size, curr_char);
|
||||
}
|
||||
|
||||
if (ssize == -1) {
|
||||
if (errno == ENOMEM) {
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
return {ssize, true};
|
||||
}
|
||||
|
||||
return {ssize, false};
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
|
@ -150,7 +150,7 @@ public:
|
||||
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
||||
return convert_impl(elems, static_cast<T*>(nullptr));
|
||||
} else if constexpr (tied_class_v<T, Ts...>) {
|
||||
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
|
||||
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
|
||||
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
||||
|
||||
return to_object<T>(
|
||||
@ -269,7 +269,6 @@ private:
|
||||
|
||||
void handle_error_multiline_limit_reached() {
|
||||
constexpr static auto error_msg = "multiline limit reached";
|
||||
splitter_.unterminated_quote_ = false;
|
||||
|
||||
if constexpr (string_error) {
|
||||
error_.clear();
|
||||
|
@ -749,9 +749,46 @@ private:
|
||||
reader(const reader& other) = delete;
|
||||
reader& operator=(const reader& other) = delete;
|
||||
|
||||
ssize_t get_line_buffer(char** lineptr, size_t* n,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
|
||||
auto new_lineptr = static_cast<char*>(
|
||||
strict_realloc(*lineptr, get_line_initial_buffer_size));
|
||||
*lineptr = new_lineptr;
|
||||
*n = get_line_initial_buffer_size;
|
||||
}
|
||||
|
||||
size_t line_used = 0;
|
||||
while (curr_char <= csv_data_size) {
|
||||
if (line_used + 1 >= *n) {
|
||||
size_t new_n = *n * 2;
|
||||
|
||||
char* new_lineptr =
|
||||
static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
*lineptr = new_lineptr;
|
||||
}
|
||||
|
||||
auto c = csv_data_buffer[curr_char++];
|
||||
(*lineptr)[line_used++] = c;
|
||||
if (c == '\n') {
|
||||
(*lineptr)[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
|
||||
return (line_used != 0) ? line_used : -1;
|
||||
}
|
||||
|
||||
// read next line each time in order to set eof_
|
||||
bool read_next() {
|
||||
next_line_converter_.clear_error();
|
||||
ssize_t ssize = 0;
|
||||
size_t size = 0;
|
||||
while (size == 0) {
|
||||
++line_number_;
|
||||
@ -760,11 +797,21 @@ private:
|
||||
}
|
||||
|
||||
chars_read_ = curr_char_;
|
||||
auto [ssize, eof] =
|
||||
get_line(next_line_buffer_, next_line_buffer_size_, file_,
|
||||
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||
if (file_) {
|
||||
ssize = get_line_file(&next_line_buffer_,
|
||||
&next_line_buffer_size_, file_);
|
||||
curr_char_ = std::ftell(file_);
|
||||
} else {
|
||||
ssize = get_line_buffer(&next_line_buffer_,
|
||||
&next_line_buffer_size_,
|
||||
csv_data_buffer_, csv_data_size_,
|
||||
curr_char_);
|
||||
}
|
||||
|
||||
if (eof) {
|
||||
if (ssize == -1) {
|
||||
if (errno == ENOMEM) {
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -789,8 +836,7 @@ private:
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
next_line_size_)) {
|
||||
next_line_converter_.handle_error_unterminated_escape();
|
||||
return;
|
||||
}
|
||||
@ -808,8 +854,7 @@ private:
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
next_line_size_)) {
|
||||
next_line_converter_.handle_error_unterminated_quote();
|
||||
return;
|
||||
}
|
||||
@ -820,9 +865,8 @@ private:
|
||||
return;
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(
|
||||
next_line_buffer_, next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_)) {
|
||||
next_line_converter_
|
||||
.handle_error_unterminated_escape();
|
||||
return;
|
||||
@ -866,20 +910,18 @@ private:
|
||||
return next_line_converter_.unterminated_quote();
|
||||
}
|
||||
|
||||
void undo_remove_eol(char* buffer, size_t& line_size,
|
||||
size_t buffer_size) {
|
||||
if (crlf_ && buffer_size >= line_size + 2) {
|
||||
std::copy_n("\r\n", 2, buffer + line_size);
|
||||
line_size += 2;
|
||||
} else if (buffer_size > line_size) {
|
||||
std::copy_n("\n", 1, buffer + line_size);
|
||||
line_size += 1;
|
||||
void undo_remove_eol(char* buffer, size_t& string_end) {
|
||||
if (crlf_) {
|
||||
std::copy_n("\r\n\0", 3, buffer + string_end);
|
||||
string_end += 2;
|
||||
} else {
|
||||
std::copy_n("\n\0", 2, buffer + string_end);
|
||||
string_end += 1;
|
||||
}
|
||||
}
|
||||
|
||||
size_t remove_eol(char*& buffer, size_t ssize) {
|
||||
if (buffer[ssize - 1] != '\n') {
|
||||
crlf_ = false;
|
||||
return ssize;
|
||||
}
|
||||
|
||||
@ -907,23 +949,28 @@ private:
|
||||
first_size += second_size;
|
||||
}
|
||||
|
||||
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
|
||||
size_t buffer_size) {
|
||||
undo_remove_eol(buffer, line_size, buffer_size);
|
||||
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
|
||||
undo_remove_eol(buffer, size);
|
||||
|
||||
chars_read_ = curr_char_;
|
||||
auto [next_ssize, eof] =
|
||||
get_line(helper_buffer_, helper_buffer_size, file_,
|
||||
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||
ssize_t next_ssize;
|
||||
if (file_) {
|
||||
next_ssize =
|
||||
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
|
||||
} else {
|
||||
next_ssize =
|
||||
get_line_buffer(&helper_buffer_, &helper_buffer_size,
|
||||
csv_data_buffer_, csv_data_size_,
|
||||
curr_char_);
|
||||
}
|
||||
|
||||
if (eof) {
|
||||
if (next_ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++line_number_;
|
||||
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
||||
realloc_concat(buffer, line_size, next_line_buffer_size_,
|
||||
helper_buffer_, next_size);
|
||||
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
|
||||
next_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,7 @@ project(
|
||||
'cpp_std=c++17',
|
||||
'buildtype=debugoptimized',
|
||||
'wrap_mode=forcefallback'],
|
||||
version: '1.7.1',
|
||||
version: '1.7.0',
|
||||
meson_version:'>=0.54.0')
|
||||
|
||||
fast_float_dep = dependency('fast_float')
|
||||
|
217
ssp.hpp
217
ssp.hpp
@ -650,40 +650,45 @@ inline void* strict_realloc(void* ptr, size_t size) {
|
||||
}
|
||||
|
||||
#if __unix__
|
||||
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
return getline(&lineptr, &n, file);
|
||||
inline ssize_t get_line_file(char** lineptr, size_t* n, FILE* stream) {
|
||||
return getline(lineptr, n, stream);
|
||||
}
|
||||
#else
|
||||
|
||||
using ssize_t = intptr_t;
|
||||
|
||||
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||
size_t new_n = sizeof(buff);
|
||||
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
ssize_t get_line_file(char** lineptr, size_t* n, FILE* fp) {
|
||||
if (lineptr == nullptr || n == nullptr || fp == nullptr) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
lineptr[0] = '\0';
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (*lineptr == nullptr || *n < sizeof(buff)) {
|
||||
size_t new_n = sizeof(buff);
|
||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
}
|
||||
|
||||
(*lineptr)[0] = '\0';
|
||||
|
||||
size_t line_used = 0;
|
||||
while (std::fgets(buff, sizeof(buff), file) != nullptr) {
|
||||
line_used = std::strlen(lineptr);
|
||||
while (std::fgets(buff, sizeof(buff), fp) != nullptr) {
|
||||
line_used = std::strlen(*lineptr);
|
||||
size_t buff_used = std::strlen(buff);
|
||||
|
||||
if (n <= buff_used + line_used) {
|
||||
size_t new_n = n * 2;
|
||||
lineptr = static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
if (*n <= buff_used + line_used) {
|
||||
size_t new_n = *n * 2;
|
||||
*lineptr = static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
}
|
||||
|
||||
std::memcpy(lineptr + line_used, buff, buff_used);
|
||||
std::memcpy(*lineptr + line_used, buff, buff_used);
|
||||
line_used += buff_used;
|
||||
lineptr[line_used] = '\0';
|
||||
(*lineptr)[line_used] = '\0';
|
||||
|
||||
if (lineptr[line_used - 1] == '\n') {
|
||||
if ((*lineptr)[line_used - 1] == '\n') {
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
@ -693,70 +698,6 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
#endif
|
||||
|
||||
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
const char* const csv_data_buffer, size_t csv_data_size,
|
||||
size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (lineptr == nullptr || n < get_line_initial_buffer_size) {
|
||||
auto new_lineptr = static_cast<char*>(
|
||||
strict_realloc(lineptr, get_line_initial_buffer_size));
|
||||
lineptr = new_lineptr;
|
||||
n = get_line_initial_buffer_size;
|
||||
}
|
||||
|
||||
size_t line_used = 0;
|
||||
while (curr_char < csv_data_size) {
|
||||
if (line_used + 1 >= n) {
|
||||
size_t new_n = n * 2;
|
||||
|
||||
char* new_lineptr =
|
||||
static_cast<char*>(strict_realloc(lineptr, new_n));
|
||||
n = new_n;
|
||||
lineptr = new_lineptr;
|
||||
}
|
||||
|
||||
auto c = csv_data_buffer[curr_char++];
|
||||
lineptr[line_used++] = c;
|
||||
if (c == '\n') {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
|
||||
if (line_used != 0) {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
FILE* file,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
ssize_t ssize;
|
||||
if (file) {
|
||||
ssize = get_line_file(buffer, buffer_size, file);
|
||||
curr_char = std::ftell(file);
|
||||
} else {
|
||||
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||
csv_data_size, curr_char);
|
||||
}
|
||||
|
||||
if (ssize == -1) {
|
||||
if (errno == ENOMEM) {
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
return {ssize, true};
|
||||
}
|
||||
|
||||
return {ssize, false};
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
|
||||
namespace ss {
|
||||
@ -1902,7 +1843,7 @@ public:
|
||||
if constexpr (sizeof...(Ts) == 0 && is_instance_of_v<std::tuple, T>) {
|
||||
return convert_impl(elems, static_cast<T*>(nullptr));
|
||||
} else if constexpr (tied_class_v<T, Ts...>) {
|
||||
using arg_ref_tuple = std::invoke_result_t<decltype(&T::tied), T>;
|
||||
using arg_ref_tuple = std::result_of_t<decltype (&T::tied)(T)>;
|
||||
using arg_tuple = apply_trait_t<std::decay, arg_ref_tuple>;
|
||||
|
||||
return to_object<T>(
|
||||
@ -2021,7 +1962,6 @@ private:
|
||||
|
||||
void handle_error_multiline_limit_reached() {
|
||||
constexpr static auto error_msg = "multiline limit reached";
|
||||
splitter_.unterminated_quote_ = false;
|
||||
|
||||
if constexpr (string_error) {
|
||||
error_.clear();
|
||||
@ -2985,9 +2925,46 @@ private:
|
||||
reader(const reader& other) = delete;
|
||||
reader& operator=(const reader& other) = delete;
|
||||
|
||||
ssize_t get_line_buffer(char** lineptr, size_t* n,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (*lineptr == nullptr || *n < get_line_initial_buffer_size) {
|
||||
auto new_lineptr = static_cast<char*>(
|
||||
strict_realloc(*lineptr, get_line_initial_buffer_size));
|
||||
*lineptr = new_lineptr;
|
||||
*n = get_line_initial_buffer_size;
|
||||
}
|
||||
|
||||
size_t line_used = 0;
|
||||
while (curr_char <= csv_data_size) {
|
||||
if (line_used + 1 >= *n) {
|
||||
size_t new_n = *n * 2;
|
||||
|
||||
char* new_lineptr =
|
||||
static_cast<char*>(strict_realloc(*lineptr, new_n));
|
||||
*n = new_n;
|
||||
*lineptr = new_lineptr;
|
||||
}
|
||||
|
||||
auto c = csv_data_buffer[curr_char++];
|
||||
(*lineptr)[line_used++] = c;
|
||||
if (c == '\n') {
|
||||
(*lineptr)[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
}
|
||||
|
||||
return (line_used != 0) ? line_used : -1;
|
||||
}
|
||||
|
||||
// read next line each time in order to set eof_
|
||||
bool read_next() {
|
||||
next_line_converter_.clear_error();
|
||||
ssize_t ssize = 0;
|
||||
size_t size = 0;
|
||||
while (size == 0) {
|
||||
++line_number_;
|
||||
@ -2996,11 +2973,21 @@ private:
|
||||
}
|
||||
|
||||
chars_read_ = curr_char_;
|
||||
auto [ssize, eof] =
|
||||
get_line(next_line_buffer_, next_line_buffer_size_, file_,
|
||||
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||
if (file_) {
|
||||
ssize = get_line_file(&next_line_buffer_,
|
||||
&next_line_buffer_size_, file_);
|
||||
curr_char_ = std::ftell(file_);
|
||||
} else {
|
||||
ssize = get_line_buffer(&next_line_buffer_,
|
||||
&next_line_buffer_size_,
|
||||
csv_data_buffer_, csv_data_size_,
|
||||
curr_char_);
|
||||
}
|
||||
|
||||
if (eof) {
|
||||
if (ssize == -1) {
|
||||
if (errno == ENOMEM) {
|
||||
throw std::bad_alloc{};
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -3025,8 +3012,7 @@ private:
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
next_line_size_)) {
|
||||
next_line_converter_.handle_error_unterminated_escape();
|
||||
return;
|
||||
}
|
||||
@ -3044,8 +3030,7 @@ private:
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
next_line_size_)) {
|
||||
next_line_converter_.handle_error_unterminated_quote();
|
||||
return;
|
||||
}
|
||||
@ -3056,9 +3041,8 @@ private:
|
||||
return;
|
||||
}
|
||||
|
||||
if (!append_next_line_to_buffer(
|
||||
next_line_buffer_, next_line_size_,
|
||||
next_line_buffer_size_)) {
|
||||
if (!append_next_line_to_buffer(next_line_buffer_,
|
||||
next_line_size_)) {
|
||||
next_line_converter_
|
||||
.handle_error_unterminated_escape();
|
||||
return;
|
||||
@ -3102,20 +3086,18 @@ private:
|
||||
return next_line_converter_.unterminated_quote();
|
||||
}
|
||||
|
||||
void undo_remove_eol(char* buffer, size_t& line_size,
|
||||
size_t buffer_size) {
|
||||
if (crlf_ && buffer_size >= line_size + 2) {
|
||||
std::copy_n("\r\n", 2, buffer + line_size);
|
||||
line_size += 2;
|
||||
} else if (buffer_size > line_size) {
|
||||
std::copy_n("\n", 1, buffer + line_size);
|
||||
line_size += 1;
|
||||
void undo_remove_eol(char* buffer, size_t& string_end) {
|
||||
if (crlf_) {
|
||||
std::copy_n("\r\n\0", 3, buffer + string_end);
|
||||
string_end += 2;
|
||||
} else {
|
||||
std::copy_n("\n\0", 2, buffer + string_end);
|
||||
string_end += 1;
|
||||
}
|
||||
}
|
||||
|
||||
size_t remove_eol(char*& buffer, size_t ssize) {
|
||||
if (buffer[ssize - 1] != '\n') {
|
||||
crlf_ = false;
|
||||
return ssize;
|
||||
}
|
||||
|
||||
@ -3143,23 +3125,28 @@ private:
|
||||
first_size += second_size;
|
||||
}
|
||||
|
||||
bool append_next_line_to_buffer(char*& buffer, size_t& line_size,
|
||||
size_t buffer_size) {
|
||||
undo_remove_eol(buffer, line_size, buffer_size);
|
||||
bool append_next_line_to_buffer(char*& buffer, size_t& size) {
|
||||
undo_remove_eol(buffer, size);
|
||||
|
||||
chars_read_ = curr_char_;
|
||||
auto [next_ssize, eof] =
|
||||
get_line(helper_buffer_, helper_buffer_size, file_,
|
||||
csv_data_buffer_, csv_data_size_, curr_char_);
|
||||
ssize_t next_ssize;
|
||||
if (file_) {
|
||||
next_ssize =
|
||||
get_line_file(&helper_buffer_, &helper_buffer_size, file_);
|
||||
} else {
|
||||
next_ssize =
|
||||
get_line_buffer(&helper_buffer_, &helper_buffer_size,
|
||||
csv_data_buffer_, csv_data_size_,
|
||||
curr_char_);
|
||||
}
|
||||
|
||||
if (eof) {
|
||||
if (next_ssize == -1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++line_number_;
|
||||
size_t next_size = remove_eol(helper_buffer_, next_ssize);
|
||||
realloc_concat(buffer, line_size, next_line_buffer_size_,
|
||||
helper_buffer_, next_size);
|
||||
realloc_concat(buffer, size, next_line_buffer_size_, helper_buffer_,
|
||||
next_size);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -16,14 +16,13 @@ TEST_CASE_TEMPLATE("test multiline restricted", T, ParserOptionCombinations) {
|
||||
out << "5,6,just\\\n\\\nstrings" << std::endl;
|
||||
#endif
|
||||
out << "7,8,ju\\\n\\\n\\\nnk" << std::endl;
|
||||
out << "99,100,\"\n\n\n\n" << std::endl;
|
||||
out << "9,10,\"just\\\n\nstrings\"" << std::endl;
|
||||
out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl;
|
||||
out << "13,14,\"ju\\\n\\\n15,16\"\\\n\\\\\n\nnk\"" << std::endl;
|
||||
out << "17,18,\"ju\\\n\\\n\\\n\\\\\n\nnk\"" << std::endl;
|
||||
out << "19,20,just strings" << std::endl;
|
||||
}
|
||||
auto bad_lines = 20;
|
||||
auto bad_lines = 15;
|
||||
auto num_errors = 0;
|
||||
|
||||
auto [p, _] =
|
||||
|
Loading…
Reference in New Issue
Block a user