mirror of
https://github.com/red0124/ssp.git
synced 2025-06-07 21:02:31 +02:00
Compare commits
7 Commits
df2beab6c3
...
457defadaa
Author | SHA1 | Date | |
---|---|---|---|
![]() |
457defadaa | ||
![]() |
1b9a01f787 | ||
![]() |
f5b750dd93 | ||
7f53b585f9 | |||
67ef6651c1 | |||
fa4ec324de | |||
![]() |
f229de61d6 |
7
.github/fuzz/makefile
vendored
Normal file
7
.github/fuzz/makefile
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
EXE=ssp_fuzz
|
||||
|
||||
all:
|
||||
clang++ ${CXXFLAGS} ssp_fuzz.cpp -fsanitize=fuzzer -std=c++17 -o ${EXE}
|
||||
|
||||
run:
|
||||
./${EXE} -max_total_time=900
|
81
.github/fuzz/ssp_fuzz.cpp
vendored
Normal file
81
.github/fuzz/ssp_fuzz.cpp
vendored
Normal file
@ -0,0 +1,81 @@
|
||||
#include "../../ssp.hpp"
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <unistd.h>
|
||||
|
||||
template <typename... Ts>
|
||||
void test_ssp_file_mode(const uint8_t* data, size_t size,
|
||||
std::string delim = ss::default_delimiter) {
|
||||
std::string file_name = std::filesystem::temp_directory_path().append(
|
||||
"ss_fuzzer" + std::to_string(getpid()) + ".csv");
|
||||
FILE* file = std::fopen(file_name.c_str(), "wb");
|
||||
if (!file) {
|
||||
std::exit(1);
|
||||
}
|
||||
std::fwrite(data, size, 1, file);
|
||||
std::fclose(file);
|
||||
|
||||
ss::parser<Ts...> p{file_name.c_str(), delim};
|
||||
while (!p.eof()) {
|
||||
try {
|
||||
const auto& [s0, s1] =
|
||||
p.template get_next<std::string, std::string>();
|
||||
if (s0.size() == 10000) {
|
||||
std::cout << s0.size() << std::endl;
|
||||
}
|
||||
} catch (ss::exception& e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
std::remove(file_name.c_str());
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
void test_ssp_buffer_mode(const uint8_t* data, size_t size,
|
||||
std::string delim = ss::default_delimiter) {
|
||||
ss::parser<Ts...> p{(const char*)data, size, delim};
|
||||
while (!p.eof()) {
|
||||
try {
|
||||
const auto& [s0, s1] =
|
||||
p.template get_next<std::string, std::string>();
|
||||
if (s0.size() == 10000) {
|
||||
std::cout << s0.size() << std::endl;
|
||||
}
|
||||
} catch (ss::exception& e) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
void test_ssp(const uint8_t* data, size_t size) {
|
||||
test_ssp_file_mode<Ts...>(data, size);
|
||||
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size);
|
||||
|
||||
test_ssp_file_mode<Ts...>(data, size, ":::");
|
||||
test_ssp_file_mode<Ts..., ss::throw_on_error>(data, size, ":::");
|
||||
|
||||
test_ssp_buffer_mode<Ts...>(data, size);
|
||||
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size);
|
||||
|
||||
test_ssp_buffer_mode<Ts...>(data, size, ":::");
|
||||
test_ssp_buffer_mode<Ts..., ss::throw_on_error>(data, size, ":::");
|
||||
}
|
||||
|
||||
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
|
||||
using escape = ss::escape<'\\'>;
|
||||
using quote = ss::quote<'"'>;
|
||||
using trim = ss::trim<' ', '\t'>;
|
||||
using multiline_r = ss::multiline_restricted<5>;
|
||||
|
||||
test_ssp<>(data, size);
|
||||
test_ssp<escape>(data, size);
|
||||
test_ssp<quote>(data, size);
|
||||
test_ssp<trim>(data, size);
|
||||
test_ssp<quote, escape>(data, size);
|
||||
test_ssp<escape, quote, multiline_r, trim>(data, size);
|
||||
test_ssp<escape, quote, multiline_r, trim, ss::ignore_empty>(data, size);
|
||||
|
||||
return 0;
|
||||
}
|
43
.github/workflows/fuzz.yml
vendored
Normal file
43
.github/workflows/fuzz.yml
vendored
Normal file
@ -0,0 +1,43 @@
|
||||
name: fuzz-ci
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- feature/**
|
||||
- improvement/**
|
||||
- bugfix/**
|
||||
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
- feature/**
|
||||
- improvement/**
|
||||
- bugfix/**
|
||||
|
||||
jobs:
|
||||
clang_tests:
|
||||
if: >-
|
||||
! contains(toJSON(github.event.commits.*.message), '[skip ci]') &&
|
||||
! contains(toJSON(github.event.commits.*.message), '[skip github]')
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
name: "Fuzzing"
|
||||
|
||||
container:
|
||||
image: silkeh/clang:15
|
||||
options: -v /usr/local:/host_usr_local
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v1
|
||||
|
||||
- name: Build
|
||||
working-directory: .github/fuzz
|
||||
run: make
|
||||
|
||||
- name: Run
|
||||
working-directory: .github/fuzz
|
||||
run: make run
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,6 +1,8 @@
|
||||
compile_commands.json
|
||||
.clang-format
|
||||
.clang-tidy
|
||||
.ccls-cache/*
|
||||
.cache/
|
||||
experiment/
|
||||
build/
|
||||
hbuild/
|
||||
|
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.14)
|
||||
|
||||
project(
|
||||
ssp
|
||||
VERSION 1.7.0
|
||||
VERSION 1.7.2
|
||||
DESCRIPTION "csv parser"
|
||||
HOMEPAGE_URL "https://github.com/red0124/ssp"
|
||||
LANGUAGES CXX
|
||||
|
11
README.md
11
README.md
@ -9,6 +9,7 @@
|
||||
|
||||
[](https://opensource.org/licenses/MIT)
|
||||
[](https://coveralls.io/github/red0124/ssp?branch=master)
|
||||
[](https://github.com/red0124/ssp/actions/workflows/fuzz.yml)
|
||||
[](https://github.com/red0124/ssp/actions/workflows/single-header.yml)
|
||||
[](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-gcc.yml)
|
||||
[](https://github.com/red0124/ssp/actions/workflows/ubuntu-latest-clang.yml)
|
||||
@ -73,7 +74,7 @@ Bill (Heath) Gates 65 3.3
|
||||
|
||||
# Single header
|
||||
|
||||
The library can be used with a single header file **`ssp.hpp`**, but it sufferes a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
||||
The library can be used with a single header file **`ssp.hpp`**, but it suffers a slight performance loss when converting floating point values since the **`fast_float`** library is not present within the file.
|
||||
|
||||
# Installation
|
||||
|
||||
@ -115,11 +116,11 @@ James Bailey 2.5
|
||||
Brian S. Wolfe 1.9
|
||||
Bill (Heath) Gates 3.3
|
||||
```
|
||||
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** metod after the parser has been constructed.
|
||||
The header can be ignored using the **`ss::ignore_header`** [setup](#Setup) option or by calling the **`ignore_next`** method after the parser has been constructed.
|
||||
```cpp
|
||||
ss::parser<ss::ignore_header> p{file_name};
|
||||
```
|
||||
The fields with which the parser works with can be modified at any given time. The praser can also check if a field is present within the header by using the **`field_exists`** method.
|
||||
The fields with which the parser works can be modified at any given time. The parser can also check if a field is present within the header by using the **`field_exists`** method.
|
||||
```cpp
|
||||
// ...
|
||||
ss::parser<ss::throw_on_error> p{"students_with_header.csv"};
|
||||
@ -249,7 +250,7 @@ By default, **`,`** is used as the delimiter, a custom delimiter can be specifie
|
||||
```cpp
|
||||
ss::parser p{file_name, "--"};
|
||||
```
|
||||
*Note, the delimiter can consist of multiple characters but the parser is slightliy faster when using single character delimiters.*
|
||||
*Note, the delimiter can consist of multiple characters but the parser is slightly faster when using single character delimiters.*
|
||||
|
||||
### Empty lines
|
||||
Empty lines can be ignored by defining **`ss::ignore_empty`** within the setup parameters:
|
||||
@ -398,7 +399,7 @@ if (std::holds_alternative<float>(grade)) {
|
||||
// grade set as char
|
||||
}
|
||||
```
|
||||
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** make the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers arround the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser he will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
|
||||
Passing **`char`** and types that are aliases to it such as **`uint8_t`** and **`int8_t`** makes the parser interpret the input data as a single character in a similar way to how **`std::cin`** does it. To read numeric values into something like **`uint8_t`** the **`ss::uint8`** and **`ss::int8`** types can be used. These are wrappers around the corresponding char aliases and can be implicitly converted to and from them. When these types are given to the parser, it will try to read the given data and store it in the underlying element, but this time as a numeric value instead of a single character.
|
||||
```cpp
|
||||
// returns std::tuple<std::string, ss::uint8, float>
|
||||
auto [id, age, grade] = p.get_next<std::string, ss::uint8, float>();
|
||||
|
@ -45,7 +45,7 @@ inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
using ssize_t = intptr_t;
|
||||
|
||||
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||
@ -81,7 +81,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
#endif
|
||||
|
||||
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
const char* const csv_data_buffer, size_t csv_data_size,
|
||||
size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
@ -114,22 +114,18 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
}
|
||||
}
|
||||
|
||||
if (line_used != 0) {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
return -1;
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
FILE* file,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
ssize_t ssize;
|
||||
if (file) {
|
||||
ssize = get_line_file(buffer, buffer_size, file);
|
||||
curr_char = std::ftell(file);
|
||||
curr_char += ssize;
|
||||
} else {
|
||||
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||
csv_data_size, curr_char);
|
||||
@ -145,4 +141,4 @@ std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
return {ssize, false};
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -493,4 +493,4 @@ private:
|
||||
size_t number_of_columns_;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -20,4 +20,4 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -142,7 +142,7 @@ template <typename T>
|
||||
struct unsupported_type {
|
||||
constexpr static bool value = false;
|
||||
};
|
||||
} /* namespace */
|
||||
} /* namespace errors */
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
|
||||
@ -247,4 +247,4 @@ inline bool extract(const char* begin, const char* end,
|
||||
return true;
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -77,4 +77,4 @@ struct member_wrapper<R T::*> {
|
||||
template <typename T> \
|
||||
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
|
||||
|
||||
} /* trait */
|
||||
} /* namespace ss */
|
||||
|
@ -971,4 +971,4 @@ private:
|
||||
bool eof_{false};
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -124,4 +124,4 @@ struct ne {
|
||||
}
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -293,4 +293,4 @@ private:
|
||||
template <typename... Options>
|
||||
struct setup<setup<Options...>> : setup<Options...> {};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -479,4 +478,4 @@ public:
|
||||
friend class converter;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -378,4 +378,4 @@ T to_object(U&& data) {
|
||||
}
|
||||
}
|
||||
|
||||
} /* trait */
|
||||
} /* namespace ss */
|
||||
|
@ -6,7 +6,7 @@ project(
|
||||
'cpp_std=c++17',
|
||||
'buildtype=debugoptimized',
|
||||
'wrap_mode=forcefallback'],
|
||||
version: '1.7.0',
|
||||
version: '1.7.2',
|
||||
meson_version:'>=0.54.0')
|
||||
|
||||
fast_float_dep = dependency('fast_float')
|
||||
|
39
ssp.hpp
39
ssp.hpp
@ -8,7 +8,6 @@
|
||||
#include <cstring>
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
@ -394,7 +393,7 @@ T to_object(U&& data) {
|
||||
}
|
||||
}
|
||||
|
||||
} /* trait */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
|
||||
@ -414,7 +413,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
|
||||
namespace ss {
|
||||
@ -490,7 +489,7 @@ struct member_wrapper<R T::*> {
|
||||
template <typename T> \
|
||||
constexpr bool has_m_##method##_t = has_m_##method<T>::value;
|
||||
|
||||
} /* trait */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
|
||||
@ -616,7 +615,7 @@ struct ne {
|
||||
}
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
|
||||
@ -657,7 +656,7 @@ inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
using ssize_t = intptr_t;
|
||||
|
||||
ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
inline ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
char buff[get_line_initial_buffer_size];
|
||||
|
||||
if (lineptr == nullptr || n < sizeof(buff)) {
|
||||
@ -693,7 +692,7 @@ ssize_t get_line_file(char*& lineptr, size_t& n, FILE* file) {
|
||||
|
||||
#endif
|
||||
|
||||
ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
inline ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
const char* const csv_data_buffer, size_t csv_data_size,
|
||||
size_t& curr_char) {
|
||||
if (curr_char >= csv_data_size) {
|
||||
@ -726,22 +725,18 @@ ssize_t get_line_buffer(char*& lineptr, size_t& n,
|
||||
}
|
||||
}
|
||||
|
||||
if (line_used != 0) {
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
return -1;
|
||||
lineptr[line_used] = '\0';
|
||||
return line_used;
|
||||
}
|
||||
|
||||
std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
inline std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
FILE* file,
|
||||
const char* const csv_data_buffer,
|
||||
size_t csv_data_size, size_t& curr_char) {
|
||||
ssize_t ssize;
|
||||
if (file) {
|
||||
ssize = get_line_file(buffer, buffer_size, file);
|
||||
curr_char = std::ftell(file);
|
||||
curr_char += ssize;
|
||||
} else {
|
||||
ssize = get_line_buffer(buffer, buffer_size, csv_data_buffer,
|
||||
csv_data_size, curr_char);
|
||||
@ -757,7 +752,7 @@ std::tuple<ssize_t, bool> get_line(char*& buffer, size_t& buffer_size,
|
||||
return {ssize, false};
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
|
||||
@ -1050,7 +1045,7 @@ private:
|
||||
template <typename... Options>
|
||||
struct setup<setup<Options...>> : setup<Options...> {};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
|
||||
@ -1521,7 +1516,7 @@ public:
|
||||
friend class converter;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
|
||||
#ifndef SSP_DISABLE_FAST_FLOAT
|
||||
@ -1654,7 +1649,7 @@ template <typename T>
|
||||
struct unsupported_type {
|
||||
constexpr static bool value = false;
|
||||
};
|
||||
} /* namespace */
|
||||
} /* namespace error */
|
||||
|
||||
template <typename T>
|
||||
std::enable_if_t<!std::is_integral_v<T> && !std::is_floating_point_v<T> &&
|
||||
@ -1759,7 +1754,7 @@ inline bool extract(const char* begin, const char* end,
|
||||
return true;
|
||||
}
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace ss {
|
||||
INIT_HAS_METHOD(tied)
|
||||
@ -2245,7 +2240,7 @@ private:
|
||||
size_t number_of_columns_;
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
|
||||
namespace ss {
|
||||
@ -3207,4 +3202,4 @@ private:
|
||||
bool eof_{false};
|
||||
};
|
||||
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
@ -23,7 +23,7 @@ struct is_unsigned : public std::is_unsigned<T> {};
|
||||
template <>
|
||||
struct is_unsigned<ss::uint8> : public std::true_type {};
|
||||
|
||||
} /* namespace */
|
||||
} /* anonymous namespace */
|
||||
|
||||
static_assert(is_signed<ss::int8>::value);
|
||||
static_assert(is_unsigned<ss::uint8>::value);
|
||||
|
@ -19,7 +19,7 @@
|
||||
namespace ss {
|
||||
template <typename... Ts>
|
||||
class parser;
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
namespace {
|
||||
|
||||
@ -224,4 +224,4 @@ make_parser(const std::string& file_name,
|
||||
return make_parser_impl<buffer_mode, Ts...>(file_name, delim);
|
||||
}
|
||||
|
||||
} /* namespace */
|
||||
} /* anonymous namespace */
|
||||
|
@ -109,4 +109,4 @@ static void make_and_write(const std::string& file_name,
|
||||
}
|
||||
}
|
||||
|
||||
} /* namespace */
|
||||
} /* anonymous namespace */
|
||||
|
@ -616,7 +616,7 @@ void test_option_combinations3() {
|
||||
test_option_combinations2<Ts..., trim>();
|
||||
}
|
||||
|
||||
} /* namespace */
|
||||
} /* anonymous namespace */
|
||||
|
||||
// Tests split into multiple compilation units
|
||||
#if 0
|
||||
|
@ -145,7 +145,7 @@ make_combinations(const std::vector<std::string>& input,
|
||||
|
||||
return {std::move(lines), std::move(expectations)};
|
||||
}
|
||||
} /* namespace */
|
||||
} /* anonymous namespace */
|
||||
|
||||
/* ********************************** */
|
||||
/* ********************************** */
|
||||
@ -548,7 +548,7 @@ public:
|
||||
return splitter.size_shifted();
|
||||
}
|
||||
};
|
||||
} /* ss */
|
||||
} /* namespace ss */
|
||||
|
||||
TEST_CASE("splitter test resplit unterminated quote") {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user