From 2c1fe9be9fe8e551391192651ac149417ec1a7b3 Mon Sep 17 00:00:00 2001 From: ado Date: Mon, 28 Mar 2022 19:11:41 +0200 Subject: [PATCH] update documentation --- README.md | 105 +++++++++++++++++++++++++++++++++--------- include/ss/parser.hpp | 27 ++++++++--- include/ss/setup.hpp | 17 ++++++- test/test_parser.cpp | 97 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 216 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 177e7b6..60fe9d1 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Bill (Heath) Gates,65,3.3 int main() { ss::parser p{"students.csv", ","}; - for(auto& [name, age, grade] : p.iterate()) { + for(const auto& [name, age, grade] : p.iterate()) { if (p.valid()) { std::cout << name << ' ' << age << ' ' << grade << std::endl; } @@ -56,6 +56,7 @@ Bill (Heath) Gates 65 3.3 * [Works on any type](#Custom-conversions) * Easy to use * No exceptions + * [Works with headers](#Headers) * [Works with quotes, escapes and spacings](#Setup) * [Works with values containing new lines](#Multiline) * [Columns and rows can be ignored](#Special-types) @@ -86,7 +87,7 @@ The library supports [CMake](#Cmake) and [meson](#Meson) build systems An alternate loop to the example above would look like: ```cpp while(!p.eof()) { - auto [name, age, grade] = p.get_next(); + auto [name, age, grade] = p.get_next(); if (p.valid()) { std::cout << name << ' ' << age << ' ' << grade << std::endl; } @@ -95,13 +96,13 @@ while(!p.eof()) { The alternate example will be used to show some of the features of the library. The **get_next** method returns a tuple of objects specified inside the template type list. -If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **valid** method would return **false**, for example if the third (grade) column in our csv could not be converted to a double the conversion would fail. 
+If a conversion could not be applied, the method would return a tuple of default constructed objects, and the **valid** method would return **false**, for example if the third (grade) column in our csv could not be converted to a float the conversion would fail. If **get_next** is called with a **tuple** as template parameter it would behave identically to passing the same tuple parameters to **get_next**: ```cpp -using student = std::tuple; +using student = std::tuple; -// returns std::tuple +// returns std::tuple auto [name, age, grade] = p.get_next(); ``` *Note, it does not always return a student tuple since the returned tuples parameters may be altered as explained below (no void, no restrictions, ...)* @@ -111,12 +112,12 @@ Whole objects can be returned using the **get_object** function which takes the struct student { std::string name; int age; - double grade; + float grade; }; ``` ```cpp // returns student -auto student = p.get_object(); +auto student = p.get_object(); ``` This works with any object if the constructor could be invoked using the template arguments given to **get_object**: ```cpp @@ -126,7 +127,7 @@ auto vec = p.get_object, std::string, std::string, ``` An iteration loop as in the first example which returns objects would look like: ```cpp -for(auto& student : p.iterate_object()) { +for(const auto& student : p.iterate_object()) { // ... } ``` @@ -135,7 +136,7 @@ And finally, using something I personally like to do, a struct (class) with a ** struct student { std::string name; int age; - double grade; + float grade; auto tied() { return std::tie(name, age, grade); } }; @@ -148,6 +149,59 @@ auto s = p.get_next(); This works with the iteration loop too. *Note, the order in which the members of the tied method are returned must match the order of the elements in the csv*. +## Headers + +The parser can be told to use only certain columns by parsing the header. This is done using the **use_fields** method. 
It accepts any number of string-like arguments or even an std::vector with the field names. If any of the fields is not found within the header or if it is defined multiple times it will result in an error. +```shell +$ cat students_with_header.csv +Name,Age,Grade +James Bailey,65,2.5 +Brian S. Wolfe,40,1.9 +Bill (Heath) Gates,65,3.3 +``` +```cpp + // ... + ss::parser p{"students.csv", ","}; + p.use_fields("Name", "Grade"); + + for(const auto& [name, grade] : p.iterate()) { + std::cout << name << ' ' << grade << std::endl; + } + // ... +``` +```shell +$ ./a.out +James Bailey 2.5 +Brian S. Wolfe 1.9 +Bill (Heath) Gates 3.3 +``` +The header can be ignored using the **ss::ignore_header** [setup](#Setup) option or by calling the **ignore_next** method after the parser has been constructed. +```cpp +ss::parser p{file_name}; +``` +The fields with which the parser works can be modified at any given time. The parser can also check if a field is present within the header by using the **field_exists** method. +```cpp + // ... + ss::parser p{"students.csv", ","}; + p.use_fields("Name", "Grade"); + + const auto& [name, grade] = p.get_next(); + std::cout << name << ' ' << grade << std::endl; + + if(p.field_exists("Age")) { + p.use_fields("Grade", "Name", "Age"); + for(const auto& [grade, name, age] : p.iterate()) { + std::cout << grade << ' ' << name << ' ' << age << std::endl; + } + } + // ... +``` +```shell +$ ./a.out +James Bailey 2.5 +1.9 Brian S. Wolfe 40 +3.3 Bill (Heath) Gates 65 +``` ## Setup By default, many of the features supported by the parser are disabled. They can be enabled within the template parameters of the parser. For example, to enable quoting and escaping the parser would look like: ```cpp @@ -167,6 +221,13 @@ ss::parser p2{file_name}; Invalid setups will be met with **static_asserts**. 
*Note, each setup parameter defined comes with a slight performance loss, so use them only if needed.* +### Empty lines +Empty lines can be ignored by defining **ss::ignore_empty** within the setup parameters: +```cpp +ss::parser p{file_name}; +``` +If this setup option is not set then reading an empty line will result in an error (unless only one column is present within the parser). + ### Quoting Quoting can be enabled by defining **ss::quote** within the setup parameters. A single character can be defined as the quoting character, for example to use **"** as a quoting character: ```cpp @@ -241,7 +302,7 @@ ss::parser, ss::multiline_restricted<5>> p{file_name}; while(!p.eof()) { - auto [name, age, grade] = p.get_next(); + auto [name, age, grade] = p.get_next(); if(!p.valid()) { continue; } @@ -270,14 +331,14 @@ Gates 65 3.3' Passing **void** makes the parser ignore a column. In the given example **void** could be given as the second template parameter to ignore the second (age) column in the csv, a tuple of only 2 parameters would be retuned: ```cpp -// returns std::tuple -auto [name, grade] = p.get_next(); +// returns std::tuple +auto [name, grade] = p.get_next(); ``` Works with different types of conversions too: ```cpp -using student = std::tuple; +using student = std::tuple; -// returns std::tuple +// returns std::tuple auto [name, grade] = p.get_next(); ``` To ignore a whole row, **ignore_next** could be used, returns **false** if **eof**: @@ -287,19 +348,19 @@ bool parser::ignore_next(); **std::optional** could be passed if we wanted the conversion to proceed in the case of a failure returning **std::nullopt** for the specified column: ```cpp -// returns std::tuple> -auto [name, age, grade] = p.get_next(); +// returns std::tuple> +auto [name, age, grade] = p.get_next(); if(grade) { // do something with grade } ``` Similar to **std::optional**, **std::variant** could be used to try other conversions if the previous failed _(Note, conversion to std::string 
will always pass)_: ```cpp -// returns std::tuple> +// returns std::tuple> auto [name, age, grade] = - p.get_next(); -if(std::holds_alternative(grade)) { - // grade set as double + p.get_next(); +if(std::holds_alternative(grade)) { + // grade set as float } else if(std::holds_alternative(grade)) { // grade set as char } @@ -310,9 +371,9 @@ Custom **restrictions** can be used to narrow down the conversions of unwanted v ```cpp // ss::ne makes sure that the name is not empty // ss::ir makes sure that the grade will be in range [0, 10] -// returns std::tuple +// returns std::tuple auto [name, age, grade] = - p.get_next, int, ss::ir>(); + p.get_next, int, ss::ir>(); ``` If the restrictions are not met, the conversion will fail. Other predefined restrictions are **ss::ax** (all except), **ss::nx** (none except) and **ss::oor** (out of range), **ss::lt** (less than), ...(see *restrictions.hpp*): ```cpp diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index 5f91fda..0d112ec 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -27,6 +27,8 @@ class parser { constexpr static bool ignore_header = setup::ignore_header; + constexpr static bool ignore_empty = setup::ignore_empty; + public: parser(const std::string& file_name, const std::string& delim = ss::default_delimiter) @@ -558,16 +560,27 @@ private: reader& operator=(const reader& other) = delete; bool read_next() { - ++line_number_; - memset(next_line_buffer_, '\0', next_line_size_); - ssize_t ssize = - get_line(&next_line_buffer_, &next_line_size_, file_); - if (ssize == -1) { - return false; + ssize_t ssize; + size_t size = 0; + while (size == 0) { + ++line_number_; + if (next_line_size_ > 0) { + next_line_buffer_[0] = '\0'; + } + ssize = get_line(&next_line_buffer_, &next_line_size_, file_); + + if (ssize == -1) { + return false; + } + + size = remove_eol(next_line_buffer_, ssize); + + if constexpr (!ignore_empty) { + break; + } } - size_t size = remove_eol(next_line_buffer_, ssize); size_t limit 
= 0; if constexpr (escaped_multiline_enabled) { diff --git a/include/ss/setup.hpp b/include/ss/setup.hpp index 7a62657..1740387 100644 --- a/include/ss/setup.hpp +++ b/include/ss/setup.hpp @@ -173,6 +173,12 @@ class string_error; class ignore_header; +//////////////// +// ignore_empty +//////////////// + +class ignore_empty; + //////////////// // setup implementation //////////////// @@ -194,16 +200,24 @@ private: template struct is_ignore_header : std::is_same {}; + template + struct is_ignore_empty : std::is_same {}; + constexpr static auto count_matcher = count_v; + constexpr static auto count_multiline = count_v; + constexpr static auto count_string_error = count_v; + constexpr static auto count_ignore_header = count_v; + constexpr static auto count_ignore_empty = count_v; + constexpr static auto number_of_valid_setup_types = count_matcher + count_multiline + count_string_error + - count_ignore_header; + count_ignore_header + count_ignore_empty; using trim_left_only = get_matcher_t; using trim_right_only = get_matcher_t; @@ -219,6 +233,7 @@ public: using multiline = get_multiline_t; constexpr static bool string_error = (count_string_error == 1); constexpr static bool ignore_header = (count_ignore_header == 1); + constexpr static bool ignore_empty = (count_ignore_empty == 1); private: #define ASSERT_MSG "cannot have the same match character in multiple matchers" diff --git a/test/test_parser.cpp b/test/test_parser.cpp index 60fbce9..11dad95 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -49,11 +49,16 @@ void update_if_crlf(std::string& s) { struct X { constexpr static auto delim = ","; + constexpr static auto make_empty = "_EMPTY_"; int i; double d; std::string s; std::string to_string() const { + if (s == make_empty) { + return ""; + } + return std::to_string(i) .append(delim) .append(std::to_string(d)) @@ -1002,3 +1007,95 @@ TEST_CASE("parser test various cases with header") { testFields(o, d, {Dbl, Str, Int}); testFields(o, d, {Dbl, Int, 
Str}); } + +void testIgnoreEmpty(const std::vector& data) { + unique_file_name f; + make_and_write(f.name, data); + + std::vector expected; + for (const auto& d : data) { + if (d.s != X::make_empty) { + expected.push_back(d); + } + } + + { + ss::parser p{f.name, ","}; + + std::vector i; + for (const auto& a : p.iterate()) { + i.push_back(a); + } + + CHECK_EQ(i, expected); + } + + { + ss::parser p{f.name, ","}; + std::vector i; + size_t n = 0; + for (const auto& a : p.iterate()) { + if (data.at(n).s == X::make_empty) { + CHECK_FALSE(p.valid()); + } + i.push_back(a); + ++n; + } + + if (data != expected) { + CHECK_NE(i, expected); + } + } +} + +TEST_CASE("parser test various cases with empty lines") { + testIgnoreEmpty({{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + testIgnoreEmpty( + {{1, 2, X::make_empty}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, "w"}}); + + testIgnoreEmpty( + {{1, 2, "x"}, {3, 4, "y"}, {9, 10, "v"}, {11, 12, X::make_empty}}); + + testIgnoreEmpty( + {{1, 2, "x"}, {5, 6, X::make_empty}, {9, 10, "v"}, {11, 12, "w"}}); + + testIgnoreEmpty({{1, 2, X::make_empty}, + {5, 6, X::make_empty}, + {9, 10, "v"}, + {11, 12, "w"}}); + + testIgnoreEmpty({{1, 2, X::make_empty}, + {3, 4, "y"}, + {9, 10, "v"}, + {11, 12, X::make_empty}}); + + testIgnoreEmpty({{1, 2, "x"}, + {3, 4, "y"}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); + + testIgnoreEmpty({{1, 2, X::make_empty}, + {3, 4, "y"}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); + + testIgnoreEmpty({{1, 2, X::make_empty}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); + + testIgnoreEmpty({{1, 2, "x"}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, X::make_empty}}); + + testIgnoreEmpty({{1, 2, X::make_empty}, + {3, 4, X::make_empty}, + {9, 10, X::make_empty}, + {11, 12, "w"}}); + + testIgnoreEmpty({{11, 12, X::make_empty}}); + + testIgnoreEmpty({}); +}