update README, made parser handle invalid last line

This commit is contained in:
ado 2021-02-27 20:18:38 +01:00
parent fa185fc655
commit a9b73dfc37
3 changed files with 36 additions and 26 deletions

View File

@ -14,13 +14,13 @@
![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg) ![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg)
![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg) ![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg)
A header-only "csv" parser which is fast and versatile with a modern C++ API. Requires a compiler with C++17 support. Can also be used to convert strings to values. A header-only "csv" parser which is fast and versatile with a modern C++ API. Requires a compiler with C++17 support. Can also be used to convert strings to specific types.
Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) . Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) .
Function traits taken from [qt-creator](https://code.woboq.org/qt5/qt-creator/src/libs/utils/functiontraits.h.html) . Function traits taken from [qt-creator](https://code.woboq.org/qt5/qt-creator/src/libs/utils/functiontraits.h.html) .
# Example # Example
Let's say we have a csv file containing students in a given format (NAME,AGE,GRADE) and we want to parse and print all the valid values: Let's say we have a csv file containing students in a given format '$name,$age,$grade' and we want to parse and print all the valid values:
```shell ```shell
$ cat students.csv $ cat students.csv

View File

@ -234,6 +234,17 @@ private:
} }
} }
// Puts this object into the "unterminated escape" error state.
// When string_error is enabled, error_ is cleared and then filled with the
// message reported by splitter_; otherwise error_ is simply set to true.
void set_error_unterminated_escape() {
if constexpr (string_error) {
error_.clear();
// ask the splitter to record the error, then copy its message into error_
splitter_.set_error_unterminated_escape();
error_.append(splitter_.error_msg());
} else {
error_ = true;
}
}
void set_error_multiline_limit_reached() { void set_error_multiline_limit_reached() {
if constexpr (string_error) { if constexpr (string_error) {
error_.clear(); error_.clear();

View File

@ -57,13 +57,9 @@ public:
return error_; return error_;
} }
bool eof() const { bool eof() const { return eof_; }
return eof_;
}
bool ignore_next() { bool ignore_next() { return reader_.read_next(); }
return reader_.read_next();
}
template <typename T, typename... Ts> template <typename T, typename... Ts>
T get_object() { T get_object() {
@ -96,8 +92,7 @@ public:
class composite { class composite {
public: public:
composite(std::tuple<Ts...>&& values, parser& parser) composite(std::tuple<Ts...>&& values, parser& parser)
: values_{std::move(values)}, parser_{parser} { : values_{std::move(values)}, parser_{parser} {}
}
// tries to convert the same line with a different output type // tries to convert the same line with a different output type
// only if the previous conversion was not successful, // only if the previous conversion was not successful,
@ -123,9 +118,7 @@ public:
return composite_with(std::move(value)); return composite_with(std::move(value));
} }
std::tuple<Ts...> values() { std::tuple<Ts...> values() { return values_; }
return values_;
}
template <typename Fun> template <typename Fun>
auto on_error(Fun&& fun) { auto on_error(Fun&& fun) {
@ -300,7 +293,7 @@ private:
if constexpr (string_error) { if constexpr (string_error) {
error_.append(file_name_) error_.append(file_name_)
.append(" ") .append(" ")
.append(std::to_string(line_number_)) .append(std::to_string(reader_.line_number_))
.append(": ") .append(": ")
.append(reader_.converter_.error_msg()) .append(reader_.converter_.error_msg())
.append(": \"") .append(": \"")
@ -315,15 +308,11 @@ private:
// line reading // line reading
//////////////// ////////////////
void read_line() { void read_line() { eof_ = !reader_.read_next(); }
eof_ = !reader_.read_next();
++line_number_;
}
struct reader { struct reader {
reader(const std::string& file_name_, const std::string& delim) reader(const std::string& file_name_, const std::string& delim)
: delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} { : delim_{delim}, file_{fopen(file_name_.c_str(), "rb")} {}
}
reader(reader&& other) reader(reader&& other)
: buffer_{other.buffer_}, : buffer_{other.buffer_},
@ -333,7 +322,8 @@ private:
next_line_converter_{std::move(other.next_line_converter_)}, next_line_converter_{std::move(other.next_line_converter_)},
size_{other.size_}, next_line_size_{other.size_}, size_{other.size_}, next_line_size_{other.size_},
helper_size_{other.helper_size_}, delim_{std::move(other.delim_)}, helper_size_{other.helper_size_}, delim_{std::move(other.delim_)},
file_{other.file_}, crlf_{other.crlf_} { file_{other.file_}, crlf_{other.crlf_}, line_number_{
other.line_number_} {
other.buffer_ = nullptr; other.buffer_ = nullptr;
other.next_line_buffer_ = nullptr; other.next_line_buffer_ = nullptr;
other.helper_buffer_ = nullptr; other.helper_buffer_ = nullptr;
@ -353,6 +343,7 @@ private:
delim_ = std::move(other.delim_); delim_ = std::move(other.delim_);
file_ = other.file_; file_ = other.file_;
crlf_ = other.crlf_; crlf_ = other.crlf_;
line_number_ = other.line_number_;
other.buffer_ = nullptr; other.buffer_ = nullptr;
other.next_line_buffer_ = nullptr; other.next_line_buffer_ = nullptr;
@ -378,6 +369,7 @@ private:
reader& operator=(const reader& other) = delete; reader& operator=(const reader& other) = delete;
bool read_next() { bool read_next() {
++line_number_;
memset(next_line_buffer_, '\0', next_line_size_); memset(next_line_buffer_, '\0', next_line_size_);
ssize_t ssize = ssize_t ssize =
get_line(&next_line_buffer_, &next_line_size_, file_); get_line(&next_line_buffer_, &next_line_size_, file_);
@ -395,7 +387,9 @@ private:
return true; return true;
} }
if (!append_next_line_to_buffer(next_line_buffer_, size)) { if (!append_next_line_to_buffer(next_line_buffer_, size)) {
return false; remove_eol(next_line_buffer_, ssize);
next_line_converter_.set_error_unterminated_escape();
return true;
} }
} }
} }
@ -408,7 +402,8 @@ private:
return true; return true;
} }
if (!append_next_line_to_buffer(next_line_buffer_, size)) { if (!append_next_line_to_buffer(next_line_buffer_, size)) {
return false; remove_eol(next_line_buffer_, ssize);
return true;
} }
if constexpr (escaped_multiline_enabled) { if constexpr (escaped_multiline_enabled) {
@ -418,7 +413,10 @@ private:
} }
if (!append_next_line_to_buffer(next_line_buffer_, if (!append_next_line_to_buffer(next_line_buffer_,
size)) { size)) {
return false; remove_eol(next_line_buffer_, ssize);
next_line_converter_
.set_error_unterminated_escape();
return true;
} }
} }
} }
@ -491,7 +489,7 @@ private:
void realloc_concat(char*& first, size_t& first_size, void realloc_concat(char*& first, size_t& first_size,
const char* const second, size_t second_size) { const char* const second, size_t second_size) {
next_line_size_ = first_size + second_size + 2; next_line_size_ = first_size + second_size + 3;
first = static_cast<char*>( first = static_cast<char*>(
realloc(static_cast<void*>(first), next_line_size_)); realloc(static_cast<void*>(first), next_line_size_));
std::copy_n(second, second_size + 1, first + first_size); std::copy_n(second, second_size + 1, first + first_size);
@ -507,6 +505,7 @@ private:
return false; return false;
} }
++line_number_;
size_t next_size = remove_eol(helper_buffer_, next_ssize); size_t next_size = remove_eol(helper_buffer_, next_ssize);
realloc_concat(buffer, size, helper_buffer_, next_size); realloc_concat(buffer, size, helper_buffer_, next_size);
return true; return true;
@ -530,6 +529,7 @@ private:
FILE* file_{nullptr}; FILE* file_{nullptr};
bool crlf_; bool crlf_;
size_t line_number_{0};
}; };
//////////////// ////////////////
@ -539,7 +539,6 @@ private:
std::string file_name_; std::string file_name_;
error_type error_{}; error_type error_{};
reader reader_; reader reader_;
size_t line_number_{0};
bool eof_{false}; bool eof_{false};
}; };