Patch notes (free text ahead of the first "diff --git" header; patch tools skip it).
Contents: splits the single CI workflow into per-toolchain workflows (Ubuntu clang/gcc/icc,
MSYS2 clang/gcc), adds README status badges, adds ss::get_line as a wrapper over getline
with a hand-written fallback when __unix__ is not set, and adapts the parser tests to CRLF input.
Review fixes folded into the added lines (hunk line counts are unchanged):
  * icc and MSYS2 workflows: "-j" now takes $(nproc); those workflows define no step with id "cores".
  * MSYS2 workflows: CMAKE_BUILD_TYPE and the job name now use matrix.type, so Release entries build Release.
  * extract.hpp: the added preprocessor guards spell the macro __GNUG__ (was __GUNG__).
  * ci_setup_icc.sh: CXX is exported so the cmake process started after "source" can see it.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ubuntu-latest-clang.yml similarity index 58% rename from .github/workflows/ci.yml rename to .github/workflows/ubuntu-latest-clang.yml index cd1b7e2..b08c86f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ubuntu-latest-clang.yml @@ -1,4 +1,4 @@ -name: Continuous Integration +name: ubuntu-latest-clang-ci on: push: @@ -63,43 +63,3 @@ jobs: - name: Run working-directory: build run: ctest --output-on-failure -j ${{ steps.cores.outputs.count }} - - gcc_tests: - if: >- - ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && - ! contains(toJSON(github.event.commits.*.message), '[skip github]') - - strategy: - matrix: - version: [10, 9, 8] - - runs-on: ubuntu-latest - - name: GCC ${{ matrix.version }} - - container: - image: gcc:${{ matrix.version }} - - options: -v /usr/local:/host_usr_local - - steps: - - uses: actions/checkout@v1 - - - uses: friendlyanon/fetch-core-count@v1 - id: cores - - - name: CMake - run: echo "/host_usr_local/bin" >> $GITHUB_PATH - - - name: Install dependencies - run: script/ci_install_deps.sh - - - name: Configure - run: cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug - - - name: Build - run: cmake --build build -j ${{ steps.cores.outputs.count }} - - - name: Run - working-directory: build - run: ctest --output-on-failure -j ${{ steps.cores.outputs.count }} diff --git a/.github/workflows/ubuntu-latest-gcc.yml b/.github/workflows/ubuntu-latest-gcc.yml new file mode 100644 index 0000000..04959e6 --- /dev/null +++ b/.github/workflows/ubuntu-latest-gcc.yml @@ -0,0 +1,57 @@ +name: ubuntu-latest-gcc-ci + +on: + push: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + + pull_request: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + +jobs: + gcc_tests: + if: >- + ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && + !
contains(toJSON(github.event.commits.*.message), '[skip github]') + + strategy: + matrix: + version: [10, 9, 8] + + runs-on: ubuntu-latest + + name: GCC ${{ matrix.version }} + + container: + image: gcc:${{ matrix.version }} + + options: -v /usr/local:/host_usr_local + + steps: + - uses: actions/checkout@v1 + + - uses: friendlyanon/fetch-core-count@v1 + id: cores + + - name: CMake + run: echo "/host_usr_local/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: script/ci_install_deps.sh + + - name: Configure + run: cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug + + - name: Build + run: cmake --build build -j ${{ steps.cores.outputs.count }} + + - name: Run + working-directory: build + run: ctest --output-on-failure -j ${{ steps.cores.outputs.count }} diff --git a/.github/workflows/ubuntu-latest-icc.yml b/.github/workflows/ubuntu-latest-icc.yml new file mode 100644 index 0000000..4bd3b0d --- /dev/null +++ b/.github/workflows/ubuntu-latest-icc.yml @@ -0,0 +1,73 @@ +name: ubuntu-latest-icc-ci + +on: + push: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + + pull_request: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + +env: + LINUX_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17431/l_BaseKit_p_2021.1.0.2659_offline.sh + LINUX_HPCKIT_URL: + https://registrationcenter-download.intel.com/akdlm/irc_nas/17427/l_HPCKit_p_2021.1.0.2684_offline.sh + +jobs: + icc_tests: + if: >- + ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && + !
contains(toJSON(github.event.commits.*.message), '[skip github]') + + runs-on: ubuntu-latest + defaults: + run: + shell: bash + + container: + image: gcc:10 + options: -v /usr/local:/host_usr_local + + steps: + - uses: actions/checkout@v2 + + - name: cache install + id: cache-install + uses: actions/cache@v2 + with: + path: | + /opt/intel/oneapi/compiler + key: >- + install-${{ env.LINUX_HPCKIT_URL }}- + ${{ env.LINUX_CPP_COMPONENTS_WEB }}- + compiler-${{ hashFiles('**/scripts/cache_exclude_linux.sh') }} + + - name: Install icc + run: script/ci_install_icc.sh $LINUX_HPCKIT_URL $LINUX_CPP_COMPONENTS_WEB + + - name: CMake + run: echo "/host_usr_local/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: script/ci_install_deps.sh + + - name: Configure + run: >- + source script/ci_setup_icc.sh && + cmake -S test -B build -D CMAKE_BUILD_TYPE=Debug + + - name: Build + run: >- + source script/ci_setup_icc.sh && + cmake --build build -j $(nproc) + + - name: Run + working-directory: build + run: ctest --output-on-failure -j $(nproc) diff --git a/.github/workflows/win-msys2-clang.yml b/.github/workflows/win-msys2-clang.yml new file mode 100644 index 0000000..c13b862 --- /dev/null +++ b/.github/workflows/win-msys2-clang.yml @@ -0,0 +1,77 @@ +name: win-msys2-clang-ci + +on: + push: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + + pull_request: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + +jobs: + windows-mingw: + if: >- + ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && + !
contains(toJSON(github.event.commits.*.message), '[skip github]') + + name: ${{ matrix.msystem }} ${{ matrix.type }} + runs-on: windows-latest + defaults: + run: + shell: msys2 {0} + strategy: + fail-fast: false + matrix: + include: + - msystem: "MINGW64" + install: >- + git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja + mingw-w64-x86_64-clang + type: Release + - msystem: "MINGW32" + install: >- + git mingw-w64-i686-cmake mingw-w64-i686-ninja + mingw-w64-i686-clang + type: Release + - msystem: "MINGW64" + install: >- + git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja + mingw-w64-x86_64-clang + type: Debug + - msystem: "MINGW32" + install: >- + git mingw-w64-i686-cmake mingw-w64-i686-ninja + mingw-w64-i686-clang + type: Debug + env: + CMAKE_GENERATOR: Ninja + + steps: + - uses: actions/checkout@v2 + - uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{ matrix.msystem }} + install: ${{ matrix.install }} + + - name: Install dependencies + run: script/ci_install_deps.sh + + - name: Configure + run: >- + cmake -DCMAKE_CXX_COMPILER=clang++ -S test -B build + -DCMAKE_BUILD_TYPE=${{ matrix.type }} + + - name: Build + run: cmake --build build -j $(nproc) + + - name: Run + working-directory: build + run: ctest --output-on-failure -j $(nproc) diff --git a/.github/workflows/win-msys2-gcc.yml b/.github/workflows/win-msys2-gcc.yml new file mode 100644 index 0000000..e4c6795 --- /dev/null +++ b/.github/workflows/win-msys2-gcc.yml @@ -0,0 +1,75 @@ +name: win-msys2-gcc-ci + +on: + push: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + + pull_request: + branches: + - master + - feature/** + - improvement/** + - bugfix/** + +jobs: + windows-mingw: + if: >- + ! contains(toJSON(github.event.commits.*.message), '[skip ci]') && + !
contains(toJSON(github.event.commits.*.message), '[skip github]') + + name: ${{ matrix.msystem }} ${{ matrix.type }} + runs-on: windows-latest + defaults: + run: + shell: msys2 {0} + strategy: + fail-fast: false + matrix: + include: + - msystem: "MINGW64" + install: >- + git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja + mingw-w64-x86_64-gcc + type: Release + - msystem: "MINGW32" + install: >- + git mingw-w64-i686-cmake mingw-w64-i686-ninja + mingw-w64-i686-gcc + type: Release + - msystem: "MINGW64" + install: >- + git mingw-w64-x86_64-cmake mingw-w64-x86_64-ninja + mingw-w64-x86_64-gcc + type: Debug + - msystem: "MINGW32" + install: >- + git mingw-w64-i686-cmake mingw-w64-i686-ninja + mingw-w64-i686-gcc + type: Debug + env: + CMAKE_GENERATOR: Ninja + + steps: + - uses: actions/checkout@v2 + - uses: msys2/setup-msys2@v2 + with: + update: true + msystem: ${{ matrix.msystem }} + install: ${{ matrix.install }} + + - name: Install dependencies + run: script/ci_install_deps.sh + + - name: Configure + run: cmake -S test -B build -D CMAKE_BUILD_TYPE=${{ matrix.type }} + + - name: Build + run: cmake --build build -j $(nproc) + + - name: Run + working-directory: build + run: ctest --output-on-failure -j $(nproc) diff --git a/.gitignore b/.gitignore index 5c1267c..f800137 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ compile_commands.json .clang-format +.ccls-cache/* experiment/ build/ subprojects/* diff --git a/README.md b/README.md index 0789b95..31fcd27 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ # Static split parser +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) +![ubuntu-latest-gcc](https://github.com/red0124/ssp/workflows/ubuntu-latest-gcc-ci/badge.svg) +![ubuntu-latest-clang](https://github.com/red0124/ssp/workflows/ubuntu-latest-clang-ci/badge.svg) +![ubuntu-latest-icc](https://github.com/red0124/ssp/workflows/ubuntu-latest-icc-ci/badge.svg)
+![windows-msys2-gcc](https://github.com/red0124/ssp/workflows/win-msys2-gcc-ci/badge.svg) +![windows-msys2-clang](https://github.com/red0124/ssp/workflows/win-msys2-clang-ci/badge.svg) + A header only "csv" parser which is fast and versatile with modern C++ api. Requires compiler with C++17 support. Conversion for floating point values invoked using [fast-float](https://github.com/fastfloat/fast_float) . diff --git a/include/ss/common.hpp b/include/ss/common.hpp index d2d34bd..99ce493 100644 --- a/include/ss/common.hpp +++ b/include/ss/common.hpp @@ -16,4 +16,61 @@ inline void assert_string_error_defined() { "'string_error' needs to be enabled to use 'error_msg'"); } +#if __unix__ +inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { + return getline(lineptr, n, stream); +} +#else +#include +using ssize_t = int64_t; +inline ssize_t get_line(char** lineptr, size_t* n, FILE* stream) { + size_t pos; + int c; + + if (lineptr == nullptr || stream == nullptr || n == nullptr) { + errno = EINVAL; + return -1; + } + + c = getc(stream); + if (c == EOF) { + return -1; + } + + if (*lineptr == nullptr) { + *lineptr = static_cast(malloc(128)); + if (*lineptr == nullptr) { + return -1; + } + *n = 128; + } + + pos = 0; + while (c != EOF) { + if (pos + 1 >= *n) { + size_t new_size = *n + (*n >> 2); + if (new_size < 128) { + new_size = 128; + } + char* new_ptr = static_cast( + realloc(static_cast(*lineptr), new_size)); + if (new_ptr == nullptr) { + return -1; + } + *n = new_size; + *lineptr = new_ptr; + } + + (*lineptr)[pos++] = c; + if (c == '\n') { + break; + } + c = getc(stream); + } + + (*lineptr)[pos] = '\0'; + return pos; +} +#endif + } /* ss */ diff --git a/include/ss/extract.hpp b/include/ss/extract.hpp index ebf7ef8..2634399 100644 --- a/include/ss/extract.hpp +++ b/include/ss/extract.hpp @@ -18,11 +18,11 @@ namespace ss { template std::enable_if_t, std::optional> to_num( - const char* begin, const char* const end) { + const char* const begin, const char*
 const end) { T ret; - auto answer = fast_float::from_chars(begin, end, ret); + auto [ptr, ec] = fast_float::from_chars(begin, end, ret); - if (answer.ec != std::errc() || answer.ptr != end) { + if (ec != std::errc() || ptr != end) { return std::nullopt; } return ret; @@ -35,7 +35,13 @@ inline std::optional from_char(char c) { return std::nullopt; } -#if defined(__clang__) || defined(__GNUC__) || defined(__GUNG__) +#if defined(__clang__) && defined(__MINGW32__) && !defined(__MINGW64__) +#define MINGW32_CLANG +#endif + +// mingw32 clang does not support some of the builtin functions +#if (defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)) && \ + !defined(MINGW32_CLANG) //////////////// // mul overflow detection //////////////// @@ -163,7 +169,6 @@ bool shift_and_add_overflow(T& value, T digit, F add_last_digit_owerflow) { } #else -#warning "Use clang or gcc if possible." template bool shift_and_add_overflow(T& value, T digit, U is_negative) { digit = (is_negative) ? -digit : digit; @@ -193,7 +198,8 @@ std::enable_if_t, std::optional> to_num( } } -#if defined(__clang__) || defined(__GNUC__) || defined(__GNUG__) +#if (defined(__clang__) || defined(__GNUC__) || defined(__GNUG__)) && \ + !defined(MINGW32_CLANG) auto add_last_digit_owerflow = (is_negative) ?
sub_overflow : add_overflow; #else diff --git a/include/ss/parser.hpp b/include/ss/parser.hpp index bf0fad6..86f4e92 100644 --- a/include/ss/parser.hpp +++ b/include/ss/parser.hpp @@ -1,5 +1,6 @@ #pragma once +#include "common.hpp" #include "converter.hpp" #include "extract.hpp" #include "restrictions.hpp" @@ -379,7 +380,7 @@ private: bool read_next() { memset(next_line_buffer_, '\0', next_line_size_); ssize_t ssize = - getline(&next_line_buffer_, &next_line_size_, file_); + get_line(&next_line_buffer_, &next_line_size_, file_); if (ssize == -1) { return false; @@ -500,7 +501,8 @@ private: bool append_next_line_to_buffer(char*& buffer, size_t& size) { undo_remove_eol(buffer, size); - ssize_t next_ssize = getline(&helper_buffer_, &helper_size_, file_); + ssize_t next_ssize = + get_line(&helper_buffer_, &helper_size_, file_); if (next_ssize == -1) { return false; } diff --git a/script/ci_install_icc.sh b/script/ci_install_icc.sh new file mode 100755 index 0000000..d719f6f --- /dev/null +++ b/script/ci_install_icc.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +URL=$1 +COMPONENTS=$2 + +curl --output webimage.sh --url "$URL" --retry 5 --retry-delay 5 +chmod +x webimage.sh +./webimage.sh -x -f webimage_extracted --log extract.log +rm -rf webimage.sh +WEBIMAGE_NAME=$(ls -1 webimage_extracted/) +if [ -z "$COMPONENTS" ]; then + webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --eula=accept --continue-with-optional-error=yes --log-dir=. + installer_exit_code=$? +else + webimage_extracted/"$WEBIMAGE_NAME"/bootstrapper -s --action install --components="$COMPONENTS" --eula=accept --continue-with-optional-error=yes --log-dir=. + installer_exit_code=$?
+fi +rm -rf webimage_extracted +exit $installer_exit_code diff --git a/script/ci_setup_icc.sh b/script/ci_setup_icc.sh new file mode 100755 index 0000000..d17d89b --- /dev/null +++ b/script/ci_setup_icc.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT + +#shellcheck disable=SC2010 +LATEST_VERSION=$(ls -1 /opt/intel/oneapi/compiler/ | grep -v latest | sort | tail -1) +# shellcheck source=/dev/null +source /opt/intel/oneapi/compiler/"$LATEST_VERSION"/env/vars.sh +export CXX=dpcpp diff --git a/test/test_parser.cpp b/test/test_parser.cpp index af8b183..ab5d8c9 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -2,12 +2,26 @@ #include #include #include +#include #include +#include +std::string time_now_rand() { + std::stringstream ss; + auto t = std::time(nullptr); + auto tm = *std::localtime(&t); + ss << std::put_time(&tm, "%d%m%Y%H%M%S"); + srand(time(nullptr)); + return ss.str() + std::to_string(rand()); +} + +inline int i = 0; struct unique_file_name { const std::string name; - unique_file_name() : name{std::tmpnam(nullptr)} { + unique_file_name() + : name{"random_" + std::to_string(i++) + time_now_rand() + + "_file.csv"} { } ~unique_file_name() { @@ -15,6 +29,24 @@ struct unique_file_name { } }; +void replace_all(std::string& s, const std::string& from, + const std::string& to) { + if (from.empty()) return; + size_t start_pos = 0; + while ((start_pos = s.find(from, start_pos)) != std::string::npos) { + s.replace(start_pos, from.length(), to); + start_pos += to.length(); + } +} + +void update_if_crlf(std::string& s) { +#ifdef _WIN32 + replace_all(s, "\r\n", "\n"); +#else + (void)(s); +#endif +} + struct X { constexpr static auto delim = ","; int i; @@ -43,19 +75,27 @@ template static void make_and_write(const std::string& file_name, const std::vector& data) { std::ofstream out{file_name}; + +#ifdef _WIN32 + std::vector new_lines = {"\n"}; +#else std::vector new_lines = {"\n", "\r\n"};
+#endif + for (size_t i = 0; i < data.size(); ++i) { out << data[i].to_string() << new_lines[i % new_lines.size()]; } } +#include + TEST_CASE("parser test various cases") { unique_file_name f; std::vector data = {{1, 2, "x"}, {3, 4, "y"}, {5, 6, "z"}, {7, 8, "u"}, {9, 10, "v"}, {11, 12, "w"}}; make_and_write(f.name, data); { - ss::parser p{f.name, ","}; + ss::parser p{f.name, ","}; ss::parser p0{std::move(p)}; p = std::move(p0); @@ -524,11 +564,18 @@ std::string no_quote(const std::string& s) { return s; } -TEST_CASE("parser test csv on multiple lines with quotes zzz") { +TEST_CASE("parser test csv on multiple lines with quotes") { unique_file_name f; - std::vector data = {{1, 2, "\"x\nx\nx\""}, {3, 4, "\"y\ny\ny\""}, - {5, 6, "\"z\nz\""}, {7, 8, "\"u\"\"\""}, - {9, 10, "v"}, {11, 12, "\"w\n\""}}; + std::vector data = {{1, 2, "\"x\r\nx\nx\""}, + {3, 4, "\"y\ny\r\ny\""}, + {5, 6, "\"z\nz\""}, + {7, 8, "\"u\"\"\""}, + {9, 10, "v"}, + {11, 12, "\"w\n\""}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } + make_and_write(f.name, data); for (auto& [_, __, s] : data) { s = no_quote(s); @@ -545,6 +592,9 @@ TEST_CASE("parser test csv on multiple lines with quotes zzz") { i.emplace_back(ss::to_object(a)); } + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } CHECK_EQ(i, data); ss::parser> p_no_multiline{f.name, ","}; @@ -561,9 +611,15 @@ std::string no_escape(std::string& s) { TEST_CASE("parser test csv on multiple lines with escapes") { unique_file_name f; - std::vector data = {{1, 2, "x\\\nx\\\nx"}, {5, 6, "z\\\nz\\\nz"}, - {7, 8, "u"}, {3, 4, "y\\\ny\\\ny"}, - {9, 10, "v\\\\"}, {11, 12, "w\\\n"}}; + std::vector data = {{1, 2, "x\\\nx\\\r\nx"}, + {5, 6, "z\\\nz\\\nz"}, + {7, 8, "u"}, + {3, 4, "y\\\ny\\\ny"}, + {9, 10, "v\\\\"}, + {11, 12, "w\\\n"}}; + for (auto& [_, __, s] : data) { + update_if_crlf(s); + } make_and_write(f.name, data); for (auto& [_, __, s] : data) { @@ -581,6 +637,9 @@ TEST_CASE("parser test csv on multiple lines with escapes") {
i.emplace_back(ss::to_object(a)); } + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } CHECK_EQ(i, data); ss::parser> p_no_multiline{f.name, ","}; @@ -595,8 +654,13 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") { { std::ofstream out{f.name}; out << "1,2,\"just\\\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"just\r\nsome\\\r\n\n\\\nstrings\"" << std::endl; + out << "5,6,\"just\\\n\\\r\n\r\n\nstrings" << std::endl; +#else out << "3,4,\"just\nsome\\\n\n\\\nstrings\"" << std::endl; out << "5,6,\"just\\\n\\\n\n\nstrings" << std::endl; +#endif out << "7,8,\"just strings\"" << std::endl; out << "9,10,just strings" << std::endl; } @@ -612,9 +676,16 @@ TEST_CASE("parser test csv on multiple lines with quotes and escapes") { } std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 + {3, 4, "just\r\nsome\r\n\n\nstrings"}, +#else {3, 4, "just\nsome\n\n\nstrings"}, +#endif {9, 10, "just strings"}}; + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } CHECK_EQ(i, data); } @@ -623,8 +694,13 @@ TEST_CASE("parser test multiline restricted") { { std::ofstream out{f.name}; out << "1,2,\"just\n\nstrings\"" << std::endl; +#ifndef _WIN32 + out << "3,4,\"ju\n\r\n\nnk\"" << std::endl; + out << "5,6,just\\\n\\\r\nstrings" << std::endl; +#else out << "3,4,\"ju\n\n\nnk\"" << std::endl; out << "5,6,just\\\n\\\nstrings" << std::endl; +#endif out << "7,8,ju\\\n\\\n\\\nnk" << std::endl; out << "9,10,\"just\\\n\nstrings\"" << std::endl; out << "11,12,\"ju\\\n|\n\n\n\n\nk\"" << std::endl; @@ -645,8 +721,16 @@ TEST_CASE("parser test multiline restricted") { } std::vector data = {{1, 2, "just\n\nstrings"}, +#ifndef _WIN32 + {5, 6, "just\n\r\nstrings"}, +#else {5, 6, "just\n\nstrings"}, +#endif {9, 10, "just\n\nstrings"}, {19, 20, "just strings"}}; + + for (auto& [_, __, s] : i) { + update_if_crlf(s); + } CHECK_EQ(i, data); }