pax_global_header 0000666 0000000 0000000 00000000064 15113123577 0014517 g ustar 00root root 0000000 0000000 52 comment=e8ea1744d81c39f73e181ce9848577db19617659
zsv-1.3.0/ 0000775 0000000 0000000 00000000000 15113123577 0012342 5 ustar 00root root 0000000 0000000 zsv-1.3.0/.clang-format 0000664 0000000 0000000 00000000212 15113123577 0014710 0 ustar 00root root 0000000 0000000 BasedOnStyle: Microsoft
BreakBeforeBraces: Attach
ContinuationIndentWidth: 2
IndentWidth: 2
SortIncludes: false
TabWidth: 2
UseTab: Never
zsv-1.3.0/.cppcheck 0000664 0000000 0000000 00000001773 15113123577 0014133 0 ustar 00root root 0000000 0000000
cppcheck-build-dir
false
true
false
2
cppcheck-lib
emscripten
posix
sqlite3
windows
missingInclude
missingIncludeSystem
unmatchedSuppression
strdupCalled
zsv-1.3.0/.dockerignore 0000664 0000000 0000000 00000000213 15113123577 0015012 0 ustar 00root root 0000000 0000000 /**
!/app
/app/cli_internal.*.in
!/data
!/docs/*.json
!examples
!/include
!/scripts
!/src
!configure
!AUTHORS
!Makefile
!LICENSE
!./ci/zsv
zsv-1.3.0/.fpm 0000664 0000000 0000000 00000000161 15113123577 0013123 0 ustar 00root root 0000000 0000000 --force
--architecture "amd64"
--maintainer "Liquidaty"
--url "https://github.com/liquidaty/zsv"
--license "MIT"
zsv-1.3.0/.gitattributes 0000664 0000000 0000000 00000001102 15113123577 0015227 0 ustar 00root root 0000000 0000000 configure text eol=lf
*.sh text eol=lf
*.csv text eol=lf
*.txt text eol=lf
app/test/expected/test-2db.out2 text eol=lf
app/test/expected/test-2db.out3 text eol=lf
app/test/expected/test-sheet-*.out* text eol=lf
app/ext_example/test/expected/test-sheet-extension-1-indexed.out text eol=lf
app/ext_example/test/expected/test-sheet-extension-1.out text eol=lf
data/stack2-2.csv -text
data/test/crlf-line-ending.csv -text
data/test/embedded_dos.csv -text
data/test/mixed-line-endings.csv -text
data/test/select-merge.csv -text
data/test/crlf.txt -text
data/test/crlf-2.csv -text zsv-1.3.0/.github/ 0000775 0000000 0000000 00000000000 15113123577 0013702 5 ustar 00root root 0000000 0000000 zsv-1.3.0/.github/dependabot.yml 0000664 0000000 0000000 00000000167 15113123577 0016536 0 ustar 00root root 0000000 0000000 version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"
zsv-1.3.0/.github/workflows/ 0000775 0000000 0000000 00000000000 15113123577 0015737 5 ustar 00root root 0000000 0000000 zsv-1.3.0/.github/workflows/benchmarks.yml 0000664 0000000 0000000 00000004616 15113123577 0020606 0 ustar 00root root 0000000 0000000 name: benchmarks
on:
workflow_dispatch:
inputs:
workflow-run-id:
description: 'Workflow Run ID'
type: number
required: false
runs:
description: 'No. of runs [min: 5, max: 100]'
type: number
default: 5
required: false
permissions:
actions: read
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
WORKFLOW_RUN_ID: ${{ inputs.workflow-run-id }}
RUNS: ${{ inputs.runs }}
BENCHMARKS_DIR: .benchmarks
jobs:
amd64-macos-gcc:
runs-on: macos-15-intel
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
scripts/ci-run-benchmarks.sh
- name: Download build artifact from workflow run
if: ${{ env.WORKFLOW_RUN_ID }}
run: |
gh run download --repo liquidaty/zsv --pattern *amd64-macosx-gcc.tar.gz "$WORKFLOW_RUN_ID"
mkdir -p ./"$BENCHMARKS_DIR"
mv zsv-*.tar.gz/*.tar.gz ./"$BENCHMARKS_DIR"
rm -rf zsv-*.tar.gz
ls -hl ./"$BENCHMARKS_DIR"
- name: Set ZSV_TAG env var
if: ${{ env.WORKFLOW_RUN_ID }}
run: |
cd ./"$BENCHMARKS_DIR"
ZSV_TAG="$(ls *.tar.gz | sed 's|zsv-\(.*\)-amd64.*|\1|')"
echo "ZSV_TAG=$ZSV_TAG" | tee -a "$GITHUB_ENV"
- name: Run benchmarks
run: ./scripts/ci-run-benchmarks.sh
amd64-linux:
runs-on: ubuntu-latest
strategy:
matrix:
compiler: [gcc, clang, musl]
continue-on-error: true
name: amd64-linux-${{ matrix.compiler }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
scripts/ci-run-benchmarks.sh
- name: Download build artifact from workflow run
if: ${{ env.WORKFLOW_RUN_ID }}
env:
COMPILER: ${{ matrix.compiler }}
run: |
gh run download --repo liquidaty/zsv --pattern *amd64-linux-$COMPILER.tar.gz "$WORKFLOW_RUN_ID"
mkdir -p ./"$BENCHMARKS_DIR"
mv zsv-*.tar.gz/*.tar.gz ./"$BENCHMARKS_DIR"
rm -rf zsv-*.tar.gz
ls -hl ./"$BENCHMARKS_DIR"
- name: Set ZSV_TAG env var
if: ${{ env.WORKFLOW_RUN_ID }}
run: |
cd ./"$BENCHMARKS_DIR"
ZSV_TAG="$(ls *.tar.gz | sed 's|zsv-\(.*\)-amd64.*|\1|')"
echo "ZSV_TAG=$ZSV_TAG" | tee -a "$GITHUB_ENV"
- name: Run benchmarks
env:
ZSV_LINUX_BUILD_COMPILER: ${{ matrix.compiler }}
run: ./scripts/ci-run-benchmarks.sh
zsv-1.3.0/.github/workflows/ci.yml 0000664 0000000 0000000 00000067405 15113123577 0017071 0 ustar 00root root 0000000 0000000 name: ci
on:
push:
branches: [main]
paths-ignore:
- '**.md'
- 'setup-action/**'
- '.github/workflows/setup-action.yml'
- '.github/workflows/codeql.yml'
- '.github/workflows/benchmarks.yml'
- 'scripts/ci-run-benchmarks.sh'
pull_request:
branches: [main]
paths-ignore:
- '**.md'
- 'setup-action/**'
- '.github/workflows/setup-action.yml'
- '.github/workflows/codeql.yml'
- '.github/workflows/benchmarks.yml'
- 'scripts/ci-run-benchmarks.sh'
release:
types: [published]
workflow_dispatch:
inputs:
wsl:
description: 'Enable WSL job'
required: false
type: boolean
default: false
permissions:
contents: write
id-token: write
attestations: write
defaults:
run:
shell: bash
env:
AMD64_LINUX_GCC: amd64-linux-gcc
AMD64_LINUX_CLANG: amd64-linux-clang
AMD64_LINUX_MUSL: amd64-linux-musl
AMD64_WINDOWS_MINGW: amd64-windows-mingw
AMD64_WSL_MINGW: amd64-wsl-mingw
AMD64_LINUX_WASM: amd64-linux-wasm
AMD64_MACOSX_GCC: amd64-macosx-gcc
ARM64_MACOSX_GCC: arm64-macosx-gcc
AMD64_FREEBSD_GCC: amd64-freebsd-gcc
ARTIFACT_DIR: .artifacts
ARTIFACT_RETENTION_DAYS: 5
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
jobs:
tag:
runs-on: ubuntu-latest
outputs:
TAG: ${{ steps.tag.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
scripts/ci-set-tag-output-parameter.sh
- name: Set TAG output parameter
id: tag
env:
TAG: ${{ startsWith(github.ref, 'refs/tags/v') && github.ref_name || '' }}
run: ./scripts/ci-set-tag-output-parameter.sh
clang-format:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Run clang-format
run: |
sudo ln -sf /usr/bin/clang-format-15 /usr/bin/clang-format
./scripts/ci-run-clang-format.sh
cppcheck:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Install cppcheck
run: |
sudo apt update
sudo apt install -y cppcheck
cppcheck --version
- name: Run cppcheck
run: ./scripts/ci-run-cppcheck.sh
- name: Upload (${{ env.CPPCHECK_XML_ARTIFACT_NAME }})
uses: actions/upload-artifact@v5
with:
name: ${{ env.CPPCHECK_XML_ARTIFACT_NAME }}
path: ${{ env.CPPCHECK_XML_ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (${{ env.CPPCHECK_HTML_ARTIFACT_NAME }})
uses: actions/upload-artifact@v5
with:
name: ${{ env.CPPCHECK_HTML_ARTIFACT_NAME }}
path: ${{ env.CPPCHECK_HTML_ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
shellcheck:
runs-on: ubuntu-22.04
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Run shellcheck
run: ./scripts/ci-run-shellcheck.sh
ci-linux:
needs: [tag, clang-format, cppcheck, shellcheck]
runs-on: ubuntu-22.04
timeout-minutes: 15
env:
TAG: ${{ needs.tag.outputs.TAG }}
outputs:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Set up apt dependencies
run: |
sudo apt update
sudo apt install -y rpm alien tmux
sudo apt remove -y jq
- name: Build on Linux (${{ env.AMD64_LINUX_GCC }})
env:
PREFIX: ${{ env.AMD64_LINUX_GCC }}
CC: gcc
MAKE: make
RUN_TESTS: true
run: |
./scripts/ci-build.sh
./scripts/ci-create-debian-package.sh
./scripts/ci-create-rpm-package.sh
- name: Build on Linux (${{ env.AMD64_LINUX_CLANG }})
env:
PREFIX: ${{ env.AMD64_LINUX_CLANG }}
CC: clang
MAKE: make
RUN_TESTS: true
run: |
./scripts/ci-build.sh
./scripts/ci-create-debian-package.sh
./scripts/ci-create-rpm-package.sh
- name: Prepare build artifacts for upload
run: ./scripts/ci-prepare-artifacts-for-upload.sh
- name: Attest build artifacts for release
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ env.ARTIFACT_DIR }}/*
- name: Verify attestations of release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-verify-attestations.sh
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.zip)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.zip
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.zip
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.deb)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.deb
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.deb
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.rpm)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.rpm
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.rpm
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.tar.gz)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_GCC }}.tar.gz
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_CLANG }}.tar.gz
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-upload-release-artifacts.sh
prepare-linux-packages:
if: ${{ github.ref_name == 'main' || startsWith(github.ref, 'refs/tags/v') }}
needs: [ci-linux]
runs-on: ubuntu-latest
env:
TAG: ${{ needs.ci-linux.outputs.TAG }}
AMD64_ZIP: zsv-${{ needs.ci-linux.outputs.TAG }}-amd64-linux-gcc.zip
steps:
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y rpm dpkg-dev createrepo-c
sudo gem install --no-document fpm
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
.fpm
scripts/ci-prepare-deb-rpm-repos.sh
- name: Download ${{ env.AMD64_ZIP }}
uses: actions/download-artifact@v6
with:
name: ${{ env.AMD64_ZIP }}
path: ${{ env.ARTIFACT_DIR }}
- name: Prepare DEB and RPM package repos
run: ./scripts/ci-prepare-deb-rpm-repos.sh
- name: Upload packages artifact
uses: actions/upload-artifact@v5
with:
name: packages
path: ${{ env.ARTIFACT_DIR }}/packages
ci-macos:
needs: [tag, clang-format, cppcheck, shellcheck]
strategy:
matrix:
os: [macos-15-intel, macos-latest]
runs-on: ${{ matrix.os }}
timeout-minutes: 30
outputs:
TAG: ${{ needs.tag.outputs.TAG }}
env:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Set up homebrew dependencies
run: brew install --quiet coreutils tree tmux sqlite3
- name: Set PREFIX and ZIP env var
env:
PREFIX: ${{ runner.arch == 'X64' && env.AMD64_MACOSX_GCC || env.ARM64_MACOSX_GCC }}
run: |
{
echo "PREFIX=$PREFIX"
echo "ZIP=zsv-$TAG-$PREFIX.zip"
echo "TAR=zsv-$TAG-$PREFIX.tar.gz"
} | tee -a "$GITHUB_ENV"
- name: Build on macOS (${{ env.AMD64_MACOSX_GCC }})
env:
CC: gcc-13
MAKE: make
RUN_TESTS: true
run: ./scripts/ci-build.sh
- name: Prepare build artifacts for upload
run: ./scripts/ci-prepare-artifacts-for-upload.sh
- name: Codesign and notarize (${{ env.ZIP }})
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
env:
MACOS_CERT_P12: ${{ secrets.MACOS_CERT_P12 }}
MACOS_CERT_PASSWORD: ${{ secrets.MACOS_CERT_PASSWORD }}
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
run: ./scripts/ci-macos-codesign-and-notarize.sh "$PWD/$ARTIFACT_DIR/$ZIP"
- name: Attest build artifacts for release
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ env.ARTIFACT_DIR }}/*
- name: Verify attestations of release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-verify-attestations.sh
- name: Upload (${{ env.ZIP }})
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: ${{ env.ZIP }}
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (${{ env.TAR }})
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: ${{ env.TAR }}
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-upload-release-artifacts.sh
update-homebrew-tap:
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
needs: ci-macos
runs-on: ubuntu-22.04
env:
TAG: ${{ needs.ci-macos.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
scripts/ci-update-homebrew-tap.sh
- name: Update
env:
HOMEBREW_TAP_DEPLOY_KEY: ${{ secrets.HOMEBREW_TAP_DEPLOY_KEY }}
run: ./scripts/ci-update-homebrew-tap.sh
update-homebrew-core:
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
needs: ci-macos
runs-on: ubuntu-latest
env:
TAG: ${{ needs.ci-macos.outputs.TAG }}
steps:
- name: Update
uses: mislav/bump-homebrew-formula-action@v3
env:
COMMITTER_TOKEN: ${{ secrets.HOMEBREW_CORE_PR_PAT }}
with:
formula-name: zsv
formula-path: Formula/z/zsv.rb
push-to: liquidaty/homebrew-core
tag-name: v${{ env.TAG }}
ci-bsd:
needs: [tag, clang-format, cppcheck, shellcheck]
runs-on: ubuntu-22.04
timeout-minutes: 15
env:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Build (${{ env.AMD64_FREEBSD_GCC }})
uses: cross-platform-actions/action@v0.30.0
env:
PREFIX: ${{ env.AMD64_FREEBSD_GCC }}
CC: gcc
MAKE: gmake
RUN_TESTS: true
with:
operating_system: freebsd
version: '13.2'
environment_variables: 'PREFIX CC MAKE RUN_TESTS ARTIFACT_DIR'
shell: sh
run: |
./scripts/ci-freebsd-setup.sh
./scripts/ci-build.sh
- name: Prepare build artifacts for upload
run: ./scripts/ci-prepare-artifacts-for-upload.sh
- name: Attest build artifacts for release
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ env.ARTIFACT_DIR }}/*
- name: Verify attestations of release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-verify-attestations.sh
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.zip)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.zip
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.tar.gz)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_FREEBSD_GCC }}.tar.gz
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-upload-release-artifacts.sh
mingw-ncurses:
runs-on: ubuntu-22.04
steps:
- name: Cache
uses: actions/cache@v4
id: cache
with:
key: mingw-ncurses.zip
path: ${{ github.workspace }}/mingw-ncurses.zip
- name: Install MinGW
if: ${{ steps.cache.outputs.cache-hit != 'true' }}
run: |
sudo apt update
sudo apt install -y mingw-w64
- name: Checkout
uses: actions/checkout@v6
if: ${{ steps.cache.outputs.cache-hit != 'true' }}
with:
sparse-checkout: |
scripts/ci-build-ncurses-with-mingw.sh
- name: Build ncurses with MinGW
if: ${{ steps.cache.outputs.cache-hit != 'true' }}
run: ./scripts/ci-build-ncurses-with-mingw.sh
- name: Upload mingw-ncurses.zip
uses: actions/upload-artifact@v5
with:
name: mingw-ncurses.zip
path: ${{ github.workspace }}/mingw-ncurses.zip
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
ci-linux-mingw:
needs: [tag, clang-format, cppcheck, shellcheck, mingw-ncurses]
runs-on: ubuntu-22.04
timeout-minutes: 15
env:
TAG: ${{ needs.tag.outputs.TAG }}
outputs:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Set up apt dependencies
run: |
sudo apt update
sudo apt install -y mingw-w64 nuget
sudo apt remove -y jq
- name: Checkout
uses: actions/checkout@v6
- name: Download mingw-ncurses.zip
uses: actions/download-artifact@v6
with:
name: mingw-ncurses.zip
path: ${{ github.workspace }}/app/external
- name: Unzip mingw-ncurses.zip in app/external
run: |
cd app/external
unzip mingw-ncurses.zip
- name: Build (${{ env.AMD64_WINDOWS_MINGW }})
env:
PREFIX: ${{ env.AMD64_WINDOWS_MINGW }}
CC: x86_64-w64-mingw32-gcc
MAKE: make
RUN_TESTS: false
run: |
export CFLAGS="-I$PWD/app/external/mingw-ncurses/include"
export LDFLAGS="-L$PWD/app/external/mingw-ncurses/lib"
./scripts/ci-build.sh
./scripts/ci-create-nuget-package.sh
- name: Prepare build artifacts for upload
run: ./scripts/ci-prepare-artifacts-for-upload.sh
- name: Attest build artifacts for release
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ env.ARTIFACT_DIR }}/*
- name: Verify attestations of release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-verify-attestations.sh
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.zip)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.zip
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.tar.gz)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.tar.gz
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.nupkg)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_WINDOWS_MINGW }}.nupkg
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-upload-release-artifacts.sh
ci-wsl-mingw:
if: ${{ github.event_name == 'workflow_dispatch' && inputs.wsl == true }}
needs: [tag, clang-format, cppcheck, shellcheck, mingw-ncurses]
runs-on: windows-2025
timeout-minutes: 30
env:
TAG: ${{ needs.tag.outputs.TAG }}
defaults:
run:
shell: bash
steps:
- name: Set up WSL 2
uses: Vampire/setup-wsl@v6
with:
wsl-version: 2
wsl-shell-command: bash --noprofile --norc -eo pipefail {0}
set-as-default: true
distribution: Ubuntu-22.04
additional-packages: |
build-essential
mingw-w64
sqlite3
tree
zip
tar
tmux
- name: Checkout
uses: actions/checkout@v6
- name: Download mingw-ncurses.zip
uses: actions/download-artifact@v6
with:
name: mingw-ncurses.zip
path: ${{ github.workspace }}/app/external
- name: Unzip mingw-ncurses.zip in app/external
run: |
cd app/external
unzip mingw-ncurses.zip
- name: Build (${{ env.AMD64_WSL_MINGW }})
env:
PREFIX: ${{ env.AMD64_WSL_MINGW }}
CC: x86_64-w64-mingw32-gcc
MAKE: make
RUN_TESTS: true
SKIP_BUILD: true
SKIP_ZIP_ARCHIVE: true
SKIP_TAR_ARCHIVE: true
WSLENV: ARTIFACT_DIR:TAG:PREFIX:CC:MAKE:RUN_TESTS:SKIP_BUILD:SKIP_ZIP_ARCHIVE:SKIP_TAR_ARCHIVE
shell: wsl-bash {0}
run: |
export CFLAGS="-I$PWD/app/external/mingw-ncurses/include"
export LDFLAGS="-L$PWD/app/external/mingw-ncurses/lib"
./scripts/ci-build.sh
ci-musl:
needs: [tag, clang-format, cppcheck, shellcheck]
runs-on: ubuntu-latest
container: alpine:latest
timeout-minutes: 15
outputs:
TAG: ${{ needs.tag.outputs.TAG }}
env:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Set up dependencies
shell: sh
run: apk add bash gcc make musl-dev ncurses-dev ncurses-static tmux file sqlite curl zip wget tar git
- name: Checkout
uses: actions/checkout@v6
- name: Configure git in container
run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
- name: Build (${{ env.AMD64_LINUX_MUSL }})
env:
PREFIX: ${{ env.AMD64_LINUX_MUSL }}
CC: gcc
MAKE: make
RUN_TESTS: true
STATIC_BUILD: "1"
run: ./scripts/ci-build.sh
- name: Prepare build artifacts for upload
run: ./scripts/ci-prepare-artifacts-for-upload.sh
- name: Attest build artifacts for release
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: actions/attest-build-provenance@v3
with:
subject-path: ${{ env.ARTIFACT_DIR }}/*
- name: Set up GitHub CLI
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: |
wget https://github.com/cli/cli/releases/download/v2.63.2/gh_2.63.2_linux_amd64.tar.gz
tar xvf gh_2.63.2_linux_amd64.tar.gz
cp gh_2.63.2_linux_amd64/bin/gh /usr/bin
rm -rf gh_2.63.2_linux_amd64
- name: Verify attestations of release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: ./scripts/ci-verify-attestations.sh
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.zip)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.zip
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.tar.gz)
uses: actions/upload-artifact@v5
env:
ARTIFACT_NAME: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.tar.gz
with:
name: ${{ env.ARTIFACT_NAME }}
path: ${{ env.ARTIFACT_DIR }}/${{ env.ARTIFACT_NAME }}
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
- name: Upload release artifacts
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
run: |
git config --global --add safe.directory "$PWD"
./scripts/ci-upload-release-artifacts.sh
ghcr:
needs: [ci-musl]
runs-on: ubuntu-latest
permissions:
packages: write
env:
TAG: ${{ needs.ci-musl.outputs.TAG }}
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
Dockerfile.ci
- name: Download (zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.zip)
uses: actions/download-artifact@v6
with:
name: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.zip
path: ${{ env.AMD64_LINUX_MUSL }}
- name: Unzip
env:
ZIP: zsv-${{ env.TAG }}-${{ env.AMD64_LINUX_MUSL }}.zip
DIR: ${{ env.AMD64_LINUX_MUSL }}
run: |
cd "$DIR"
unzip -o "$ZIP"
cd ..
mkdir -p ./ci
mv ./"$DIR"/bin/zsv ./ci/
rm -rf ./"$DIR"
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Login to GitHub Container Registry
if: ${{ startsWith(github.ref, 'refs/tags/v') }}
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push (on release)
uses: docker/build-push-action@v6
env:
DOCKER_BUILD_RECORD_UPLOAD: false
with:
no-cache: true
context: .
file: Dockerfile.ci
platforms: linux/amd64
push: ${{ startsWith(github.ref, 'refs/tags/v') }}
tags: |
ghcr.io/liquidaty/zsv:${{ env.TAG }}
ghcr.io/liquidaty/zsv:latest
ci-wasm-playground:
needs: [tag, clang-format, cppcheck, shellcheck]
runs-on: ubuntu-22.04
timeout-minutes: 15
env:
TAG: ${{ needs.tag.outputs.TAG }}
steps:
- name: Set up emsdk
uses: mymindstorm/setup-emsdk@v14
- name: Checkout
uses: actions/checkout@v6
- name: Update version in index.html
run: sed "s|__VERSION__|$TAG|g" -i playground/index.html
- name: Build with SIMD (${{ env.AMD64_LINUX_WASM }})
env:
PREFIX: ${{ env.AMD64_LINUX_WASM }}
CC: emcc
MAKE: make
RUN_TESTS: false
CONFIGFILE: "config.emcc"
CFLAGS: "-msse2 -msimd128"
CROSS_COMPILING: "yes"
NO_THREADING: "1"
STATIC_BUILD: "1"
run: |
emconfigure ./configure --enable-pic --disable-pie
emmake make install NO_STDIN=1 NO_PLAYGROUND=0
cp "$PREFIX"/bin/cli.em.{js,wasm} playground
- name: Build without SIMD (${{ env.AMD64_LINUX_WASM }})
env:
PREFIX: ${{ env.AMD64_LINUX_WASM }}
CC: emcc
MAKE: make
RUN_TESTS: false
CONFIGFILE: "config.emcc"
CROSS_COMPILING: "yes"
NO_THREADING: "1"
STATIC_BUILD: "1"
run: |
emconfigure ./configure --enable-pic --disable-pie
emmake make clean install NO_STDIN=1 NO_PLAYGROUND=0
mkdir -p playground/non-simd
cp "$PREFIX"/bin/cli.em.{js,wasm} playground/non-simd
- name: Upload playground artifact
uses: actions/upload-artifact@v5
with:
name: playground
path: playground
upload-github-pages-artifact:
if: ${{ github.ref_name == 'main' || startsWith(github.ref, 'refs/tags/v') }}
needs: [prepare-linux-packages, ci-wasm-playground]
runs-on: ubuntu-latest
steps:
- name: Download packages artifact
uses: actions/download-artifact@v6
with:
name: packages
path: ${{ env.ARTIFACT_DIR }}/packages
- name: Download playground artifact
uses: actions/download-artifact@v6
with:
name: playground
path: ${{ env.ARTIFACT_DIR }}
- name: Upload GitHub Pages artifacts
uses: actions/upload-pages-artifact@v4
with:
path: ${{ env.ARTIFACT_DIR }}
deploy-to-github-pages:
if: ${{ github.ref_name == 'main' || startsWith(github.ref, 'refs/tags/v') }}
needs: [upload-github-pages-artifact]
runs-on: ubuntu-latest
permissions:
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
publish-winget-package:
if: ${{ github.event_name == 'release' && github.event.action == 'published' }}
needs: [ci-linux-mingw]
runs-on: windows-latest
env:
TAG: ${{ needs.ci-linux-mingw.outputs.TAG }}
defaults:
run:
shell: bash
steps:
- name: Install wingetcreate
run: |
curl -L https://aka.ms/wingetcreate/latest -o wingetcreate
chmod +x wingetcreate
./wingetcreate info
- name: Update
env:
PAT: ${{ secrets.WINGET_PAT }}
PKG_ID: "liquidaty.zsv"
run: |
URL="https://github.com/liquidaty/zsv/releases/download/v$TAG/zsv-$TAG-amd64-windows-mingw.zip"
./wingetcreate update "$PKG_ID" \
--version "$TAG" \
--urls "$URL" \
--out "$GITHUB_WORKSPACE/manifests" \
--token "$PAT" \
--prtitle "New version: $PKG_ID v$TAG" \
--submit
- name: Upload manifest
uses: actions/upload-artifact@v5
with:
name: zsv-${{ env.TAG }}-winget-manifest
path: ${{ github.workspace }}/manifests
retention-days: ${{ env.ARTIFACT_RETENTION_DAYS }}
if-no-files-found: error
zsv-1.3.0/.github/workflows/codeql.yml 0000664 0000000 0000000 00000000773 15113123577 0017740 0 ustar 00root root 0000000 0000000 name: codeql
on: workflow_dispatch
jobs:
codeql:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
language: [actions, c]
permissions:
security-events: write
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Initialize CodeQL
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
build-mode: none
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v4
zsv-1.3.0/.github/workflows/setup-action.yml 0000664 0000000 0000000 00000001354 15113123577 0021100 0 ustar 00root root 0000000 0000000 name: zsv/setup-action
on:
push:
branches: [main]
paths: ['setup-action/**']
pull_request:
branches: [main]
paths: ['setup-action/**']
workflow_dispatch:
permissions:
contents: read
jobs:
ci:
strategy:
matrix:
os: [ubuntu-latest, macos-15-intel, macos-latest, windows-latest]
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash
steps:
- name: Checkout
uses: actions/checkout@v6
with:
sparse-checkout: |
setup-action
- name: Set up
id: setup
uses: ./setup-action
- name: Check output parameter [install-path]
run: echo '${{ steps.setup.outputs.install-path }}'
- name: Check version
run: zsv version
zsv-1.3.0/.gitignore 0000664 0000000 0000000 00000001624 15113123577 0014335 0 ustar 00root root 0000000 0000000 # Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# IDE
.vscode
# Generated Files and Directories
*.log
*.mk
*.mk.log
*.emcc
*.emcc.log
*.wasm
*.worker.js
package.json
package-lock.json
node_modules
*.c.in
*.h.in
*.zip
*.tar.gz
*.deb
*.rpm
*.nupkg
build
app/external
install.sh
.artifacts
.benchmarks
.rpmbuild
amd64-linux-*
amd64-windows-*
amd64-macosx-*
amd64-freebsd-*
jq
nuget-feed
homebrew-zsv
cppcheck*
tmp
include/zsv.h
app/test/worldcitiespop_mil.csv
app/test/worldcitiespop_mil.tsv
data/quoted5.csv
data/loans_2.csv
data/.zsv/data/loans_2.csv/overwrite.sqlite3
compile_commands.json
.cache
zsvsheet_filter_*
overwrite.sqlite3
playground/*
!playground/index.html
playground/favicon.ico
zsv-1.3.0/.markdownlint.json 0000664 0000000 0000000 00000000027 15113123577 0016023 0 ustar 00root root 0000000 0000000 {
"MD028": false
}
zsv-1.3.0/.shellcheckrc 0000664 0000000 0000000 00000000017 15113123577 0014773 0 ustar 00root root 0000000 0000000 disable=SC2153
zsv-1.3.0/AUTHORS 0000664 0000000 0000000 00000000124 15113123577 0013407 0 ustar 00root root 0000000 0000000 Matt Wong
Tai Chi Minh Ralph Eastwood
zsv-1.3.0/BUILD.md 0000664 0000000 0000000 00000001346 15113123577 0013527 0 ustar 00root root 0000000 0000000 # Building and installing the library and/or CLI
## From source
GCC is the recommended compiler, but clang is also supported.
To build from source, you'll need a basic unix toolchain with `sh` and
`make`/`gmake`:
```shell
./configure && sudo ./install.sh
```
or:
```shell
./configure && sudo make install
```
To uninstall:
```shell
sudo make uninstall
```
To build the independent executables in a local build folder,
use `make install` instead of `make all`.
### Building and installing only the library
```shell
./configure && cd src && sudo make install
```
## A note on compilers
GCC 11+ is the recommended compiler. Compared with clang, gcc in some cases
seems to produce faster code for reasons we have not yet determined.
zsv-1.3.0/Dockerfile 0000664 0000000 0000000 00000001331 15113123577 0014332 0 ustar 00root root 0000000 0000000 FROM alpine:latest AS build
LABEL maintainer="Liquidaty"
LABEL url="https://github.com/liquidaty/zsv"
LABEL org.opencontainers.image.description="zsv: tabular data swiss-army knife CLI + world's fastest (simd) CSV parser"
RUN apk add bash gcc make musl-dev ncurses-dev ncurses-static tmux file sqlite curl zip
WORKDIR /zsv
COPY . .
RUN mkdir /usr/local/etc
RUN \
PREFIX=amd64-linux-musl \
CC=gcc \
MAKE=make \
ARTIFACT_DIR=artifacts \
RUN_TESTS=true \
STATIC_BUILD=1 \
SKIP_ZIP_ARCHIVE=true \
SKIP_TAR_ARCHIVE=true \
./scripts/ci-build.sh
FROM scratch
WORKDIR /zsv
COPY --from=build /zsv/amd64-linux-musl/bin/zsv .
COPY --from=build /zsv/AUTHORS /zsv/LICENSE ./
ENTRYPOINT [ "./zsv" ]
zsv-1.3.0/Dockerfile.ci 0000664 0000000 0000000 00000000404 15113123577 0014724 0 ustar 00root root 0000000 0000000 FROM scratch
LABEL maintainer="Liquidaty"
LABEL url="https://github.com/liquidaty/zsv"
LABEL org.opencontainers.image.description="zsv: tabular data swiss-army knife CLI + world's fastest (simd) CSV parser"
WORKDIR /zsv
COPY ci/zsv .
ENTRYPOINT [ "./zsv" ]
zsv-1.3.0/INSTALL.md 0000664 0000000 0000000 00000007573 15113123577 0014006 0 ustar 00root root 0000000 0000000 # Install
Download pre-built binaries and packages for macOS, Windows, Linux and BSD from
the [Releases](https://github.com/liquidaty/zsv/releases) page.
> [!IMPORTANT]
>
> For [musl libc](https://www.musl-libc.org/) static build, the dynamic
> extensions are not supported!
> [!NOTE]
>
> All package artifacts are properly
> [attested](https://github.blog/news-insights/product-news/introducing-artifact-attestations-now-in-public-beta/)
> and can be verified using [GitHub CLI](https://cli.github.com/) like this:
>
> ```shell
> gh attestation verify --repo liquidaty/zsv
> ```
## macOS
### macOS: Homebrew
```shell
# Update
brew update
# Install
brew install zsv
# Uninstall
brew uninstall zsv
```
### macOS: Homebrew Custom Tap
```shell
# Tap
brew tap liquidaty/zsv
# Update
brew update
# Install
brew install zsv
# Uninstall
brew uninstall zsv
```
### macOS: MacPorts
```shell
sudo port install zsv
```
## Linux
### Linux: Homebrew
```shell
# Update
brew update
# Install
brew install zsv
# Uninstall
brew uninstall zsv
```
### Linux: `apt`
```shell
# Add repository
echo "deb [trusted=yes] https://liquidaty.github.io/zsv/packages/apt/amd64/ ./" | \
sudo tee /etc/apt/sources.list.d/zsv.list
# Update
sudo apt update
# Install CLI
sudo apt install zsv
# Install library
sudo apt install zsv-dev
```
### Linux: `rpm`
```shell
# Add repository
sudo tee /etc/yum.repos.d/zsv.repo << EOF
[zsv]
name=zsv
baseurl=https://liquidaty.github.io/zsv/packages/rpm/amd64
enabled=1
gpgcheck=0
EOF
# Install CLI
sudo yum install zsv
# Install library
sudo yum install zsv-devel
```
To install the manually downloaded `deb`/`rpm`, follow these instructions:
For Linux (Debian/Ubuntu - `*.deb`):
```shell
# Install
sudo apt install ./zsv-VERSION-amd64-linux-gcc.deb
# Uninstall
sudo apt remove zsv
```
For Linux (RHEL/CentOS - `*.rpm`):
```shell
# Install
sudo yum install ./zsv-VERSION-amd64-linux-gcc.rpm
# Uninstall
sudo yum remove zsv
```
## Windows
### Windows: `winget`
```powershell
# Install with alias
winget.exe install zsv
# Install with id
winget.exe install --id liquidaty.zsv
```
### Windows: `nuget`
Install the downloaded `.nupkg` with `nuget.exe`:
```powershell
# Install via nuget custom feed (requires absolute paths)
md nuget-feed
nuget.exe add zsv path\to\nupkg -source path\to\nuget-feed
nuget.exe install zsv -version -source path\to\nuget-feed
# Uninstall
nuget.exe delete zsv -source path\to\nuget-feed
```
Alternatively, install the downloaded `.nupkg` with `choco.exe`:
```powershell
# Install
choco.exe install zsv --pre -source path\to\nupkg
# Uninstall
choco.exe uninstall zsv
```
## Node
The zsv parser library is available for node:
```shell
npm install zsv-lib
```
Please note:
- This package currently only exposes a small subset of the zsv library
capabilities. More to come!
- The CLI is not yet available as a Node package
- If you'd like to use additional parser features, or use the CLI as a Node
package, please feel free to post a request in an issue here.
## GHCR (GitHub Container Registry)
`zsv` CLI is also available as a container image from
[Packages](https://github.com/liquidaty?tab=packages).
The container image is published on every release. In addition to the specific
release tag, the image is also tagged as `latest` i.e. `zsv:latest` always
points the latest released version.
Example:
```shell
$ docker pull ghcr.io/liquidaty/zsv
# ...
$ cat worldcitiespop_mil.csv | docker run -i ghcr.io/liquidaty/zsv count
1000000
```
For image details, see [Dockerfile](./Dockerfile). You may use this as a
baseline for your own use cases as needed.
## GitHub Actions
In a GitHub Actions workflow, you can use [`zsv/setup-action`](./setup-action)
to set up zsv+zsvlib:
```yml
- name: Set up zsv+zsvlib
uses: liquidaty/zsv/setup-action@main
```
See [zsv/setup-action/README](./setup-action/README.md) for more details.
zsv-1.3.0/LICENSE 0000664 0000000 0000000 00000002074 15113123577 0013352 0 ustar 00root root 0000000 0000000 MIT License
Copyright (c) 2021 Guarnerix Inc dba Liquidaty
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
zsv-1.3.0/Makefile 0000664 0000000 0000000 00000004642 15113123577 0014010 0 ustar 00root root 0000000 0000000 # Makefile for use with GNU make
THIS_MAKEFILE_DIR:=$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
THIS_DIR:=$(shell basename "${THIS_MAKEFILE_DIR}")
THIS_MAKEFILE:=$(lastword $(MAKEFILE_LIST))
THIS_MAKE=`basename ${MAKE}`
CONFIGFILE ?= config.mk
include ${CONFIGFILE}
CONFIGFILEPATH=$(shell ls ${CONFIGFILE} >/dev/null 2>/dev/null && realpath ${CONFIGFILE})
ifeq (${CONFIGFILEPATH},)
$(error Config file ${CONFIGFILE} not found)
endif
help:
@echo "**** Welcome to zsv+lib ****"
@echo
@echo "This package has two primary components:"
@echo "* zsvlib : a fast CSV parser library"
@echo "* zsv : CSV editor and toolkit (that uses zsvlib)"
@echo
@echo "\`zsv\` also supports dynamic extensions, a sample of which you can"
@echo "build and run as described in docs/extension.md"
@echo
@echo "To build, test and install zsvlib and zsv:"
@echo " ./configure && ${THIS_MAKE} test"
@echo
@echo "To build and install zsvlib and zsv:"
@echo " ./configure && ${THIS_MAKE} install"
@echo
@echo "To build and install only zsvlib:"
@echo " ./configure && ${THIS_MAKE} -C src install"
@echo
@echo "To build and install only zsv (i.e. install both, remove zsvlib):"
@echo " ./configure && ${THIS_MAKE} install && ${THIS_MAKE} -C src uninstall"
@echo
@echo "To save and build from a configuration without losing the current one,"
@echo "use the configuration option CONFIGFILE e.g.:"
@echo " ./configure --config-file=/path/to/config.custom"
@echo " ./configure && ${THIS_MAKE} -C src CONFIGFILE=/path/to/config.custom install"
@echo
@echo "To clean (remove temporary build objects) (after running configure):"
@echo " ${THIS_MAKE} clean"
@echo
@echo "To uninstall libs and apps:"
@echo " ${THIS_MAKE} uninstall"
@echo
@echo "To test:"
@echo " ${THIS_MAKE} test"
@echo
@echo "Additional make options available for the library or the apps by"
@echo " running ${THIS_MAKE} from the src or app directory"
@echo
@echo "For more information, see README.md"
check test:
@${MAKE} -C app test CONFIGFILE=${CONFIGFILEPATH}
@${MAKE} -C examples/lib test CONFIGFILE=${CONFIGFILEPATH}
build install uninstall: % :
@${MAKE} -C src $* CONFIGFILE=${CONFIGFILEPATH}
@${MAKE} -C app $* CONFIGFILE=${CONFIGFILEPATH}
clean:
@${MAKE} -C src clean CONFIGFILE=${CONFIGFILEPATH}
@${MAKE} -C app clean-all CONFIGFILE=${CONFIGFILEPATH}
@rm -rf ${THIS_MAKEFILE_DIR}/build
.PHONY: help build install uninstall clean check test
zsv-1.3.0/README.md 0000664 0000000 0000000 00000037362 15113123577 0013634 0 ustar 00root root 0000000 0000000 # zsv+lib: the world's fastest (simd) CSV parser, with an extensible CLI
lib + CLI:
[](https://github.com/liquidaty/zsv/actions/workflows/ci.yml)


[](https://github.com/liquidaty/zsv/blob/master/LICENSE)
npm:
[![NPM Version][npm-version-image]][npm-url]
[![NPM Install Size][npm-install-size-image]][npm-install-size-url]
[npm-install-size-image]: https://badgen.net/packagephobia/install/zsv-lib
[npm-install-size-url]: https://packagephobia.com/result?p=zsv-lib
[npm-url]: https://npmjs.org/package/zsv-lib
[npm-version-image]: https://badgen.net/npm/v/zsv-lib
Playground (without `sheet` viewer command): https://liquidaty.github.io/zsv
zsv+lib is a [fast CSV parser](./app/benchmark/README.md) library and extensible command-line utility. It
achieves high performance using SIMD operations, [efficient memory
use](docs/memory.md) and other optimization techniques, and can also parse
generic-delimited and fixed-width formats, as well as multi-row-span headers.
## CLI
The ZSV CLI can be compiled to virtually any target, including
[WebAssembly](examples/js), and offers a variety of [commands](#batteries-included) including `select`, `count`,
direct CSV `sql`, `flatten`, `serialize`, `2json` conversion, `2db` sqlite3
conversion, `stack`, `pretty`, `2tsv`, `compare`, `paste`, `overwrite`,
`check` and more.
The ZSV CLI also includes [`sheet`](docs/sheet.md), an in-console interactive
grid viewer that includes basic navigation, filtering, and pivot table with
drill down, and that supports custom extensions:
## Installation
- `brew` (MacOS, Linux):
- `brew install zsv`
- `winget` (Windows):
- `winget.exe install zsv`
- `npm` (parser only), `nuget`, `yum`, `apt`, `choco` and more
- See [INSTALL.md](INSTALL.md)
- Download
- Pre-built binaries and packages for macOS, Windows, Linux and BSD can be
downloaded from the [Releases](https://github.com/liquidaty/zsv/releases)
page.
- Build
- See [BUILD.md](BUILD.md) to build from source.
## Playground
An [online playground](https://liquidaty.github.io/zsv) is available as well
(without the `sheet` feature due to browser limitations)
If you like zsv+lib, do not forget to give it a star! 🌟
## Performance
Performance results compare favorably vs other CSV utilities (`xsv`,
`tsv-utils`, `csvkit`, `mlr` (miller) etc).
See [benchmarks](./app/benchmark/README.md)
## Which "CSV"
"CSV" is an ambiguous term. This library uses, *by default*, the same definition
as Excel (the library and app have various options to change this default
behavior); a more accurate description of it would be "UTF8 delimited data
parser" insofar as it requires UTF8 input and its options support customization
of the delimiter and whether to allow quoting.
In addition, zsv provides a *row-level* (as well as cell-level) API and provides
"normalized" CSV output (e.g. input of `this"iscell1,"thisis,"cell2` becomes
`"this""iscell1","thisis,cell2"`). Each of these three objectives (Excel
compatibility, row-level API and normalized output) has a measurable performance
impact; conversely, it is possible to achieve-- which a number of other CSV
parsers do-- much faster parsing speeds if any of these requirements (especially
Excel compatibility) are dropped.
### Examples of input that does not comply with RFC 4180
The following is a comprehensive list of all input patterns that are
non-compliant with RFC 4180, and how zsv (by default) parses each:
|Input Description|Parser treatment|Example input|How example input is parsed|
|--|--|--|--|
|Non-ASCII input, UTF8 BOM| BOM at start of the stream is ignored|(0xEF BB BF)|Ignored|
|Non-ASCII input, valid UTF8|Parsed as UTF8|ä½ ,好|cell1 = ä½ , cell2 = 好|
|Non-ASCII input, invalid UTF8|Parsed as UTF8; any non-compliant bytes are retained, or replaced with specified char|aaa,bXb,ccc where X is malformed UTF8|cell1 = aaa, cell2 = bXb, cell3 = ccc|
|`\n`, `\r`, or `\r\n` newlines|Any non-quote-captured occurrence of `\n`, `\r`, `\r\n` or `\n\r` is parsed as a row end|`1a,1b,1c\n`
`2a,2b,2c\r`
`3a,3b,3c\n\r`
`4a,4b,4c\r\n`
`5a,"5\nb",5c\n`
`6a,"6b\r","6c"\n`
`7a,7b,7c`|Parsed as 7 rows each with 3 cells|
|Unquoted quote|Treated like any other non-delimiter|`aaa,b"bb,ccc`|Cell 2 value is `b"bb`, output as CSV `"b""bb"`|
|Closing quote followed by character other than delimiter (comma) or row end|Treated like any other non-delimiter|`"aa"a,"bb"bb"b,ccc`|Cell 1 value is `aaa`, cell2 value is `bbbb"b`, output as CSV `aaa` and `"bbbb""b"`|
|Missing final CRLF|Ignored; end-of-stream is considered end-of-row if not preceded by explicit row terminator|`aaa,bbb,ccc`|Row with 3 cells, same as if input ended with row terminator preceding `EOF`|
|Row and header contain different number of columns (cells)|Number of cells in each row is independent of other rows|`aaa,bbb\n`
`aaa,bbb,ccc`|Row 1 = 2 cells; Row 2 = 3 cells|
|Header row contains duplicate cells or embedded newlines|Header rows are parsed the same way as other rows (see NOTE below)|`"a\na","a\na"`|Two cells of `a\na`|
The above behavior can be altered with various optional flags:
* Header rows can be treated differently if options are used to skip rows
and/or use multi-row header span -- see documentation for further detail.
* Quote support can be turned off, to treat quotes just like any other non-
delimiter character
* Cell delimiter can be a character other than comma
* Row delimiter can be specified as CRLF only, in which case a standalone CR
or LF is simply part of the cell value, even without quoting
## Built-in and extensible features
`zsv` is an extensible CSV utility, which uses zsvlib, for tasks such as slicing
and dicing, querying with SQL, combining, serializing, flattening,
[converting between CSV/JSON/sqlite3](docs/csv_json_sqlite.md) and more.
`zsv` is streamlined for easy development of custom dynamic extensions.
zsvlib and `zsv` are written in C, but since zsvlib is a library, and `zsv`
extensions are just shared libraries, you can extend `zsv` with your own code in
any programming language, so long as it has been compiled into a shared library
that implements the expected
[interface](./include/zsv/ext/implementation_private.h).
## Key highlights
- Available as BOTH a library and an application (coming soon: standalone
zsvutil library for common helper functions such as csv writer)
- Open-source, permissively licensed
- Handles real-world CSV the same way that spreadsheet programs do (*including
edge cases*). Gracefully handles (and can "clean") real-world data that may be
"dirty".
- Runs on macOS (tested on clang/gcc), Linux (gcc), Windows (mingw), BSD
(gcc-only) and in-browser (emscripten/wasm)
- High performance (fastest vs all alternatives we've benchmarked)
[app/benchmark/README.md](app/benchmark/README.md)
- Lightweight: low memory usage (regardless of input data size) and binary size for
both lib (~30k) and CLI (< 3MB)
- Handles general delimited data (e.g. pipe-delimited) and fixed-width input
(with specified widths or auto-detected widths), as well as CRLF-only row delims
with unquoted embedded LF
- Handles multi-row headers
- Handles input from any stream, including caller-defined streams accessed via a
single caller-defined `fread`-like function
- Easy to use as a library in a few lines of code, via either pull or push
parsing
- Includes the `zsv` CLI with the following built-in commands:
- [`sheet`](docs/sheet.md), an in-console interactive and extendable grid viewer
- `select`, `count`, `sql` query, `desc`ribe, `flatten`, `serialize`, `2json`,
`2db`, `stack`, `pretty`, `2tsv`, `paste`, `check`, `compare`, `overwrite`,
`jq`
- easily [convert between CSV/JSON/sqlite3](docs/csv_json_sqlite.md)
- [compare multiple files](docs/compare.md)
- [overwrite cells in files](docs/overwrite.md)
- [and more](#batteries-included)
- CLI is easy to extend/customize with a few lines of code via modular plug-in
framework. Just write a few custom functions and compile into a distributable
DLL that any existing zsv installation can use.
## Why another CSV parser/utility?
Our objectives, which we were unable to find in a pre-existing project, are:
- Reasonably high performance
- Runs on any platform, including web assembly
- Available as both a library and a standalone executable / command-line
interface utility (CLI)
- Memory-efficient, configurable resource limits
- Handles real-world CSV cases the same way that Excel does, including all edge
cases (quote handling, newline handling (either `\n` or `\r`), embedded
newlines, abnormal quoting e.g. aaa"aaa,bbb...)
- Handles other "dirty" data issues:
- Assumes valid UTF8, but does not misbehave if input contains bad UTF8
- Option to specify multi-row headers
- Does not assume or stop working in case of inconsistent numbers of columns
- Easy to use library or extend/customize CLI
There are several excellent tools that achieve high performance. Among those we
considered were xsv and tsv-utils. While they met our performance objective,
both were designed primarily as a utility and not a library, and were not easy
enough, for our needs, to customize and/or to support modular customizations
that could be maintained (or licensed) independently of the related project (in
addition to the fact that they were written in Rust and D, respectively, which
happen to be languages with which we lacked deep experience, especially for web
assembly targeting).
Others we considered were Miller (`mlr`), `csvkit` and Go (csv module), which
did not meet our performance objective. We also considered various other
libraries using SIMD for CSV parsing, but none that we tried met the "real-world
CSV" objective.
Hence, zsv was created as a library and a versatile application, both optimized
for speed and ease of development for extending and/or customizing to your
needs.
## Batteries included
`zsv` comes with several built-in commands:
- [`sheet`](docs/sheet.md): an in-console, interactive grid viewer
- `echo`: read CSV from stdin and write it back out to stdout. This is mostly
useful for demonstrating how to use the API and also how to create a plug-in,
and has several uses beyond that including adding/removing BOM, cleaning up
bad UTF8, whitespace or blank column trimming, limiting output to a contiguous
data block, skipping leading garbage, and even providing substitution values
without modifying the underlying source
- `check`: scan for anomalies such as rows with a different number of cells
than the header row or invalid utf8
- `count`: print the number of rows
- `select`: re-shape CSV by skipping leading garbage, combining header rows into
a single header, selecting or excluding specified columns, removing duplicate
columns, sampling, converting from fixed-width input, searching and more
- `desc`: provide a quick description of your table data
- `sql`: treat one or more CSV files like database tables and query with SQL
- `pretty`: format for console (fixed-width) display, or convert to markdown
format
- `serialize` (inverse of flatten): convert an NxM table to a single 3x (Nx(M-1))
table with columns: Row, Column Name, Column Value
- `flatten` (inverse of serialize): flatten a table by combining rows that share
a common value in a specified identifier column
- `2json`: convert CSV to JSON. Optionally, output in
[database schema](docs/db.schema.json)
- `2tsv`: convert to TSV (tab-delimited) format
- `stack`: merge CSV files vertically
- `paste`: horizontally paste two tables together (given inputs X and Y,
output 1...N rows where each row contains the entire corresponding
row in X followed by the entire corresponding row in Y)
- `compare`: compare two or more tables of data and output the differences
- `overwrite`: overwrite a cell value; changes will be reflected in any zsv
command when the --apply-overwrites option is specified
- `jq`: run a `jq` filter
- `2db`: [convert from JSON to sqlite3 db](docs/csv_json_sqlite.md)
- `prop`: view or save parsing options associated with a file, such as initial
rows to ignore, or header row span. Saved options are applied by default
when processing that file.
Most of these can also be built as an independent executable named `zsv_xxx`
where `xxx` is the command name.
## Running the CLI
After installing, run `zsv help` to see usage details. The typical syntax is
`zsv ` e.g.:
```shell
zsv sql my_population_data.csv "select * from data where population > 100000"
```
## Using the API
Simple API usage examples include:
### Pull parsing
```c
zsv_parser parser = zsv_new(NULL);
while (zsv_next_row(parser) == zsv_status_row) { // for each row
// ...
const size_t cell_count = zsv_cell_count(parser);
for (size_t i = 0; i < cell_count; i++) { // for each cell
struct zsv_cell cell = zsv_get_cell(parser, i);
printf("cell: %.*s\n", cell.len, cell.str);
// ...
}
}
```
### Push parsing
```c
static void my_row_handler(void *ctx) {
zsv_parser parser = ctx;
const size_t cell_count = zsv_cell_count(parser);
for (size_t i = 0; i < cell_count; i++) {
// ...
}
}
int main() {
zsv_parser parser = zsv_new(NULL);
zsv_set_row_handler(parser, my_row_handler);
zsv_set_context(parser, parser);
while (zsv_parse_more(parser) == zsv_status_ok);
return 0;
}
```
Full application code examples can be found at
[examples/lib/README.md](examples/lib/README.md).
An example of using the API, compiled to wasm and called via Javascript, is in
[examples/js/README.md](examples/js/README.md).
For more sophisticated (but at this time, only sporadically
commented/documented) use cases, see the various CLI C source files in the `app`
directory such as `app/serialize.c`.
## Creating your own extension
You can extend `zsv` by providing a pre-compiled shared or static library that
defines the functions specified in `extension_template.h` and which `zsv` loads
in one of three ways:
- as a static library that is statically linked at compile time
- as a dynamic library that is linked at compile time and located in any library
search path
- as a dynamic library that is located in the same folder as the `zsv`
executable and loaded at runtime if/as/when the custom mode is invoked
### Example and template
You can build and run a sample extension by running `make test` from
`app/ext_example`.
The easiest way to implement your own extension is to copy and customize the
template files in [app/ext_template](app/ext_template/README.md)
## Possible enhancements and related developments
- optimize search; add search with hyperscan or re2 regex matching, possibly
parallelize?
- optional OpenMP or other multi-threading for row processing
- auto-generated documentation, and better documentation in general
- Additional benchmarking. Would be great to use
as a springboard to
benchmarking a number of various tasks
- encoding conversion e.g. UTF16 to UTF8
## Contribute
- [Fork](https://github.com/liquidaty/zsv/fork) the project.
- Check out the latest [`main`](https://github.com/liquidaty/zsv/tree/main)
branch.
- Create a feature or bugfix branch from `main`.
- Update your required changes.
- Make sure to run `clang-format` (version 15 or later) for C source updates.
- Commit and push your changes.
- Submit the PR.
## License
[MIT](https://github.com/liquidaty/zsv/blob/master/LICENSE)
The zsv CLI uses some permissively-licensed third-party libraries.
See [misc/THIRDPARTY.md](misc/THIRDPARTY.md) for details.
zsv-1.3.0/TO-DO.md 0000664 0000000 0000000 00000006067 15113123577 0013517 0 ustar 00root root 0000000 0000000 # v1.0 to do:
- sheet
- navigate to row: vim N-G, emacs Esc-GG
- edit cell
- save buffer to file (with or without Row # column)
- bug fixes
- key bindings:
- align key bindings with vim
- add cmd to switch to emacs key bindings
- save sheet prefs in zsv.ini?
- Open file: tab for autocomplete
- View edits (cell highlight, status bar)
- Edit file?
- Pivot and/or frequency table with drill-down
- CI/CD:
- use code signing to prevent os from quarantining by default
- Documentation
- Review / update / fix errors (README and all other)
- Add intro + tutorial for each command (esp sheet)
# Core Parser
### Performance
- add zsv_opts option to build index while parsing
- add zsv_opts option to use index if available
- add start_row and end_row options to `zsv_opts`
### input formats
- high priority: single-registration-function support for additional file suffix support. This functionality could come from either a built-in or an extension
- for example, `zsv_register_file_reader("bz2", ...)` and `zsvwriter_register_file_writer("bz2", ...)`
- should also handle file formats that may contain multiple files e.g. multiple tabs in xlsx
# CLI
- add index-related parser options to general command-line options
- add `--all` option to `help` which will output all help messages for all commands (including extension commands)
---
## Performance
Row indexing
---
## Sheet
### Help menu
- multi-tab read and write (e.g. XLSX support via plug-in)
- column operation plug-in (add, remove, modify column)
### Data editing
- needs to support buffer-level handlers
- for example:
- user opens mydata.csv
- user filters to rows that contain "xyz"; results are displayed in a new buffer
- user tries to edit cell in the new (filtered data) buffer
- either this attempt fails because the buffer is read-only, or
- the buffer handles this in a specific manner to trace the edited row back to the correct row in the original data
### Buffers
- add buffer type e.g. csv etc
- add read-only flag
### Extensions
- update temp file management to centralized list with ref count
- add options to stop or cancel event handling before it is finished running. Stop = stop running, and display any progress so far in a new buffer;
cancel = stop running, don't display anything and return as if the event handler had never started in the first place
- add extension_id to each buffer; prevent extension A from modifying (e.g. set/get ext_ctx) buffer owned by extension B
- high priority: support extension custom properties
- save in ../zsv/extensions/xxx.ini
- API should include functions to set/get
- Extend the my_extension.c such that when a buffer in the displayed list is selected, pressing Return will load that buffer
- cell plug-in: display context menu (e.g. for drill-down menu options)
### Interface
- progress tracking
- title line?
- help
---
## New commands
### ls
- list files that have saved zsv settings (i.e. each subdirectory in .zsv/data)
### audit/dump
- dump all file-specific saved settings (properties, edits/corrections etc)
zsv-1.3.0/app/ 0000775 0000000 0000000 00000000000 15113123577 0013122 5 ustar 00root root 0000000 0000000 zsv-1.3.0/app/2db.c 0000664 0000000 0000000 00000057253 15113123577 0013751 0 ustar 00root root 0000000 0000000 /*
* Copyright (C) 2021-2022 Liquidaty and zsv contributors. All rights reserved.
* This file is part of zsv/lib, distributed under the license defined at
* https://opensource.org/licenses/MIT
*/
#include
#include
#include // unlink
#include
#include
#include
#include
#define ZSV_COMMAND 2db
#include "zsv_command.h"
#include
#include
#include
#include
#define ZSV_2DB_DEFAULT_TABLE_NAME "mytable"
// Supported top-level operations for the 2db command
enum zsv_2db_action {
  zsv_2db_action_create = 1,
  zsv_2db_action_append,
  zsv_2db_action_index
};

// JSON-input processing state: header (schema) section, then data rows, then done
enum zsv_2db_state {
  zsv_2db_state_header = 1,
  zsv_2db_state_data,
  zsv_2db_state_done
};

#define LQ_2DB_MAX_INDEXES 32

// Singly-linked list node describing one index to create on the target table
struct zsv_2db_ix {
  struct zsv_2db_ix *next;
  char *name; // index name suffix; full index name is "<table>_<name>"
  char *on;   // column-list expression used in 'on "<table>"(...)'
  char delete; // flag; NOTE(review): not referenced in this chunk -- confirm semantics
  char unique; // non-zero => create a UNIQUE index
};

// Singly-linked list node describing one target-table column
struct zsv_2db_column {
  struct zsv_2db_column *next;
  char *name;
  char *datatype; // expected: int, integer, real or text (NULL => defaults to text)
  char *collate;  // expected: binary, rtrim or nocase (NULL/empty => none)
};

// User-configurable options for a single 2db run
struct zsv_2db_options {
  char *table_name; // target table name; NULL => ZSV_2DB_DEFAULT_TABLE_NAME
  char *db_fn;      // output sqlite3 database filename
  char verbose;
  char overwrite; // overwrite old db if it exists
#define ZSV_2DB_DEFAULT_BATCH_SIZE 10000
  size_t batch_size; // presumably rows per transaction batch -- TODO confirm (not used in this chunk)
};

// Opaque handle representing one JSON-to-sqlite3 conversion; see zsv_2db_delete
typedef struct zsv_2db_data *zsv_2db_handle;

struct zsv_2db_data {
  struct zsv_2db_options opts;
  char *db_fn_tmp; // owned; freed in zsv_2db_delete
  sqlite3 *db;     // open sqlite3 connection; closed in zsv_2db_delete
  char transaction_started; // non-zero while an explicit BEGIN is outstanding
  char *connection_string;
  // State accumulated while parsing the JSON input stream
  struct {
    yajl_helper_t yh; // yajl JSON parse helper; deleted in zsv_2db_delete
    yajl_status yajl_stat;
    enum zsv_2db_state state;
    unsigned int col_count; // number of columns discovered in the header
    struct zsv_2db_column *columns, **last_column; // column list + tail pointer
    struct zsv_2db_column current_column;          // column currently being parsed
    struct zsv_2db_ix *indexes, **last_index;      // index list + tail pointer
    sqlite3_int64 index_sequence_num_max; // incremented once per index created
    struct zsv_2db_ix current_index; // index spec currently being parsed
    char have_row_data;
    char **row_values; // one slot per column for the row being built (calloc'd in zsv_2db_finish_header)
    sqlite3_stmt *insert_stmt; // prepared per-row INSERT statement
    unsigned stmt_colcount;
  } json_parser;
  size_t rows_processed;
  size_t row_insert_attempts;
  size_t rows_inserted;
#define ZSV_2DB_MSG_BATCH_SIZE 10000 // number of rows between each console update (if verbose)
  int err; // non-zero once a fatal error has occurred
};
// Release the heap-allocated fields of an index spec (not the node itself).
static void zsv_2db_ix_free(struct zsv_2db_ix *e) {
  free(e->on);
  free(e->name);
}
// Free an entire linked list of index specs and null out the caller's head
// pointer. Safe to call with a NULL pointer or an empty list.
static void zsv_2db_ixes_delete(struct zsv_2db_ix **p) {
  if (!p || !*p)
    return;
  struct zsv_2db_ix *cur = *p;
  while (cur) {
    struct zsv_2db_ix *nxt = cur->next;
    zsv_2db_ix_free(cur);
    free(cur);
    cur = nxt;
  }
  *p = NULL;
}
// Release the heap-allocated fields of a column spec (not the node itself).
static void zsv_2db_column_free(struct zsv_2db_column *e) {
  free(e->collate);
  free(e->datatype);
  free(e->name);
}
// Free an entire linked list of column specs and null out the caller's head
// pointer. Safe to call with a NULL pointer or an empty list.
static void zsv_2db_columns_delete(struct zsv_2db_column **p) {
  if (!p || !*p)
    return;
  struct zsv_2db_column *cur = *p;
  while (cur) {
    struct zsv_2db_column *nxt = cur->next;
    zsv_2db_column_free(cur);
    free(cur);
    cur = nxt;
  }
  *p = NULL;
}
// Destroy a 2db handle: close the db connection, free all owned strings,
// the column/index lists, the pending row buffer and the JSON parse helper,
// then free the handle itself. Safe to call with NULL.
// NOTE(review): json_parser.insert_stmt is not finalized here -- presumably
// finalized elsewhere before sqlite3_close; confirm, since sqlite3_close
// fails when prepared statements remain.
static void zsv_2db_delete(zsv_2db_handle data) {
  if (!data)
    return;
  free(data->opts.table_name);
  free(data->db_fn_tmp);
  if (data->db)
    sqlite3_close(data->db);
  // free both the completed lists and the in-progress "current" entries
  zsv_2db_columns_delete(&data->json_parser.columns);
  zsv_2db_column_free(&data->json_parser.current_column);
  zsv_2db_ixes_delete(&data->json_parser.indexes);
  zsv_2db_ix_free(&data->json_parser.current_index);
  free(data->json_parser.row_values);
  yajl_helper_delete(data->json_parser.yh);
  free(data);
}
/* sqlite3 helper functions */
// Execute a single SQL statement. Returns 0 on success, 1 on failure.
// Any sqlite3 error message is printed to stderr and released; a non-NULL
// error message is treated as failure regardless of the return code.
static int zsv_2db_sqlite3_exec_2db(sqlite3 *db, const char *sql) {
  char *errmsg = NULL;
  int rc = sqlite3_exec(db, sql, NULL, NULL, &errmsg);
  if (errmsg == NULL && (rc == SQLITE_DONE || rc == SQLITE_OK))
    return 0;
  if (errmsg) {
    fprintf(stderr, "Error executing '%s': %s\n", sql, errmsg);
    sqlite3_free(errmsg);
  }
  return 1;
}
// add_db_indexes: return 0 on success, else error code
// Creates one sqlite3 index per entry in data->json_parser.indexes, named
// "<table>_<ix name>", incrementing index_sequence_num_max for each success.
// Stops at the first failure.
static int zsv_2db_add_indexes(struct zsv_2db_data *data) {
  int err = 0;
  for (struct zsv_2db_ix *ix = data->json_parser.indexes; !err && ix; ix = ix->next) {
    sqlite3_str *pStr = sqlite3_str_new(data->db);
    // %w escapes identifiers; ix->on is spliced in raw (%s) because it is a
    // column-list expression, not a single identifier -- NOTE(review): confirm
    // ix->on never comes from untrusted input
    sqlite3_str_appendf(pStr, "create%s index \"%w_%w\" on \"%w\"(%s)", ix->unique ? " unique" : "",
                        data->opts.table_name, ix->name, data->opts.table_name, ix->on);
    err = zsv_2db_sqlite3_exec_2db(data->db, sqlite3_str_value(pStr));
    if (!err)
      data->json_parser.index_sequence_num_max++;
    // finish releases the builder; its returned buffer must also be freed
    sqlite3_free(sqlite3_str_finish(pStr));
  }
  return err;
}
// Begin an explicit transaction unless one is already open (idempotent).
static void zsv_2db_start_transaction(struct zsv_2db_data *data) {
  if (data->transaction_started)
    return;
  sqlite3_exec(data->db, "BEGIN TRANSACTION", NULL, NULL, NULL);
  data->transaction_started = 1;
}
// Commit the open transaction, if any (idempotent).
static void zsv_2db_end_transaction(struct zsv_2db_data *data) {
  if (!data->transaction_started)
    return;
  sqlite3_exec(data->db, "COMMIT", NULL, NULL, NULL);
  data->transaction_started = 0;
}
/**
 * Build a CREATE TABLE statement for the given columns.
 * Accepted datatypes: int, integer, real, text (missing => text).
 * Accepted collations: binary, rtrim, nocase (missing/empty => none).
 * Returns a sqlite3_str the caller must finish & free, or NULL on error
 */
static sqlite3_str *build_create_table_statement(sqlite3 *db, const char *tname, const char *const *colnames,
                                                 const char *const *datatypes, const char *const *collates,
                                                 unsigned int col_count) {
  int err = 0;
  sqlite3_str *pStr = sqlite3_str_new(db);
  sqlite3_str_appendf(pStr, "CREATE TABLE \"%w\" (\n ", tname ? tname : ZSV_2DB_DEFAULT_TABLE_NAME);
  for (unsigned int i = 0; i < col_count; i++) {
    if (i > 0)
      sqlite3_str_appendf(pStr, ",\n ");
    sqlite3_str_appendf(pStr, "\"%w\"", colnames[i]);
    const char *datatype = datatypes ? datatypes[i] : NULL;
    if (!datatype || !(!strcmp("int", datatype) || !strcmp("integer", datatype) || !strcmp("real", datatype) ||
                       !strcmp("text", datatype))) {
      if (datatype)
        fprintf(stderr, "Unrecognized datatype %s", datatype), err = 1;
      else
        datatype = "text"; // default datatype when none specified
    }
    if (!err) {
      const char *collate = collates ? collates[i] : NULL;
      if (collate && !*collate)
        collate = NULL; // treat empty string as "no collation"
      if (collate && !(!strcmp("binary", collate) || !strcmp("rtrim", collate) || !strcmp("nocase", collate))) {
        fprintf(stderr, "Unrecognized collate: expected binary, rtrim or nocase, got %s", collate);
        err = 1;
      } else
        // bug fix: the datatype must be emitted even when no collation is given;
        // previously the whole clause was skipped unless a collate value was present,
        // so untyped columns lost their declared affinity
        sqlite3_str_appendf(pStr, " %s%s%s", datatype, collate ? " collate " : "", collate ? collate : "");
    }
  }
  if (err) {
    sqlite3_free(sqlite3_str_finish(pStr));
    pStr = NULL;
  } else
    sqlite3_str_appendf(pStr, ")\n");
  return pStr;
}
// zsv_2db_finish_header: return 0 on error, 1 on success
// zsv_2db_finish_header: return 0 on error, 1 on success
static int zsv_2db_finish_header(struct zsv_2db_data *data) {
  if (data->err)
    return 0;
  if (data->json_parser.col_count == 0) {
    fprintf(stderr, "No columns found!\n");
    return 0;
  }
  // header complete: switch to data-row parsing and allocate one cell slot per column
  data->json_parser.state = zsv_2db_state_data;
  data->json_parser.row_values = calloc(data->json_parser.col_count, sizeof(*data->json_parser.row_values));
  if (!data->json_parser.row_values) {
    data->err = 1;
    return 0;
  }
  return 1;
}
/* json parser functions */
// Prepare "insert into <tname> values(?, ?, ...)" with col_count placeholders.
// Returns the prepared statement, or NULL on failure
static sqlite3_stmt *create_insert_statement(sqlite3 *db, const char *tname, unsigned int col_count) {
  sqlite3_stmt *stmt = NULL;
  sqlite3_str *sql = sqlite3_str_new(db);
  if (!sql)
    return NULL;
  sqlite3_str_appendf(sql, "insert into \"%w\" values(?", tname);
  for (unsigned int i = 1; i < col_count; i++)
    sqlite3_str_appendf(sql, ", ?");
  sqlite3_str_appendf(sql, ")");
  if (sqlite3_prepare_v2(db, sqlite3_str_value(sql), -1, &stmt, NULL) != SQLITE_OK)
    fprintf(stderr, "Unable to prep (%s): %s\n", sqlite3_str_value(sql), sqlite3_errmsg(db));
  sqlite3_free(sqlite3_str_finish(sql));
  return stmt;
}
// return error
// Create the target table and prepare its insert statement; on success, open
// the transaction that will bracket the row inserts. Return nonzero on error
static int zsv_2db_set_insert_stmt(struct zsv_2db_data *data) {
  int err = 0;
  if (!data->json_parser.col_count) {
    fprintf(stderr, "insert statement called with no columns to insert");
    err = 1;
  } else {
    const char **colnames = calloc(data->json_parser.col_count, sizeof(*colnames));
    const char **datatypes = calloc(data->json_parser.col_count, sizeof(*datatypes));
    const char **collates = calloc(data->json_parser.col_count, sizeof(*collates));
    if (!colnames || !datatypes || !collates) {
      // robustness fix: these allocations were previously used without a NULL check
      fprintf(stderr, "Out of memory!\n");
      err = 1;
    } else {
      // flatten the column list into parallel arrays for build_create_table_statement
      unsigned int i = 0;
      for (struct zsv_2db_column *e = data->json_parser.columns; e; e = e->next, i++) {
        colnames[i] = e->name;
        datatypes[i] = e->datatype;
        collates[i] = e->collate;
      }
      sqlite3_str *create_sql = build_create_table_statement(data->db, data->opts.table_name, colnames, datatypes,
                                                             collates, data->json_parser.col_count);
      if (!create_sql)
        err = 1;
      else {
        if ((err = zsv_2db_sqlite3_exec_2db(data->db, sqlite3_str_value(create_sql))) == 0 &&
            !(data->json_parser.insert_stmt =
                  create_insert_statement(data->db, data->opts.table_name, data->json_parser.col_count)))
          err = 1;
        else if (!err) {
          data->json_parser.stmt_colcount = data->json_parser.col_count;
          // bug fix: the transaction must be started on the success path; it was
          // previously started only when statement creation failed, so no
          // transaction ever bracketed the inserts
          zsv_2db_start_transaction(data);
        }
        sqlite3_free(sqlite3_str_finish(create_sql));
      }
    }
    free(colnames);
    free(datatypes);
    free(collates);
  }
  return err;
}
/*
  zsv_2db_insert_row_values(): bind one row's values to the prepared insert
  statement and execute it. Return sqlite3 error code (-1 if no statement),
  or 0 on ok
*/
static int zsv_2db_insert_row_values(sqlite3_stmt *stmt, unsigned stmt_colcount, char const *const *const values,
                                     unsigned int values_count) {
  if (!stmt)
    return -1;
  int status = 0;
  // bug fix: the error-print throttle must persist across calls; as a plain
  // per-call local it always restarted at 0, so the "too many errors" cap
  // below was dead code and every failure printed
  static unsigned int errors_printed = 0;
  if (values_count > stmt_colcount)
    values_count = stmt_colcount;
  for (unsigned int i = 0; i < values_count; i++) {
    const char *val = values[i];
    if (val && *val)
      sqlite3_bind_text(stmt, (int)i + 1, val, (int)strlen(val), SQLITE_STATIC);
    else
      // don't use sqlite3_bind_null, else x = ? will fail if value is ""/null
      sqlite3_bind_text(stmt, (int)i + 1, "", 0, SQLITE_STATIC);
  }
  // any remaining statement columns get NULL
  for (unsigned int i = values_count; i < stmt_colcount; i++)
    sqlite3_bind_null(stmt, (int)i + 1);
  status = sqlite3_step(stmt);
  if (status == SQLITE_DONE)
    status = 0;
  else if (errors_printed < 10) {
    errors_printed++;
    fprintf(stderr, "Unable to insert: %s\n", sqlite3_errstr(status));
  } else if (errors_printed != 100) {
    errors_printed = 100;
    fprintf(stderr, "Too many insert errors to print\n");
  }
  sqlite3_reset(stmt); // keep the statement reusable for the next row
  return status;
}
// Process one parsed row: lazily create the table & insert statement on the
// first data row, then insert, committing in batches. Always returns 1
static int zsv_2db_insert_row(struct zsv_2db_data *data) {
  if (!data->err) {
    data->rows_processed++;
    if (data->json_parser.have_row_data) {
      if (!data->json_parser.insert_stmt)
        data->err = zsv_2db_set_insert_stmt(data);
      // bug fix: stop if statement setup failed. Checking only data->db here was
      // ineffective (the handle is always open at this point), so a failed setup
      // previously fell through to insert attempts with a NULL statement
      if (data->err || !data->db)
        return 0;
      int rc = zsv_2db_insert_row_values(data->json_parser.insert_stmt, data->json_parser.stmt_colcount,
                                         (char const *const *const)data->json_parser.row_values,
                                         data->json_parser.col_count);
      data->row_insert_attempts++;
      if (!rc) {
        data->rows_inserted++;
        if (data->opts.verbose && (data->rows_inserted % ZSV_2DB_MSG_BATCH_SIZE == 0))
          fprintf(stderr, "%zu rows inserted\n", data->rows_inserted);
        // periodically commit and reopen a transaction to bound journal growth
        if (data->opts.batch_size && (data->rows_inserted % data->opts.batch_size == 0)) {
          zsv_2db_end_transaction(data);
          if (data->opts.verbose)
            fprintf(stderr, "%zu rows committed\n", data->rows_inserted);
          zsv_2db_start_transaction(data);
        }
      }
    }
  }
  return 1;
}
// yajl callback: nothing to do at map start; return 1 to continue parsing
static int json_start_map(yajl_helper_t yh) {
  (void)yh;
  return 1;
}
// yajl callback: in the header, closing a map finalizes either a column spec
// (path "[{columns[") or an index spec (path "[{indexes{"). The completed spec
// is appended to its singly-linked list via the last_column/last_index tail
// pointers. Returns 0 to abort parsing on an invalid spec or OOM, else 1
static int json_end_map(yajl_helper_t yh) {
  struct zsv_2db_data *data = yajl_helper_ctx(yh);
  if (data->json_parser.state == zsv_2db_state_header &&
      yajl_helper_got_path(yh, 3, "[{columns[")) { // exiting a column header
    if (!data->json_parser.current_column.name) {
      fprintf(stderr, "Name missing from column spec!\n");
      return 0;
    } else {
      // move current_column into a heap node appended at the list tail
      struct zsv_2db_column *e = calloc(1, sizeof(*e));
      if (!e) {
        fprintf(stderr, "Out of memory!");
        return 0;
      }
      *e = data->json_parser.current_column;
      *data->json_parser.last_column = e;
      data->json_parser.last_column = &e->next;
      data->json_parser.col_count++;
      // ownership of the spec's strings moved into e; clear the staging struct
      // so they are not freed twice
      memset(&data->json_parser.current_column, 0, sizeof(data->json_parser.current_column));
    }
  } else if (data->json_parser.state == zsv_2db_state_header &&
             yajl_helper_got_path(yh, 3, "[{indexes{")) { // exiting an index
    if (!data->json_parser.current_index.name) {
      fprintf(stderr, "Name missing from index spec\n");
      return 0;
    } else if (!(data->json_parser.current_index.on || data->json_parser.current_index.delete)) {
      fprintf(stderr, "'on' or 'delete' missing from index spec\n");
      return 0;
    } else {
      // move current_index into a heap node appended at the list tail
      struct zsv_2db_ix *e = calloc(1, sizeof(*e));
      if (!e) {
        fprintf(stderr, "Out of memory!");
        return 0;
      }
      *e = data->json_parser.current_index;
      *data->json_parser.last_index = e;
      data->json_parser.last_index = &e->next;
      // ownership moved into e; clear the staging struct to avoid double-free
      memset(&data->json_parser.current_index, 0, sizeof(data->json_parser.current_index));
    }
  }
  return 1;
}
// yajl callback: within the header's "indexes" object, each map key is the
// name of the index being defined; capture it into the staging index spec
static int json_map_key(yajl_helper_t yh, const unsigned char *s, size_t len) {
  struct zsv_2db_data *data = yajl_helper_ctx(yh);
  if (data->json_parser.state == zsv_2db_state_header && yajl_helper_got_path(yh, 3, "[{indexes{")) {
    free(data->json_parser.current_index.name);
    data->json_parser.current_index.name = len ? zsv_memdup(s, len) : NULL;
  }
  return 1;
}
// yajl callback: the second element of the top-level array marks the start of
// row data, at which point the header is finalized
static int json_start_array(yajl_helper_t yh) {
  if (yajl_helper_level(yh) != 2)
    return 1;
  struct zsv_2db_data *data = yajl_helper_ctx(yh);
  if (data->json_parser.state == zsv_2db_state_header && yajl_helper_got_path(yh, 2, "[[") &&
      yajl_helper_array_index_plus_1(yh, 1) == 2)
    return zsv_2db_finish_header(data);
  return 1;
}
// Free every cell value held for the current row and clear the row-data flag
static void reset_row_values(struct zsv_2db_data *data) {
  if (data->json_parser.row_values) {
    unsigned int n = data->json_parser.col_count;
    for (unsigned int i = 0; i < n; i++) {
      free(data->json_parser.row_values[i]);
      data->json_parser.row_values[i] = NULL;
    }
  }
  data->json_parser.have_row_data = 0;
}
// yajl callback: closing a level-2 array while in data state means a row of
// data has been fully parsed; insert it and reset the staging buffer
static int json_end_array(yajl_helper_t yh) {
  if (yajl_helper_level(yh) != 2)
    return 1;
  struct zsv_2db_data *data = yajl_helper_ctx(yh);
  if (data->json_parser.state == zsv_2db_state_data && yajl_helper_got_path(yh, 2, "[[")) {
    zsv_2db_insert_row(data);
    reset_row_values(data);
  }
  return 1;
}
// yajl callback: dispatch a scalar value by its path within the document.
// Data state: capture one cell ("[[[") into the current row buffer.
// Header state: capture the table name, column spec fields (name/datatype/
// collate), and index spec fields (delete/unique/on). Always returns 1
static int json_process_value(yajl_helper_t yh, struct json_value *value) {
  const unsigned char *jsstr;
  size_t len;
  struct zsv_2db_data *data = yajl_helper_ctx(yh);
  if (data->json_parser.state == zsv_2db_state_data) {
    if (yajl_helper_got_path(yh, 3, "[[[")) { // a cell within a row of data
      json_value_default_string(value, &jsstr, &len);
      if (jsstr && len) {
        unsigned int j = yajl_helper_array_index_plus_1(yh, 0);
        if (j && j - 1 < data->json_parser.col_count) { // ignore cells beyond the declared columns
          data->json_parser.row_values[j - 1] = zsv_memdup(jsstr, len);
          data->json_parser.have_row_data = 1;
        }
      }
    }
  } else if (yajl_helper_got_path(yh, 2, "[{name")) { // table name
    json_value_default_string(value, &jsstr, &len);
    if (len) {
      if (data->opts.table_name)
        fprintf(stderr, "Table name specified twice; keeping %s, ignoring %.*s\n", data->opts.table_name, (int)len,
                jsstr);
      else
        data->opts.table_name = zsv_memdup(jsstr, len);
    }
  } else if (yajl_helper_got_path(yh, 4, "[{columns[{name")) {
    free(data->json_parser.current_column.name);
    data->json_parser.current_column.name = NULL;
    json_value_default_string(value, &jsstr, &len);
    if (jsstr && len)
      data->json_parser.current_column.name = zsv_memdup(jsstr, len);
  } else if (yajl_helper_got_path(yh, 4, "[{columns[{datatype")) {
    free(data->json_parser.current_column.datatype);
    data->json_parser.current_column.datatype = NULL;
    json_value_default_string(value, &jsstr, &len);
    if (jsstr && len)
      data->json_parser.current_column.datatype = zsv_memdup(jsstr, len);
  } else if (yajl_helper_got_path(yh, 4, "[{columns[{collate")) {
    free(data->json_parser.current_column.collate);
    data->json_parser.current_column.collate = NULL;
    json_value_default_string(value, &jsstr, &len);
    if (jsstr && len)
      data->json_parser.current_column.collate = zsv_memdup(jsstr, len);
  } else if (yajl_helper_got_path(yh, 4, "[{indexes{*{delete")) {
    data->json_parser.current_index.delete = json_value_truthy(value);
  } else if (yajl_helper_got_path(yh, 4, "[{indexes{*{unique")) {
    data->json_parser.current_index.unique = json_value_truthy(value);
  } else if (yajl_helper_got_path(yh, 4, "[{indexes{*{on") || yajl_helper_got_path(yh, 5, "[{indexes{*{on[")) {
    // "on" may be a single string (level 4) or an array of column names
    // (level 5), which are joined into a comma-separated list
    json_value_default_string(value, &jsstr, &len);
    if (len) {
      if (yajl_helper_level(yh) == 4 || !data->json_parser.current_index.on) {
        free(data->json_parser.current_index.on);
        data->json_parser.current_index.on = zsv_memdup(jsstr, len);
      } else {
        char *defn = NULL;
        // bug fix: check asprintf's result; on failure the output pointer is
        // indeterminate, so using it unconditionally risked undefined behavior
        if (asprintf(&defn, "%s,%.*s", data->json_parser.current_index.on, (int)len, jsstr) != -1) {
          free(data->json_parser.current_index.on);
          data->json_parser.current_index.on = defn;
        }
      }
    }
  }
  return 1;
}
/* api functions */
// exportable
// Create a new 2db handle: validate options, open a temp sqlite3 database
// (renamed into place by zsv_2db_finish), and set up the JSON parser.
// Returns NULL on error
static zsv_2db_handle zsv_2db_new(struct zsv_2db_options *opts) {
  int err = 0;
  if (!opts->db_fn)
    fprintf(stderr, "Please specify an output file\n"), err = 1;
  struct stat stt = {0};
  // refuse to clobber an existing file unless overwrite was requested
  if (!err && !opts->overwrite && (!stat(opts->db_fn, &stt) || errno != ENOENT))
    fprintf(stderr, "File %s already exists\n", opts->db_fn), err = 1;
  if (err)
    return NULL;
  struct zsv_2db_data *data = calloc(1, sizeof(*data));
  if (!data) { // robustness fix: calloc's result was previously dereferenced unchecked
    fprintf(stderr, "Out of memory!\n");
    return NULL;
  }
  data->opts = *opts;
  if (!(data->opts.batch_size))
    data->opts.batch_size = ZSV_2DB_DEFAULT_BATCH_SIZE;
  data->json_parser.last_column = &data->json_parser.columns;
  data->json_parser.last_index = &data->json_parser.indexes;
  data->json_parser.state = zsv_2db_state_header;
  if (opts->table_name)
    data->opts.table_name = strdup(opts->table_name); // own our copy; freed in zsv_2db_delete
  if (asprintf(&data->db_fn_tmp, "%s.tmp", data->opts.db_fn) == -1) {
    // robustness fix: asprintf leaves the pointer indeterminate on failure;
    // reset it so zsv_2db_delete does not free garbage
    data->db_fn_tmp = NULL;
    err = 1;
  } else {
    int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX;
#ifndef NDEBUG
    fprintf(stderr, "Opening: %s\n", data->db_fn_tmp);
#endif
    unlink(data->db_fn_tmp);
    int rc = sqlite3_open_v2(data->db_fn_tmp, &data->db, flags, NULL);
    err = 1;
    if (!data->db)
      fprintf(stderr, "Unable to open db at %s\n", data->db_fn_tmp);
    else if (rc != SQLITE_OK)
      fprintf(stderr, "Unable to open db at %s: %s\n", data->db_fn_tmp, sqlite3_errmsg(data->db));
    else {
      err = 0;
      // performance tweaks
      sqlite3_exec(data->db, "PRAGMA synchronous = OFF", NULL, NULL, NULL);
      sqlite3_exec(data->db, "PRAGMA journal_mode = OFF", NULL, NULL, NULL);
      // parse the input and create & populate the database table
      if (!(data->json_parser.yh = yajl_helper_new(32, json_start_map, json_end_map, json_map_key, json_start_array,
                                                   json_end_array, json_process_value, data))) {
        fprintf(stderr, "Unable to get yajl parser\n");
        err = 1;
      }
    }
  }
  if (err) {
    zsv_2db_delete(data);
    data = NULL;
  }
  return data;
}
// exportable
// Return the handle's accumulated error flag (nonzero if any error occurred)
static int zsv_2db_err(zsv_2db_handle h) {
  return h->err;
}
// exportable
// Finalize the conversion: build any requested indexes, commit, close the
// database, and move the temp file into place at the target path.
// Return 0 on success, else nonzero
static int zsv_2db_finish(zsv_2db_handle data) {
  int err = zsv_2db_add_indexes(data);
  if (err || !data->db)
    return err;
  zsv_2db_end_transaction(data);
  if (data->json_parser.insert_stmt)
    sqlite3_finalize(data->json_parser.insert_stmt);
  sqlite3_close(data->db);
  data->db = NULL;
  // rename tmp to target
  unlink(data->opts.db_fn);
  if (zsv_replace_file(data->db_fn_tmp, data->opts.db_fn)) {
    fprintf(stderr, "Unable to rename %s to %s\n", data->db_fn_tmp, data->opts.db_fn);
    zsv_perror(NULL);
    return 1;
  }
  fprintf(stderr, "Database %s created\n", data->opts.db_fn);
  return 0;
}
// exportable
// Expose the underlying yajl parser handle, e.g. for feeding raw JSON bytes
static yajl_handle zsv_2db_yajl_handle(zsv_2db_handle data) {
  return yajl_helper_yajl(data->json_parser.yh);
}
int ZSV_MAIN_FUNC(ZSV_COMMAND)(int argc, const char *argv[], struct zsv_opts *zsv_opts,
struct zsv_prop_handler *custom_prop_handler) {
(void)(zsv_opts);
(void)(custom_prop_handler);
FILE *f_in = NULL;
int err = 0;
struct zsv_2db_options opts = {0};
opts.verbose = zsv_get_default_opts().verbose;
const char *usage[] = {
APPNAME ": convert JSON to SQLite3 DB",
"",
"Usage: " APPNAME " -o