pax_global_header00006660000000000000000000000064150167001160014507gustar00rootroot0000000000000052 comment=ecdc355b9efc1168d6aad540de4089cd7ef280ea csv-3.3.5/000077500000000000000000000000001501670011600123125ustar00rootroot00000000000000csv-3.3.5/.github/000077500000000000000000000000001501670011600136525ustar00rootroot00000000000000csv-3.3.5/.github/dependabot.yml000066400000000000000000000001661501670011600165050ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: 'github-actions' directory: '/' schedule: interval: 'weekly' csv-3.3.5/.github/workflows/000077500000000000000000000000001501670011600157075ustar00rootroot00000000000000csv-3.3.5/.github/workflows/benchmark.yml000066400000000000000000000012071501670011600203640ustar00rootroot00000000000000name: Benchmark on: - push - pull_request jobs: benchmark: name: "Benchmark: ${{ matrix.runs-on }}" strategy: fail-fast: false matrix: runs-on: - macos-latest - ubuntu-latest - windows-latest runs-on: ${{ matrix.runs-on }} timeout-minutes: 10 steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ruby - name: Install dependencies run: | bundle install gem install csv -v 3.0.1 gem install csv -v 3.0.2 - name: Benchmark run: | rake benchmark csv-3.3.5/.github/workflows/release.yml000066400000000000000000000024751501670011600200620ustar00rootroot00000000000000name: Release on: push: tags: - "*" jobs: github: name: GitHub runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v4 - name: Extract release note run: | ruby \ -e 'print("## csv "); \ puts(ARGF.read.split(/^## /)[1]. \ gsub(/ {.+?}/, ""). 
\ gsub(/\[(.+?)\]\[.+?\]/) {$1})' \ NEWS.md > release-note.md - name: Upload to release run: | title=$(head -n1 release-note.md | sed -e 's/^## //') tail -n +2 release-note.md > release-note-without-version.md gh release create ${GITHUB_REF_NAME} \ --discussion-category Announcements \ --notes-file release-note-without-version.md \ --title "${title}" env: GH_TOKEN: ${{ github.token }} rubygems: name: RubyGems runs-on: ubuntu-latest timeout-minutes: 10 permissions: id-token: write environment: release steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - uses: rubygems/configure-rubygems-credentials@v1.0.0 - name: Push gems run: | bundle exec rake release:rubygem_push csv-3.3.5/.github/workflows/test.yml000066400000000000000000000071731501670011600174210ustar00rootroot00000000000000name: Test on: - push - pull_request jobs: ruby-versions: uses: ruby/actions/.github/workflows/ruby_versions.yml@master with: engine: cruby min_version: 2.5 versions: '["debug"]' in-place: needs: ruby-versions name: "In-place test: Ruby ${{ matrix.ruby-version }}: ${{ matrix.runs-on }}" strategy: fail-fast: false matrix: ruby-version: ${{ fromJson(needs.ruby-versions.outputs.versions) }} runs-on: - macos-latest - ubuntu-latest - windows-latest exclude: - { runs-on: macos-latest, ruby-version: "2.5" } - { runs-on: windows-latest, ruby-version: "3.1" } - { runs-on: windows-latest, ruby-version: debug } include: - { runs-on: windows-latest, ruby-version: mingw } - { runs-on: windows-latest, ruby-version: mswin } runs-on: ${{ matrix.runs-on }} timeout-minutes: 10 steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true - name: Test run: | bundle exec rake scanner: needs: ruby-versions name: "Scanner test: Ruby ${{ matrix.ruby-version }}: ${{ matrix.runs-on }}" strategy: fail-fast: false matrix: ruby-version: ${{ fromJson(needs.ruby-versions.outputs.versions) }} 
runs-on: - macos-latest - ubuntu-latest - windows-latest exclude: - { runs-on: macos-latest, ruby-version: "2.5" } - { runs-on: windows-latest, ruby-version: "3.1" } - { runs-on: windows-latest, ruby-version: debug } include: - { runs-on: windows-latest, ruby-version: mingw } - { runs-on: windows-latest, ruby-version: mswin } runs-on: ${{ matrix.runs-on }} timeout-minutes: 10 env: CSV_PARSER_SCANNER_TEST: "yes" steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ${{ matrix.ruby-version }} bundler-cache: true - name: Test run: | bundle exec rake gem: name: "Gem test" runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ruby - name: Install run: | rake install gem install test-unit - name: Test run: | mkdir -p tmp cd tmp cp -a ../test/ ./ ../run-test.rb document: name: "Document test" runs-on: ubuntu-latest timeout-minutes: 10 steps: - uses: actions/checkout@v4 - uses: ruby/setup-ruby@v1 with: ruby-version: ruby bundler-cache: true - name: Build document run: | bundle exec rake warning:error rdoc - uses: actions/checkout@v4 if: | github.event_name == 'push' with: ref: gh-pages path: gh-pages - name: Deploy if: | github.event_name == 'push' run: | rm html/created.rid touch html/.nojekyll cd gh-pages rsync \ -a \ --delete \ --exclude "/.git/" \ ../html/ \ ./ if [ "$(git status --porcelain)" != "" ]; then git add --all git config user.name "github-actions[bot]" git config user.email "github-actions[bot]@users.noreply.github.com" git commit -m "Generate (${GITHUB_SHA})" git push fi csv-3.3.5/.gitignore000066400000000000000000000001051501670011600142760ustar00rootroot00000000000000/.bundle/ /Gemfile.lock /coverage/ /html/ /pkg/ /spec/reports/ /tmp/ csv-3.3.5/Gemfile000066400000000000000000000003241501670011600136040ustar00rootroot00000000000000source 'https://rubygems.org' # Specify your gem's dependencies in csv.gemspec gemspec group :development do gem "bundler" gem 
"rake" gem "rdoc" gem "benchmark_driver" gem "test-unit", ">= 3.4.8" end csv-3.3.5/LICENSE.txt000066400000000000000000000035621501670011600141430ustar00rootroot00000000000000Copyright (C) 2005-2016 James Edward Gray II. All rights reserved. Copyright (C) 2007-2017 Yukihiro Matsumoto. All rights reserved. Copyright (C) 2017 SHIBATA Hiroshi. All rights reserved. Copyright (C) 2017 Olivier Lacan. All rights reserved. Copyright (C) 2017 Espartaco Palma. All rights reserved. Copyright (C) 2017 Marcus Stollsteimer. All rights reserved. Copyright (C) 2017 pavel. All rights reserved. Copyright (C) 2017-2018 Steven Daniels. All rights reserved. Copyright (C) 2018 Tomohiro Ogoke. All rights reserved. Copyright (C) 2018 Kouhei Sutou. All rights reserved. Copyright (C) 2018 Mitsutaka Mimura. All rights reserved. Copyright (C) 2018 Vladislav. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. csv-3.3.5/NEWS.md000066400000000000000000000470421501670011600134170ustar00rootroot00000000000000# News ## 3.3.5 - 2025-06-01 ### Improvements * docs: Fixed `StringScanner` document URL. * GH-343 * Patch by Petrik de Heus ### Thanks * Petrik de Heus ## 3.3.4 - 2025-04-13 ### Improvements * `csv-filter`: Removed an experimental command line tool. * GH-341 ## 3.3.3 - 2025-03-20 ### Improvements * `csv-filter`: Added an experimental command line tool to filter a CSV. * Patch by Burdette Lamar ### Fixes * Fixed wrong EOF detection for `ARGF` * GH-328 * Reported by Takeshi Nishimatsu * Fixed a regression bug that `CSV.open` rejects integer mode. * GH-336 * Reported by Dave Burgess ### Thanks * Takeshi Nishimatsu * Burdette Lamar * Dave Burgess ## 3.3.2 - 2024-12-21 ### Fixes * Fixed a parse bug with a quoted line with `col_sep` and an empty line. This was introduced in 3.3.1. * GH-324 * Reported by stoodfarback ### Thanks * stoodfarback ## 3.3.1 - 2024-12-15 ### Improvements * `CSV.open`: Changed to detect BOM by default. Note that this isn't enabled on Windows because Ruby may have a bug. See also: https://bugs.ruby-lang.org/issues/20526 * GH-301 * Reported by Junichi Ito * Improved performance. * GH-311 * GH-312 * Patch by Vladimir Kochnev * `CSV.open`: Added support for `StringIO` as an input. * GH-300 * GH-302 * Patch by Marcelo * Added a built-in time converter. You can use it by `converters: :time`. 
* GH-313 * Patch by Bart de Water * Added `CSV::TSV` for tab-separated values. * GH-272 * GH-319 * Reported by kojix2 * Patch by Jas ### Thanks * Junichi Ito * Vladimir Kochnev * Marcelo * Bart de Water * kojix2 * Jas ## 3.3.0 - 2024-03-22 ### Fixes * Fixed a regression parse bug in 3.2.9 that parsing with `:skip_lines` may cause wrong result. ## 3.2.9 - 2024-03-22 ### Fixes * Fixed a parse bug that wrong result may be happen when: * `:skip_lines` is used * `:row_separator` is `"\r\n"` * There is a line that includes `\n` as a column value Reported by Ryo Tsukamoto. GH-296 ### Thanks * Ryo Tsukamoto ## 3.2.8 - 2023-11-08 ### Improvements * Added `CSV::InvalidEncodingError`. Patch by Kosuke Shibata. GH-287 ### Thanks * Kosuke Shibata ## 3.2.7 - 2023-06-26 ### Improvements * Removed an unused internal variable. [GH-273](https://github.com/ruby/csv/issues/273) [Patch by Mau Magnaguagno] * Changed to use `https://` instead of `http://` in documents. [GH-274](https://github.com/ruby/csv/issues/274) [Patch by Vivek Bharath Akupatni] * Added prefix to a helper module in test. [GH-278](https://github.com/ruby/csv/issues/278) [Patch by Luke Gruber] * Added a documentation for `liberal_parsing: {backslash_quotes: true}`. [GH-280](https://github.com/ruby/csv/issues/280) [Patch by Mark Schneider] ### Fixes * Fixed a wrong execution result in documents. [GH-276](https://github.com/ruby/csv/issues/276) [Patch by Yuki Tsujimoto] * Fixed a bug that the same line is used multiple times. [GH-279](https://github.com/ruby/csv/issues/279) [Reported by Gabriel Nagy] ### Thanks * Mau Magnaguagno * Vivek Bharath Akupatni * Yuki Tsujimoto * Luke Gruber * Mark Schneider * Gabriel Nagy ## 3.2.6 - 2022-12-08 ### Improvements * `CSV#read` consumes the same lines with other methods like `CSV#shift`. [[GitHub#258](https://github.com/ruby/csv/issues/258)] [Reported by Lhoussaine Ghallou] * All `Enumerable` based methods consume the same lines with other methods. 
This may have a performance penalty. [[GitHub#260](https://github.com/ruby/csv/issues/260)] [Reported by Lhoussaine Ghallou] * Simplify some implementations. [[GitHub#262](https://github.com/ruby/csv/pull/262)] [[GitHub#263](https://github.com/ruby/csv/pull/263)] [Patch by Mau Magnaguagno] ### Fixes * Fixed `CSV.generate_lines` document. [[GitHub#257](https://github.com/ruby/csv/pull/257)] [Patch by Sampat Badhe] ### Thanks * Sampat Badhe * Lhoussaine Ghallou * Mau Magnaguagno ## 3.2.5 - 2022-08-26 ### Improvements * Added `CSV.generate_lines`. [[GitHub#255](https://github.com/ruby/csv/issues/255)] [Reported by OKURA Masafumi] [[GitHub#256](https://github.com/ruby/csv/pull/256)] [Patch by Eriko Sugiyama] ### Thanks * OKURA Masafumi * Eriko Sugiyama ## 3.2.4 - 2022-08-22 ### Improvements * Cleaned up internal implementations. [[GitHub#249](https://github.com/ruby/csv/pull/249)] [[GitHub#250](https://github.com/ruby/csv/pull/250)] [[GitHub#251](https://github.com/ruby/csv/pull/251)] [Patch by Mau Magnaguagno] * Added support for RFC 3339 style time. [[GitHub#248](https://github.com/ruby/csv/pull/248)] [Patch by Thierry Lambert] * Added support for transcoding String CSV. Syntax is `from-encoding:to-encoding`. [[GitHub#254](https://github.com/ruby/csv/issues/254)] [Reported by Richard Stueven] * Added quoted information to `CSV::FieldInfo`. [[GitHub#254](https://github.com/ruby/csv/pull/253)] [Reported by Hirokazu SUZUKI] ### Fixes * Fixed a link in documents. [[GitHub#244](https://github.com/ruby/csv/pull/244)] [Patch by Peter Zhu] ### Thanks * Peter Zhu * Mau Magnaguagno * Thierry Lambert * Richard Stueven * Hirokazu SUZUKI ## 3.2.3 - 2022-04-09 ### Improvements * Added contents summary to `CSV::Table#inspect`. [GitHub#229][Patch by Eriko Sugiyama] [GitHub#235][Patch by Sampat Badhe] * Suppressed `$INPUT_RECORD_SEPARATOR` deprecation warning by `Warning.warn`. 
[GitHub#233][Reported by Jean byroot Boussier] * Improved error message for liberal parsing with quoted values. [GitHub#231][Patch by Nikolay Rys] * Fixed typos in documentation. [GitHub#236][Patch by Sampat Badhe] * Added `:max_field_size` option and deprecated `:field_size_limit` option. [GitHub#238][Reported by Dan Buettner] * Added `:symbol_raw` to built-in header converters. [GitHub#237][Reported by taki] [GitHub#239][Patch by Eriko Sugiyama] ### Fixes * Fixed a bug that some texts may be dropped unexpectedly. [Bug #18245][ruby-core:105587][Reported by Hassan Abdul Rehman] * Fixed a bug that `:field_size_limit` doesn't work with not complex row. [GitHub#238][Reported by Dan Buettner] ### Thanks * Hassan Abdul Rehman * Eriko Sugiyama * Jean byroot Boussier * Nikolay Rys * Sampat Badhe * Dan Buettner * taki ## 3.2.2 - 2021-12-24 ### Improvements * Added a validation for invalid option combination. [GitHub#225][Patch by adamroyjones] * Improved documentation for developers. [GitHub#227][Patch by Eriko Sugiyama] ### Fixes * Fixed a bug that all of `ARGF` contents may not be consumed. [GitHub#228][Reported by Rafael Navaza] ### Thanks * adamroyjones * Eriko Sugiyama * Rafael Navaza ## 3.2.1 - 2021-10-23 ### Improvements * doc: Fixed wrong class name. [GitHub#217][Patch by Vince] * Changed to always use `"\n"` for the default row separator on Ruby 3.0 or later because `$INPUT_RECORD_SEPARATOR` was deprecated since Ruby 3.0. * Added support for Ractor. [GitHub#218][Patch by rm155] * Users who want to use the built-in converters in non-main Ractors need to call `Ractor.make_shareable(CSV::Converters)` and/or `Ractor.make_shareable(CSV::HeaderConverters)` before creating non-main Ractors. ### Thanks * Vince * Joakim Antman * rm155 ## 3.2.0 - 2021-06-06 ### Improvements * `CSV.open`: Added support for `:newline` option. [GitHub#198][Patch by Nobuyoshi Nakada] * `CSV::Table#each`: Added support for column mode with duplicated headers. 
[GitHub#206][Reported by Yaroslav Berezovskiy] * `Object#CSV`: Added support for Ruby 3.0. * `CSV::Row`: Added support for pattern matching. [GitHub#207][Patch by Kevin Newton] ### Fixes * Fixed typos in documentation. [GitHub#196][GitHub#205][Patch by Sampat Badhe] ### Thanks * Sampat Badhe * Nobuyoshi Nakada * Yaroslav Berezovskiy * Kevin Newton ## 3.1.9 - 2020-11-23 ### Fixes * Fixed a compatibility bug that the line to be processed by `skip_lines:` has a row separator. [GitHub#194][Reported by Josef Šimánek] ### Thanks * Josef Šimánek ## 3.1.8 - 2020-11-18 ### Improvements * Improved documentation. [Patch by Burdette Lamar] ### Thanks * Burdette Lamar ## 3.1.7 - 2020-08-04 ### Improvements * Improved document. [GitHub#158][GitHub#160][GitHub#161] [Patch by Burdette Lamar] * Updated required Ruby version to 2.5.0 or later. [GitHub#159] [Patch by Gabriel Nagy] * Removed stringio 0.1.3 or later dependency. ### Thanks * Burdette Lamar * Gabriel Nagy ## 3.1.6 - 2020-07-20 ### Improvements * Improved document. [GitHub#127][GitHub#135][GitHub#136][GitHub#137][GitHub#139][GitHub#140] [GitHub#141][GitHub#142][GitHub#143][GitHub#145][GitHub#146][GitHub#148] [GitHub#148][GitHub#151][GitHub#152][GitHub#154][GitHub#155][GitHub#157] [Patch by Burdette Lamar] * `CSV.open`: Added support for `undef: :replace`. [GitHub#129][Patch by Koichi ITO] * `CSV.open`: Added support for `invalid: :replace`. [GitHub#129][Patch by Koichi ITO] * Don't run quotable check for invalid encoding field values. [GitHub#131][Patch by Koichi ITO] * Added support for specifying the target indexes and names to `force_quotes:`. [GitHub#153][Reported by Aleksandr] * `CSV.generate`: Changed to use the encoding of the first non-ASCII field rather than the encoding of ASCII only field. * Changed to require the stringio gem 0.1.3 or later. ### Thanks * Burdette Lamar * Koichi ITO * Aleksandr ## 3.1.5 - 2020-05-18 ### Improvements * Improved document. 
[GitHub#124][Patch by Burdette Lamar] ### Fixes * Added missing document files. [GitHub#125][Reported by joast] ### Thanks * Burdette Lamar * joast ## 3.1.4 - 2020-05-17 ### Improvements * Improved document. [GitHub#122][Patch by Burdette Lamar] * Stopped to dropping stack trace for exception caused by `CSV.parse_line`. [GitHub#120][Reported by Kyle d'Oliveira] ### Fixes * Fixed a bug that `:write_nil_value` or `:write_empty_value` don't work with non `String` objects. [GitHub#123][Reported by asm256] ### Thanks * Burdette Lamar * asm256 * Kyle d'Oliveira ## 3.1.3 - 2020-05-09 ### Improvements * `CSV::Row#dup`: Copied deeply. [GitHub#108][Patch by Jim Kane] ### Fixes * Fixed a infinite loop bug for zero length match `skip_lines`. [GitHub#110][Patch by Mike MacDonald] * `CSV.generate`: Fixed a bug that encoding isn't set correctly. [GitHub#110][Patch by Seiei Miyagi] * Fixed document for the `:strip` option. [GitHub#114][Patch by TOMITA Masahiro] * Fixed a parse bug when split charcter exists in middle of column value. [GitHub#115][Reported by TOMITA Masahiro] ### Thanks * Jim Kane * Mike MacDonald * Seiei Miyagi * TOMITA Masahiro ## 3.1.2 - 2019-10-12 ### Improvements * Added `:col_sep` check. [GitHub#94][Reported by Florent Beaurain] * Suppressed warnings. [GitHub#96][Patch by Nobuyoshi Nakada] * Improved documentation. [GitHub#101][GitHub#102][Patch by Vitor Oliveira] ### Fixes * Fixed a typo in documentation. [GitHub#95][Patch by Yuji Yaginuma] * Fixed a multibyte character handling bug. [GitHub#97][Patch by koshigoe] * Fixed typos in documentation. [GitHub#100][Patch by Vitor Oliveira] * Fixed a bug that seeked `StringIO` isn't accepted. [GitHub#98][Patch by MATSUMOTO Katsuyoshi] * Fixed a bug that `CSV.generate_line` doesn't work with `Encoding.default_internal`. 
[GitHub#105][Reported by David Rodríguez] ### Thanks * Florent Beaurain * Yuji Yaginuma * Nobuyoshi Nakada * koshigoe * Vitor Oliveira * MATSUMOTO Katsuyoshi * David Rodríguez ## 3.1.1 - 2019-04-26 ### Improvements * Added documentation for `strip` option. [GitHub#88][Patch by hayashiyoshino] * Added documentation for `write_converters`, `write_nil_value` and `write_empty_value` options. [GitHub#87][Patch by Masafumi Koba] * Added documentation for `quote_empty` option. [GitHub#89][Patch by kawa\_tech] ### Fixes * Fixed a bug that `strip; true` removes a newline. ### Thanks * hayashiyoshino * Masafumi Koba * kawa\_tech ## 3.1.0 - 2019-04-17 ### Fixes * Fixed a backward incompatibility bug that `CSV#eof?` may raises an error. [GitHub#86][Reported by krororo] ### Thanks * krororo ## 3.0.9 - 2019-04-15 ### Fixes * Fixed a test for Windows. ## 3.0.8 - 2019-04-11 ### Fixes * Fixed a bug that `strip: String` doesn't work. ## 3.0.7 - 2019-04-08 ### Improvements * Improve parse performance 1.5x by introducing loose parser. ### Fixes * Fix performance regression in 3.0.5. * Fix a bug that `CSV#line` returns wrong value when you use `quote_char: nil`. ## 3.0.6 - 2019-03-30 ### Improvements * `CSV.foreach`: Added support for `mode`. ## 3.0.5 - 2019-03-24 ### Improvements * Added `:liberal_parsing => {backslash_quote: true}` option. [GitHub#74][Patch by 284km] * Added `:write_converters` option. [GitHub#73][Patch by Danillo Souza] * Added `:write_nil_value` option. * Added `:write_empty_value` option. * Improved invalid byte line number detection. [GitHub#78][Patch by Alyssa Ross] * Added `quote_char: nil` optimization. [GitHub#79][Patch by 284km] * Improved error message. [GitHub#81][Patch by Andrés Torres] * Improved IO-like implementation for `StringIO` data. [GitHub#80][Patch by Genadi Samokovarov] * Added `:strip` option. [GitHub#58] ### Fixes * Fixed a compatibility bug that `CSV#each` doesn't care `CSV#shift`. 
[GitHub#76][Patch by Alyssa Ross] * Fixed a compatibility bug that `CSV#eof?` doesn't care `CSV#each` and `CSV#shift`. [GitHub#77][Reported by Chi Leung] * Fixed a compatibility bug that invalid line isn't ignored. [GitHub#82][Reported by krororo] * Fixed a bug that `:skip_lines` doesn't work with multibyte characters data. [GitHub#83][Reported by ff2248] ### Thanks * Alyssa Ross * 284km * Chi Leung * Danillo Souza * Andrés Torres * Genadi Samokovarov * krororo * ff2248 ## 3.0.4 - 2019-01-25 ### Improvements * Removed duplicated `CSV::Row#include?` implementations. [GitHub#69][Patch by Max Schwenk] * Removed duplicated `CSV::Row#header?` implementations. [GitHub#70][Patch by Max Schwenk] ### Fixes * Fixed a typo in document. [GitHub#72][Patch by Artur Beljajev] * Fixed a compatibility bug when row headers are changed. [GitHub#71][Reported by tomoyuki kosaka] ### Thanks * Max Schwenk * Artur Beljajev * tomoyuki kosaka ## 3.0.3 - 2019-01-12 ### Improvements * Migrated benchmark tool to benchmark-driver from benchmark-ips. [GitHub#57][Patch by 284km] * Added `liberal_parsing: {double_quote_outside_quote: true}` parse option. [GitHub#66][Reported by Watson] * Added `quote_empty:` write option. [GitHub#35][Reported by Dave Myron] ### Fixes * Fixed a compatibility bug that `CSV.generate` always return `ASCII-8BIT` encoding string. [GitHub#63][Patch by Watson] * Fixed a compatibility bug that `CSV.parse("", headers: true)` doesn't return `CSV::Table`. [GitHub#64][Reported by Watson][Patch by 284km] * Fixed a compatibility bug that multiple-characters column separator doesn't work. [GitHub#67][Reported by Jesse Reiss] * Fixed a compatibility bug that double `#each` parse twice. [GitHub#68][Reported by Max Schwenk] ### Thanks * Watson * 284km * Jesse Reiss * Dave Myron * Max Schwenk ## 3.0.2 - 2018-12-23 ### Improvements * Changed to use strscan in parser. [GitHub#52][Patch by 284km] * Improves CSV write performance. 3.0.2 will be about 2 times faster than 3.0.1. 
* Improves CSV parse performance for complex case. 3.0.2 will be about 2 times faster than 3.0.1. ### Fixes * Fixed a parse error bug for new line only input with `headers` option. [GitHub#53][Reported by Chris Beer] * Fixed some typos in document. [GitHub#54][Patch by Victor Shepelev] ### Thanks * 284km * Chris Beer * Victor Shepelev ## 3.0.1 - 2018-12-07 ### Improvements * Added a test. [GitHub#38][Patch by 284km] * `CSV::Row#dup`: Changed to duplicate internal data. [GitHub#39][Reported by André Guimarães Sakata] * Documented `:nil_value` and `:empty_value` options. [GitHub#41][Patch by OwlWorks] * Added support for separator detection for non-seekable inputs. [GitHub#45][Patch by Ilmari Karonen] * Removed needless code. [GitHub#48][Patch by Espartaco Palma] * Added support for parsing header only CSV with `headers: true`. [GitHub#47][Patch by Kazuma Shibasaka] * Added support for coverage report in CI. [GitHub#48][Patch by Espartaco Palma] * Improved auto CR row separator detection. [GitHub#51][Reported by Yuki Kurihara] ### Fixes * Fixed a typo in document. [GitHub#40][Patch by Marcus Stollsteimer] ### Thanks * 284km * André Guimarães Sakata * Marcus Stollsteimer * OwlWorks * Ilmari Karonen * Espartaco Palma * Kazuma Shibasaka * Yuki Kurihara ## 3.0.0 - 2018-06-06 ### Fixes * Fixed a bug that header isn't returned for empty row. 
[GitHub#37][Patch by Grace Lee] ### Thanks * Grace Lee ## 1.0.2 - 2018-05-03 ### Improvements * Split file for CSV::VERSION * Code cleanup: Split csv.rb into a more manageable structure [GitHub#19][Patch by Espartaco Palma] [GitHub#20][Patch by Steven Daniels] * Use CSV::MalformedCSVError for invalid encoding line [GitHub#26][Reported by deepj] * Support implicit Row <-> Array conversion [Bug #10013][ruby-core:63582][Reported by Dawid Janczak] * Update class docs [GitHub#32][Patch by zverok] * Add `Row#each_pair` [GitHub#33][Patch by zverok] * Improve CSV performance [GitHub#30][Patch by Watson] * Add :nil_value and :empty_value option ### Fixes * Fix a bug that "bom|utf-8" doesn't work [GitHub#23][Reported by Pavel Lobashov] * `CSV::Row#to_h`, `#to_hash`: uses the same value as `Row#[]` [Bug #14482][Reported by tomoya ishida] * Make row separator detection more robust [GitHub#25][Reported by deepj] * Fix a bug that too much separator when col_sep is `" "` [Bug #8784][ruby-core:63582][Reported by Sylvain Laperche] ### Thanks * Espartaco Palma * Steven Daniels * deepj * Dawid Janczak * zverok * Watson * Pavel Lobashov * tomoya ishida * Sylvain Laperche * Ryunosuke Sato ## 1.0.1 - 2018-02-09 ### Improvements * `CSV::Table#delete`: Added bulk delete support. You can delete multiple rows and columns at once. [GitHub#4][Patch by Vladislav] * Updated Gem description. [GitHub#11][Patch by Marcus Stollsteimer] * Code cleanup. [GitHub#12][Patch by Marcus Stollsteimer] [GitHub#14][Patch by Steven Daniels] [GitHub#18][Patch by takkanm] * `CSV::Table#dig`: Added. [GitHub#15][Patch by Tomohiro Ogoke] * `CSV::Row#dig`: Added. [GitHub#15][Patch by Tomohiro Ogoke] * Added ISO 8601 support to date time converter. [GitHub#16] ### Fixes * Fixed wrong `CSV::VERSION`. [GitHub#10][Reported by Marcus Stollsteimer] * `CSV.generate`: Fixed a regression bug that `String` argument is ignored. 
[GitHub#13][Patch by pavel] ### Thanks * Vladislav * Marcus Stollsteimer * Steven Daniels * takkanm * Tomohiro Ogoke * pavel csv-3.3.5/README.md000066400000000000000000000031261501670011600135730ustar00rootroot00000000000000# CSV This library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed. ## Installation Add this line to your application's Gemfile: ```ruby gem 'csv' ``` And then execute: $ bundle Or install it yourself as: $ gem install csv ## Usage ```ruby require "csv" CSV.foreach("path/to/file.csv") do |row| # use row here... end ``` ## Documentation - [API](https://ruby.github.io/csv/): all classes, methods, and constants. - [Recipes](https://ruby.github.io/csv/doc/csv/recipes/recipes_rdoc.html): specific code for specific tasks. ## Development After checking out the repo, run `ruby run-test.rb` to check if your changes can pass the test. To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). ## Contributing Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/csv. ### NOTE: About RuboCop We don't use RuboCop because we can manage our coding style by ourselves. We want to accept small fluctuations in our coding style because we use Ruby. Please do not submit issues and PRs that aim to introduce RuboCop in this repository. ## License The gem is available as open source under the terms of the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause). See LICENSE.txt for details. 
csv-3.3.5/Rakefile000066400000000000000000000033611501670011600137620ustar00rootroot00000000000000require "rbconfig" require "rdoc/task" require "bundler/gem_tasks" spec = Bundler::GemHelper.gemspec desc "Run test" task :test do ruby("run-test.rb") end task :default => :test namespace :warning do desc "Treat warning as error" task :error do def Warning.warn(*message) super raise "Treat warning as error:\n" + message.join("\n") end end end RDoc::Task.new do |rdoc| rdoc.options = spec.rdoc_options rdoc.rdoc_files.include(*spec.source_paths) rdoc.rdoc_files.include(*spec.extra_rdoc_files) end benchmark_tasks = [] namespace :benchmark do Dir.glob("benchmark/*.yaml").sort.each do |yaml| name = File.basename(yaml, ".*") env = { "RUBYLIB" => nil, "BUNDLER_ORIG_RUBYLIB" => nil, } command_line = [ RbConfig.ruby, "-v", "-S", "benchmark-driver", File.expand_path(yaml), ] desc "Run #{name} benchmark" task name do puts("```") sh(env, *command_line) puts("```") end benchmark_tasks << "benchmark:#{name}" case name when /\Aparse/, "shift" namespace name do desc "Run #{name} benchmark: small" task :small do puts("```") sh(env.merge("N_COLUMNS" => "10"), *command_line) puts("```") end benchmark_tasks << "benchmark:#{name}:small" end end end end desc "Run all benchmarks" task :benchmark => benchmark_tasks release_task = Rake.application["release"] # We use Trusted Publishing. 
release_task.prerequisites.delete("build") release_task.prerequisites.delete("release:rubygem_push") release_task_comment = release_task.comment if release_task_comment release_task.clear_comments release_task.comment = release_task_comment.gsub(/ and build.*$/, "") end csv-3.3.5/benchmark/000077500000000000000000000000001501670011600142445ustar00rootroot00000000000000csv-3.3.5/benchmark/convert_nil.yaml000066400000000000000000000007551501670011600174610ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.1 - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- csv_text = <(s) {s || ""} benchmark: 'not convert': CSV.parse(csv_text) converter: |- CSV.parse(csv_text, converters: convert_nil) option: |- CSV.parse(csv_text, nil_value: "") csv-3.3.5/benchmark/parse.yaml000066400000000000000000000021521501670011600162420ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.1 - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) alphas = ["AAAAA"] * n_columns unquoted = (alphas.join(",") + "\r\n") * n_rows quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows mixed = (alphas.map.with_index { |s, i| i.odd? ? 
s : %("#{s}") }.join(",") + "\r\n") * n_rows inc_col_sep = (alphas.map { |s| %(",#{s}") }.join(",") + "\r\n") * n_rows inc_row_sep = (alphas.map { |s| %("#{s}\r\n") }.join(",") + "\r\n") * n_rows hiraganas = ["あああああ"] * n_columns enc_utf8 = (hiraganas.join(",") + "\r\n") * n_rows enc_sjis = enc_utf8.encode("Windows-31J") benchmark: unquoted: CSV.parse(unquoted) quoted: CSV.parse(quoted) mixed: CSV.parse(mixed) include_col_sep: CSV.parse(inc_col_sep) include_row_sep: CSV.parse(inc_row_sep) encode_utf-8: CSV.parse(enc_utf8) encode_sjis: CSV.parse(enc_sjis) csv-3.3.5/benchmark/parse_liberal_parsing.yaml000066400000000000000000000032121501670011600214550ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) alphas = ['\"\"a\"\"'] * n_columns unquoted = (alphas.join(",") + "\r\n") * n_rows quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows inc_col_sep = (alphas.map { |s| %(",#{s}") }.join(",") + "\r\n") * n_rows inc_row_sep = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows hiraganas = ["あああああ"] * n_columns enc_utf8 = (hiraganas.join(",") + "\r\n") * n_rows enc_sjis = enc_utf8.encode("Windows-31J") benchmark: unquoted: |- CSV.parse(unquoted, liberal_parsing: true) unquoted_backslash_quote: |- CSV.parse(unquoted, liberal_parsing: { backslash_quote: true, }) quoted: |- CSV.parse(quoted, liberal_parsing: true) quoted_double_quote_outside_quote: |- CSV.parse(quoted, liberal_parsing: { double_quote_outside_quote: true }) quoted_backslash_quote: |- CSV.parse(quoted, liberal_parsing: { double_quote_outside_quote: true, backslash_quote: true, }) include_col_sep: |- CSV.parse(inc_col_sep, liberal_parsing: true) include_row_sep: |- CSV.parse(inc_row_sep, liberal_parsing: true) encode_utf-8: |- CSV.parse(enc_utf8, 
liberal_parsing: true) encode_sjis: |- CSV.parse(enc_sjis, liberal_parsing: true) csv-3.3.5/benchmark/parse_quote_char_nil.yaml000066400000000000000000000011521501670011600213150ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) alphas = ["AAAAA"] * n_columns unquoted = (alphas.join(",") + "\r\n") * n_rows col_sep_space = (alphas.join(" ") + "\r\n") * n_rows benchmark: without_quote_char: |- CSV.parse(unquoted) quote_char_nil: |- CSV.parse(unquoted, quote_char: nil) col_sep_space: |- CSV.parse(col_sep_space, quote_char: nil, col_sep: " ") csv-3.3.5/benchmark/parse_strip.yaml000066400000000000000000000007641501670011600174720ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) alphas = ["AAAAA"] * n_columns quoted = (alphas.map { |s| %("#{s}") }.join(",") + "\r\n") * n_rows benchmark: default: |- CSV.parse(quoted) no_quote_strip: |- CSV.parse(quoted, quote_char: nil, strip: '"') csv-3.3.5/benchmark/read.yaml000066400000000000000000000012551501670011600160460ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.1 - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- CSV.open("/tmp/file.csv", "w") do |csv| csv << ["player", "gameA", "gameB"] 1000.times do csv << ['"Alice"', "84.0", "79.5"] csv << ['"Bob"', "20.0", "56.5"] end end benchmark: "CSV.foreach": |- CSV.foreach("/tmp/file.csv") do |row| end "CSV#shift": |- CSV.open("/tmp/file.csv") do |csv| while _line = csv.shift end end "CSV.read": CSV.read("/tmp/file.csv") "CSV.table": 
CSV.table("/tmp/file.csv") csv-3.3.5/benchmark/shift.yaml000066400000000000000000000007501501670011600162470ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.1 - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "50"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "1000"), 10) alphas = ["AAAAA"] * n_columns data = (alphas.join(",") + "\r\n") * n_rows benchmark: shift: |- csv = CSV.new(data) while csv.shift do end csv-3.3.5/benchmark/write.yaml000066400000000000000000000032621501670011600162650ustar00rootroot00000000000000loop_count: 100 contexts: - gems: csv: 3.0.1 - gems: csv: 3.0.2 - gems: csv: 3.3.0 - name: "master" prelude: | $LOAD_PATH.unshift(File.expand_path("lib")) require "csv" prelude: |- n_columns = Integer(ENV.fetch("N_COLUMNS", "5"), 10) n_rows = Integer(ENV.fetch("N_ROWS", "100"), 10) fields = ["AAAAA"] * n_columns headers = n_columns.times.collect do |i| "header#{i}" end row = CSV::Row.new(headers, fields) raw_row = {} n_columns.times do |i| raw_row[headers[i]] = fields[i] end benchmark: "generate_line: fields": |- n_rows.times do CSV.generate_line(fields) end "generate_line: Row": |- n_rows.times do CSV.generate_line(row) end "generate_line: Hash": |- n_rows.times do CSV.generate_line(raw_row, headers: headers) end "<< fields": |- output = StringIO.new csv = CSV.new(output) n_rows.times do csv << fields end "<< Row": |- output = StringIO.new csv = CSV.new(output) n_rows.times do csv << row end "<< Hash": |- output = StringIO.new csv = CSV.new(output, headers: headers) n_rows.times do csv << raw_row end "<< fields: write headers": |- output = StringIO.new csv = CSV.new(output, headers: headers, write_headers: true) n_rows.times do csv << fields end "<< Row: write headers": |- output = StringIO.new csv = CSV.new(output, headers: headers, write_headers: true) n_rows.times do csv << row end "<< Hash: write 
headers": |- output = StringIO.new csv = CSV.new(output, headers: headers, write_headers: true) n_rows.times do csv << raw_row end csv-3.3.5/bin/000077500000000000000000000000001501670011600130625ustar00rootroot00000000000000csv-3.3.5/bin/console000077500000000000000000000005221501670011600144510ustar00rootroot00000000000000#!/usr/bin/env ruby require "bundler/setup" require "csv" # You can add fixtures and/or initialization code here to make experimenting # with your gem easier. You can also use a different console, if you like. # (If you use this, don't forget to add pry to your Gemfile!) # require "pry" # Pry.start require "irb" IRB.start(__FILE__) csv-3.3.5/bin/setup000077500000000000000000000002031501670011600141430ustar00rootroot00000000000000#!/usr/bin/env bash set -euo pipefail IFS=$'\n\t' set -vx bundle install # Do any other automated setup that you need to do here csv-3.3.5/csv.gemspec000066400000000000000000000032621501670011600144550ustar00rootroot00000000000000# frozen_string_literal: true begin require_relative "lib/csv/version" rescue LoadError # for Ruby core repository require_relative "version" end Gem::Specification.new do |spec| spec.name = "csv" spec.version = CSV::VERSION spec.authors = ["James Edward Gray II", "Kouhei Sutou"] spec.email = [nil, "kou@cozmixng.org"] spec.summary = "CSV Reading and Writing" spec.description = "The CSV library provides a complete interface to CSV files and data. It offers tools to enable you to read and write to and from Strings or IO objects, as needed." 
spec.homepage = "https://github.com/ruby/csv" spec.licenses = ["Ruby", "BSD-2-Clause"] lib_path = "lib" spec.require_paths = [lib_path] files = [] lib_dir = File.join(__dir__, lib_path) if File.exist?(lib_dir) Dir.chdir(lib_dir) do Dir.glob("**/*.rb").each do |file| files << "lib/#{file}" end end end doc_dir = File.join(__dir__, "doc") if File.exist?(doc_dir) Dir.chdir(doc_dir) do Dir.glob("**/*.rdoc").each do |rdoc_file| files << "doc/#{rdoc_file}" end end end spec.files = files spec.rdoc_options.concat(["--main", "README.md"]) rdoc_files = [ "LICENSE.txt", "NEWS.md", "README.md", ] recipes_dir = File.join(doc_dir, "csv", "recipes") if File.exist?(recipes_dir) Dir.chdir(recipes_dir) do Dir.glob("**/*.rdoc").each do |recipe_file| rdoc_files << "doc/csv/recipes/#{recipe_file}" end end end spec.extra_rdoc_files = rdoc_files spec.required_ruby_version = ">= 2.5.0" spec.metadata["changelog_uri"] = "#{spec.homepage}/releases/tag/v#{spec.version}" end csv-3.3.5/doc/000077500000000000000000000000001501670011600130575ustar00rootroot00000000000000csv-3.3.5/doc/csv/000077500000000000000000000000001501670011600136525ustar00rootroot00000000000000csv-3.3.5/doc/csv/arguments/000077500000000000000000000000001501670011600156575ustar00rootroot00000000000000csv-3.3.5/doc/csv/arguments/io.rdoc000066400000000000000000000004331501670011600171370ustar00rootroot00000000000000* Argument +io+ should be an IO object that is: * Open for reading; on return, the IO object will be closed. * Positioned at the beginning. To position at the end, for appending, use method CSV.generate. For any other positioning, pass a preset \StringIO object instead. 
csv-3.3.5/doc/csv/options/000077500000000000000000000000001501670011600153455ustar00rootroot00000000000000csv-3.3.5/doc/csv/options/common/000077500000000000000000000000001501670011600166355ustar00rootroot00000000000000csv-3.3.5/doc/csv/options/common/col_sep.rdoc000066400000000000000000000026651501670011600211430ustar00rootroot00000000000000====== Option +col_sep+ Specifies the \String column separator to be used for both parsing and generating. The \String will be transcoded into the data's \Encoding before use. Default value: CSV::DEFAULT_OPTIONS.fetch(:col_sep) # => "," (comma) Using the default (comma): str = CSV.generate do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0\nbar,1\nbaz,2\n" ary = CSV.parse(str) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using +:+ (colon): col_sep = ':' str = CSV.generate(col_sep: col_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo:0\nbar:1\nbaz:2\n" ary = CSV.parse(str, col_sep: col_sep) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using +::+ (two colons): col_sep = '::' str = CSV.generate(col_sep: col_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo::0\nbar::1\nbaz::2\n" ary = CSV.parse(str, col_sep: col_sep) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using '' (empty string): col_sep = '' str = CSV.generate(col_sep: col_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo0\nbar1\nbaz2\n" --- Raises an exception if parsing with the empty \String: col_sep = '' # Raises ArgumentError (:col_sep must be 1 or more characters: "") CSV.parse("foo0\nbar1\nbaz2\n", col_sep: col_sep) csv-3.3.5/doc/csv/options/common/quote_char.rdoc000066400000000000000000000025551501670011600216470ustar00rootroot00000000000000====== Option +quote_char+ Specifies the character (\String of length 1) used used to quote fields in both parsing and generating. 
This String will be transcoded into the data's \Encoding before use. Default value: CSV::DEFAULT_OPTIONS.fetch(:quote_char) # => "\"" (double quote) This is useful for an application that incorrectly uses ' (single-quote) to quote fields, instead of the correct " (double-quote). Using the default (double quote): str = CSV.generate do |csv| csv << ['foo', 0] csv << ["'bar'", 1] csv << ['"baz"', 2] end str # => "foo,0\n'bar',1\n\"\"\"baz\"\"\",2\n" ary = CSV.parse(str) ary # => [["foo", "0"], ["'bar'", "1"], ["\"baz\"", "2"]] Using ' (single-quote): quote_char = "'" str = CSV.generate(quote_char: quote_char) do |csv| csv << ['foo', 0] csv << ["'bar'", 1] csv << ['"baz"', 2] end str # => "foo,0\n'''bar''',1\n\"baz\",2\n" ary = CSV.parse(str, quote_char: quote_char) ary # => [["foo", "0"], ["'bar'", "1"], ["\"baz\"", "2"]] --- Raises an exception if the \String length is greater than 1: # Raises ArgumentError (:quote_char has to be nil or a single character String) CSV.new('', quote_char: 'xx') Raises an exception if the value is not a \String: # Raises ArgumentError (:quote_char has to be nil or a single character String) CSV.new('', quote_char: :foo) csv-3.3.5/doc/csv/options/common/row_sep.rdoc000066400000000000000000000053441501670011600211720ustar00rootroot00000000000000====== Option +row_sep+ Specifies the row separator, a \String or the \Symbol :auto (see below), to be used for both parsing and generating. Default value: CSV::DEFAULT_OPTIONS.fetch(:row_sep) # => :auto --- When +row_sep+ is a \String, that \String becomes the row separator. The String will be transcoded into the data's Encoding before use. 
Using "\n": row_sep = "\n" str = CSV.generate(row_sep: row_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0\nbar,1\nbaz,2\n" ary = CSV.parse(str) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using | (pipe): row_sep = '|' str = CSV.generate(row_sep: row_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0|bar,1|baz,2|" ary = CSV.parse(str, row_sep: row_sep) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using -- (two hyphens): row_sep = '--' str = CSV.generate(row_sep: row_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0--bar,1--baz,2--" ary = CSV.parse(str, row_sep: row_sep) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using '' (empty string): row_sep = '' str = CSV.generate(row_sep: row_sep) do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0bar,1baz,2" ary = CSV.parse(str, row_sep: row_sep) ary # => [["foo", "0bar", "1baz", "2"]] --- When +row_sep+ is the \Symbol +:auto+ (the default), generating uses "\n" as the row separator: str = CSV.generate do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0\nbar,1\nbaz,2\n" Parsing, on the other hand, invokes auto-discovery of the row separator. Auto-discovery reads ahead in the data looking for the next \r\n, +\n+, or +\r+ sequence. The sequence will be selected even if it occurs in a quoted field, assuming that you would have the same line endings there. Example: str = CSV.generate do |csv| csv << [:foo, 0] csv << [:bar, 1] csv << [:baz, 2] end str # => "foo,0\nbar,1\nbaz,2\n" ary = CSV.parse(str) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] The default $INPUT_RECORD_SEPARATOR ($/) is used if any of the following is true: * None of those sequences is found. * Data is +ARGF+, +STDIN+, +STDOUT+, or +STDERR+. * The stream is only available for output. Obviously, discovery takes a little time. Set manually if speed is important. 
Also note that IO objects should be opened in binary mode on Windows if this feature will be used as the line-ending translation can cause problems with resetting the document position to where it was before the read ahead. csv-3.3.5/doc/csv/options/generating/000077500000000000000000000000001501670011600174705ustar00rootroot00000000000000csv-3.3.5/doc/csv/options/generating/force_quotes.rdoc000066400000000000000000000006521501670011600230420ustar00rootroot00000000000000====== Option +force_quotes+ Specifies the boolean that determines whether each output field is to be double-quoted. Default value: CSV::DEFAULT_OPTIONS.fetch(:force_quotes) # => false For examples in this section: ary = ['foo', 0, nil] Using the default, +false+: str = CSV.generate_line(ary) str # => "foo,0,\n" Using +true+: str = CSV.generate_line(ary, force_quotes: true) str # => "\"foo\",\"0\",\"\"\n" csv-3.3.5/doc/csv/options/generating/quote_empty.rdoc000066400000000000000000000005361501670011600227200ustar00rootroot00000000000000====== Option +quote_empty+ Specifies the boolean that determines whether an empty value is to be double-quoted. Default value: CSV::DEFAULT_OPTIONS.fetch(:quote_empty) # => true With the default +true+: CSV.generate_line(['"', ""]) # => "\"\"\"\",\"\"\n" With +false+: CSV.generate_line(['"', ""], quote_empty: false) # => "\"\"\"\",\n" csv-3.3.5/doc/csv/options/generating/write_converters.rdoc000066400000000000000000000015711501670011600237510ustar00rootroot00000000000000====== Option +write_converters+ Specifies converters to be used in generating fields. 
See {Write Converters}[#class-CSV-label-Write+Converters] Default value: CSV::DEFAULT_OPTIONS.fetch(:write_converters) # => nil With no write converter: str = CSV.generate_line(["\na\n", "\tb\t", " c "]) str # => "\"\na\n\",\tb\t, c \n" With a write converter: strip_converter = proc {|field| field.strip } str = CSV.generate_line(["\na\n", "\tb\t", " c "], write_converters: strip_converter) str # => "a,b,c\n" With two write converters (called in order): upcase_converter = proc {|field| field.upcase } downcase_converter = proc {|field| field.downcase } write_converters = [upcase_converter, downcase_converter] str = CSV.generate_line(['a', 'b', 'c'], write_converters: write_converters) str # => "a,b,c\n" See also {Write Converters}[#class-CSV-label-Write+Converters] csv-3.3.5/doc/csv/options/generating/write_empty_value.rdoc000066400000000000000000000006241501670011600241070ustar00rootroot00000000000000====== Option +write_empty_value+ Specifies the object that is to be substituted for each field that has an empty \String. Default value: CSV::DEFAULT_OPTIONS.fetch(:write_empty_value) # => "" Without the option: str = CSV.generate_line(['a', '', 'c', '']) str # => "a,\"\",c,\"\"\n" With the option: str = CSV.generate_line(['a', '', 'c', ''], write_empty_value: "x") str # => "a,x,c,x\n" csv-3.3.5/doc/csv/options/generating/write_headers.rdoc000066400000000000000000000012511501670011600231650ustar00rootroot00000000000000====== Option +write_headers+ Specifies the boolean that determines whether a header row is included in the output; ignored if there are no headers. 
Default value: CSV::DEFAULT_OPTIONS.fetch(:write_headers) # => nil Without +write_headers+: file_path = 't.csv' CSV.open(file_path,'w', :headers => ['Name','Value'] ) do |csv| csv << ['foo', '0'] end CSV.open(file_path) do |csv| csv.shift end # => ["foo", "0"] With +write_headers+": CSV.open(file_path,'w', :write_headers => true, :headers => ['Name','Value'] ) do |csv| csv << ['foo', '0'] end CSV.open(file_path) do |csv| csv.shift end # => ["Name", "Value"] csv-3.3.5/doc/csv/options/generating/write_nil_value.rdoc000066400000000000000000000005761501670011600235410ustar00rootroot00000000000000====== Option +write_nil_value+ Specifies the object that is to be substituted for each +nil+-valued field. Default value: CSV::DEFAULT_OPTIONS.fetch(:write_nil_value) # => nil Without the option: str = CSV.generate_line(['a', nil, 'c', nil]) str # => "a,,c,\n" With the option: str = CSV.generate_line(['a', nil, 'c', nil], write_nil_value: "x") str # => "a,x,c,x\n" csv-3.3.5/doc/csv/options/parsing/000077500000000000000000000000001501670011600170105ustar00rootroot00000000000000csv-3.3.5/doc/csv/options/parsing/converters.rdoc000066400000000000000000000027301501670011600220550ustar00rootroot00000000000000====== Option +converters+ Specifies converters to be used in parsing fields. 
See {Field Converters}[#class-CSV-label-Field+Converters] Default value: CSV::DEFAULT_OPTIONS.fetch(:converters) # => nil The value may be a field converter name (see {Stored Converters}[#class-CSV-label-Stored+Converters]): str = '1,2,3' # Without a converter array = CSV.parse_line(str) array # => ["1", "2", "3"] # With built-in converter :integer array = CSV.parse_line(str, converters: :integer) array # => [1, 2, 3] The value may be a converter list (see {Converter Lists}[#class-CSV-label-Converter+Lists]): str = '1,3.14159' # Without converters array = CSV.parse_line(str) array # => ["1", "3.14159"] # With built-in converters array = CSV.parse_line(str, converters: [:integer, :float]) array # => [1, 3.14159] The value may be a \Proc custom converter: (see {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters]): str = ' foo , bar , baz ' # Without a converter array = CSV.parse_line(str) array # => [" foo ", " bar ", " baz "] # With a custom converter array = CSV.parse_line(str, converters: proc {|field| field.strip }) array # => ["foo", "bar", "baz"] See also {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters] --- Raises an exception if the converter is not a converter name or a \Proc: str = 'foo,0' # Raises NoMethodError (undefined method `arity' for nil:NilClass) CSV.parse(str, converters: :foo) csv-3.3.5/doc/csv/options/parsing/empty_value.rdoc000066400000000000000000000006201501670011600222110ustar00rootroot00000000000000====== Option +empty_value+ Specifies the object that is to be substituted for each field that has an empty \String. 
Default value: CSV::DEFAULT_OPTIONS.fetch(:empty_value) # => "" (empty string) With the default, "": CSV.parse_line('a,"",b,"",c') # => ["a", "", "b", "", "c"] With a different object: CSV.parse_line('a,"",b,"",c', empty_value: 'x') # => ["a", "x", "b", "x", "c"] csv-3.3.5/doc/csv/options/parsing/field_size_limit.rdoc000066400000000000000000000022471501670011600232010ustar00rootroot00000000000000====== Option +field_size_limit+ Specifies the \Integer field size limit. Default value: CSV::DEFAULT_OPTIONS.fetch(:field_size_limit) # => nil This is a maximum size CSV will read ahead looking for the closing quote for a field. (In truth, it reads to the first line ending beyond this size.) If a quote cannot be found within the limit CSV will raise a MalformedCSVError, assuming the data is faulty. You can use this limit to prevent what are effectively DoS attacks on the parser. However, this limit can cause a legitimate parse to fail; therefore the default value is +nil+ (no limit). For the examples in this section: str = <<~EOT "a","b" " 2345 ","" EOT str # => "\"a\",\"b\"\n\"\n2345\n\",\"\"\n" Using the default +nil+: ary = CSV.parse(str) ary # => [["a", "b"], ["\n2345\n", ""]] Using 50: field_size_limit = 50 ary = CSV.parse(str, field_size_limit: field_size_limit) ary # => [["a", "b"], ["\n2345\n", ""]] --- Raises an exception if a field is too long: big_str = "123456789\n" * 1024 # Raises CSV::MalformedCSVError (Field size exceeded in line 1.) CSV.parse('valid,fields,"' + big_str + '"', field_size_limit: 2048) csv-3.3.5/doc/csv/options/parsing/header_converters.rdoc000066400000000000000000000027011501670011600233630ustar00rootroot00000000000000====== Option +header_converters+ Specifies converters to be used in parsing headers. 
See {Header Converters}[#class-CSV-label-Header+Converters] Default value: CSV::DEFAULT_OPTIONS.fetch(:header_converters) # => nil Identical in functionality to option {converters}[#class-CSV-label-Option+converters] except that: - The converters apply only to the header row. - The built-in header converters are +:downcase+ and +:symbol+. This section assumes prior execution of: str = <<-EOT Name,Value foo,0 bar,1 baz,2 EOT # With no header converter table = CSV.parse(str, headers: true) table.headers # => ["Name", "Value"] The value may be a header converter name (see {Stored Converters}[#class-CSV-label-Stored+Converters]): table = CSV.parse(str, headers: true, header_converters: :downcase) table.headers # => ["name", "value"] The value may be a converter list (see {Converter Lists}[#class-CSV-label-Converter+Lists]): header_converters = [:downcase, :symbol] table = CSV.parse(str, headers: true, header_converters: header_converters) table.headers # => [:name, :value] The value may be a \Proc custom converter (see {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters]): upcase_converter = proc {|field| field.upcase } table = CSV.parse(str, headers: true, header_converters: upcase_converter) table.headers # => ["NAME", "VALUE"] See also {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters] csv-3.3.5/doc/csv/options/parsing/headers.rdoc000066400000000000000000000027001501670011600212730ustar00rootroot00000000000000====== Option +headers+ Specifies a boolean, \Symbol, \Array, or \String to be used to define column headers. 
Default value: CSV::DEFAULT_OPTIONS.fetch(:headers) # => false --- Without +headers+: str = <<-EOT Name,Count foo,0 bar,1 bax,2 EOT csv = CSV.new(str) csv # => # csv.headers # => nil csv.shift # => ["Name", "Count"] --- If set to +true+ or the \Symbol +:first_row+, the first row of the data is treated as a row of headers: str = <<-EOT Name,Count foo,0 bar,1 bax,2 EOT csv = CSV.new(str, headers: true) csv # => # csv.headers # => ["Name", "Count"] csv.shift # => # --- If set to an \Array, the \Array elements are treated as headers: str = <<-EOT foo,0 bar,1 bax,2 EOT csv = CSV.new(str, headers: ['Name', 'Count']) csv csv.headers # => ["Name", "Count"] csv.shift # => # --- If set to a \String +str+, method CSV::parse_line(str, options) is called with the current +options+, and the returned \Array is treated as headers: str = <<-EOT foo,0 bar,1 bax,2 EOT csv = CSV.new(str, headers: 'Name,Count') csv csv.headers # => ["Name", "Count"] csv.shift # => # csv-3.3.5/doc/csv/options/parsing/liberal_parsing.rdoc000066400000000000000000000027771501670011600230330ustar00rootroot00000000000000====== Option +liberal_parsing+ Specifies the boolean or hash value that determines whether CSV will attempt to parse input not conformant with RFC 4180, such as double quotes in unquoted fields. Default value: CSV::DEFAULT_OPTIONS.fetch(:liberal_parsing) # => false For the next two examples: str = 'is,this "three, or four",fields' Without +liberal_parsing+: # Raises CSV::MalformedCSVError (Illegal quoting in str 1.) CSV.parse_line(str) With +liberal_parsing+: ary = CSV.parse_line(str, liberal_parsing: true) ary # => ["is", "this \"three", " or four\"", "fields"] Use the +backslash_quote+ sub-option to parse values that use a backslash to escape a double-quote character. This causes the parser to treat \" as if it were "". 
For the next two examples: str = 'Show,"Harry \"Handcuff\" Houdini, the one and only","Tampa Theater"' With +liberal_parsing+, but without the +backslash_quote+ sub-option: # Incorrect interpretation of backslash; incorrectly interprets the quoted comma as a field separator. ary = CSV.parse_line(str, liberal_parsing: true) ary # => ["Show", "\"Harry \\\"Handcuff\\\" Houdini", " the one and only\"", "Tampa Theater"] puts ary[1] # => "Harry \"Handcuff\" Houdini With +liberal_parsing+ and its +backslash_quote+ sub-option: ary = CSV.parse_line(str, liberal_parsing: { backslash_quote: true }) ary # => ["Show", "Harry \"Handcuff\" Houdini, the one and only", "Tampa Theater"] puts ary[1] # => Harry "Handcuff" Houdini, the one and only csv-3.3.5/doc/csv/options/parsing/nil_value.rdoc000066400000000000000000000005371501670011600216440ustar00rootroot00000000000000====== Option +nil_value+ Specifies the object that is to be substituted for each null (no-text) field. Default value: CSV::DEFAULT_OPTIONS.fetch(:nil_value) # => nil With the default, +nil+: CSV.parse_line('a,,b,,c') # => ["a", nil, "b", nil, "c"] With a different object: CSV.parse_line('a,,b,,c', nil_value: 0) # => ["a", 0, "b", 0, "c"] csv-3.3.5/doc/csv/options/parsing/return_headers.rdoc000066400000000000000000000010701501670011600226710ustar00rootroot00000000000000====== Option +return_headers+ Specifies the boolean that determines whether method #shift returns or ignores the header row. Default value: CSV::DEFAULT_OPTIONS.fetch(:return_headers) # => false Examples: str = <<-EOT Name,Count foo,0 bar,1 bax,2 EOT # Without return_headers first row is str. csv = CSV.new(str, headers: true) csv.shift # => # # With return_headers first row is headers. 
csv = CSV.new(str, headers: true, return_headers: true) csv.shift # => # csv-3.3.5/doc/csv/options/parsing/skip_blanks.rdoc000066400000000000000000000014201501670011600221560ustar00rootroot00000000000000====== Option +skip_blanks+ Specifies a boolean that determines whether blank lines in the input will be ignored; a line that contains a column separator is not considered to be blank. Default value: CSV::DEFAULT_OPTIONS.fetch(:skip_blanks) # => false See also option {skiplines}[#class-CSV-label-Option+skip_lines]. For examples in this section: str = <<-EOT foo,0 bar,1 baz,2 , EOT Using the default, +false+: ary = CSV.parse(str) ary # => [["foo", "0"], [], ["bar", "1"], ["baz", "2"], [], [nil, nil]] Using +true+: ary = CSV.parse(str, skip_blanks: true) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] Using a truthy value: ary = CSV.parse(str, skip_blanks: :foo) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] csv-3.3.5/doc/csv/options/parsing/skip_lines.rdoc000066400000000000000000000020741501670011600220240ustar00rootroot00000000000000====== Option +skip_lines+ Specifies an object to use in identifying comment lines in the input that are to be ignored: * If a \Regexp, ignores lines that match it. * If a \String, converts it to a \Regexp, ignores lines that match it. * If +nil+, no lines are considered to be comments. 
Default value: CSV::DEFAULT_OPTIONS.fetch(:skip_lines) # => nil For examples in this section: str = <<-EOT # Comment foo,0 bar,1 baz,2 # Another comment EOT str # => "# Comment\nfoo,0\nbar,1\nbaz,2\n# Another comment\n" Using the default, +nil+: ary = CSV.parse(str) ary # => [["# Comment"], ["foo", "0"], ["bar", "1"], ["baz", "2"], ["# Another comment"]] Using a \Regexp: ary = CSV.parse(str, skip_lines: /^#/) ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Using a \String: ary = CSV.parse(str, skip_lines: '#') ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] --- Raises an exception if given an object that is not a \Regexp, a \String, or +nil+: # Raises ArgumentError (:skip_lines has to respond to #match: 0) CSV.parse(str, skip_lines: 0) csv-3.3.5/doc/csv/options/parsing/strip.rdoc000066400000000000000000000005531501670011600210250ustar00rootroot00000000000000====== Option +strip+ Specifies the boolean value that determines whether whitespace is stripped from each input field. Default value: CSV::DEFAULT_OPTIONS.fetch(:strip) # => false With default value +false+: ary = CSV.parse_line(' a , b ') ary # => [" a ", " b "] With value +true+: ary = CSV.parse_line(' a , b ', strip: true) ary # => ["a", "b"] csv-3.3.5/doc/csv/options/parsing/unconverted_fields.rdoc000066400000000000000000000017201501670011600235430ustar00rootroot00000000000000====== Option +unconverted_fields+ Specifies the boolean that determines whether unconverted field values are to be available. Default value: CSV::DEFAULT_OPTIONS.fetch(:unconverted_fields) # => nil The unconverted field values are those found in the source data, prior to any conversions performed via option +converters+. 
When option +unconverted_fields+ is +true+, each returned row (\Array or \CSV::Row) has an added method, +unconverted_fields+, that returns the unconverted field values: str = <<-EOT foo,0 bar,1 baz,2 EOT # Without unconverted_fields csv = CSV.parse(str, converters: :integer) csv # => [["foo", 0], ["bar", 1], ["baz", 2]] csv.first.respond_to?(:unconverted_fields) # => false # With unconverted_fields csv = CSV.parse(str, converters: :integer, unconverted_fields: true) csv # => [["foo", 0], ["bar", 1], ["baz", 2]] csv.first.respond_to?(:unconverted_fields) # => true csv.first.unconverted_fields # => ["foo", "0"] csv-3.3.5/doc/csv/recipes/000077500000000000000000000000001501670011600153045ustar00rootroot00000000000000csv-3.3.5/doc/csv/recipes/filtering.rdoc000066400000000000000000000213001501670011600201340ustar00rootroot00000000000000== Recipes for Filtering \CSV These recipes are specific code examples for specific \CSV filtering tasks. For other recipes, see {Recipes for CSV}[./recipes_rdoc.html]. 
All code snippets on this page assume that the following has been executed: require 'csv' === Contents - {Source and Output Formats}[#label-Source+and+Output+Formats] - {Filtering String to String}[#label-Filtering+String+to+String] - {Recipe: Filter String to String parsing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+Headers] - {Recipe: Filter String to String parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+String+parsing+and+writing+Headers] - {Recipe: Filter String to String Without Headers}[#label-Recipe-3A+Filter+String+to+String+Without+Headers] - {Filtering String to IO Stream}[#label-Filtering+String+to+IO+Stream] - {Recipe: Filter String to IO Stream parsing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+Headers] - {Recipe: Filter String to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+parsing+and+writing+Headers] - {Recipe: Filter String to IO Stream Without Headers}[#label-Recipe-3A+Filter+String+to+IO+Stream+Without+Headers] - {Filtering IO Stream to String}[#label-Filtering+IO+Stream+to+String] - {Recipe: Filter IO Stream to String parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+Headers] - {Recipe: Filter IO Stream to String parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+parsing+and+writing+Headers] - {Recipe: Filter IO Stream to String Without Headers}[#label-Recipe-3A+Filter+IO+Stream+to+String+Without+Headers] - {Filtering IO Stream to IO Stream}[#label-Filtering+IO+Stream+to+IO+Stream] - {Recipe: Filter IO Stream to IO Stream parsing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+Headers] - {Recipe: Filter IO Stream to IO Stream parsing and writing Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+parsing+and+writing+Headers] - {Recipe: Filter IO Stream to IO Stream Without Headers}[#label-Recipe-3A+Filter+IO+Stream+to+IO+Stream+Without+Headers] === Source and Output Formats You can use a Unix-style 
"filter" for \CSV data. The filter reads source \CSV data and writes output \CSV data as modified by the filter. The input and output \CSV data may be any mixture of \Strings and \IO streams. ==== Filtering \String to \String You can filter one \String to another, with or without headers. ===== Recipe: Filter \String to \String parsing Headers Use class method CSV.filter with option +headers+ to filter a \String to another \String: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" out_string = '' CSV.filter(in_string, out_string, headers: true) do |row| row['Name'] = row['Name'].upcase row['Value'] *= 4 end out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \String to \String parsing and writing Headers Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter a \String to another \String including header row: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" out_string = '' CSV.filter(in_string, out_string, headers: true, out_write_headers: true) do |row| unless row.is_a?(Array) row['Name'] = row['Name'].upcase row['Value'] *= 4 end end out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \String to \String Without Headers Use class method CSV.filter without option +headers+ to filter a \String to another \String: in_string = "foo,0\nbar,1\nbaz,2\n" out_string = '' CSV.filter(in_string, out_string) do |row| row[0] = row[0].upcase row[1] *= 4 end out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ==== Filtering \String to \IO Stream You can filter a \String to an \IO stream, with or without headers. 
===== Recipe: Filter \String to \IO Stream parsing Headers Use class method CSV.filter with option +headers+ to filter a \String to an \IO stream: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.open(path, 'w') do |out_io| CSV.filter(in_string, out_io, headers: true) do |row| row['Name'] = row['Name'].upcase row['Value'] *= 4 end end p File.read(path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \String to \IO Stream parsing and writing Headers Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter a \String to an \IO stream including header row: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.open(path, 'w') do |out_io| CSV.filter(in_string, out_io, headers: true, out_write_headers: true ) do |row| unless row.is_a?(Array) row['Name'] = row['Name'].upcase row['Value'] *= 4 end end end p File.read(path) # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \String to \IO Stream Without Headers Use class method CSV.filter without option +headers+ to filter a \String to an \IO stream: in_string = "foo,0\nbar,1\nbaz,2\n" path = 't.csv' File.open(path, 'w') do |out_io| CSV.filter(in_string, out_io) do |row| row[0] = row[0].upcase row[1] *= 4 end end p File.read(path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ==== Filtering \IO Stream to \String You can filter an \IO stream to a \String, with or without headers. 
===== Recipe: Filter \IO Stream to \String parsing Headers Use class method CSV.filter with option +headers+ to filter an \IO stream to a \String: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, in_string) out_string = '' File.open(path) do |in_io| CSV.filter(in_io, out_string, headers: true) do |row| row['Name'] = row['Name'].upcase row['Value'] *= 4 end end out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \IO Stream to \String parsing and writing Headers Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter an \IO stream to a \String including header row: in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, in_string) out_string = '' File.open(path) do |in_io| CSV.filter(in_io, out_string, headers: true, out_write_headers: true) do |row| unless row.is_a?(Array) row['Name'] = row['Name'].upcase row['Value'] *= 4 end end end out_string # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \IO Stream to \String Without Headers Use class method CSV.filter without option +headers+ to filter an \IO stream to a \String: in_string = "foo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, in_string) out_string = '' File.open(path) do |in_io| CSV.filter(in_io, out_string) do |row| row[0] = row[0].upcase row[1] *= 4 end end out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ==== Filtering \IO Stream to \IO Stream You can filter an \IO stream to another \IO stream, with or without headers. 
===== Recipe: Filter \IO Stream to \IO Stream parsing Headers Use class method CSV.filter with option +headers+ to filter an \IO stream to another \IO stream: in_path = 't.csv' in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" File.write(in_path, in_string) out_path = 'u.csv' File.open(in_path) do |in_io| File.open(out_path, 'w') do |out_io| CSV.filter(in_io, out_io, headers: true) do |row| row['Name'] = row['Name'].upcase row['Value'] *= 4 end end end p File.read(out_path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \IO Stream to \IO Stream parsing and writing Headers Use class method CSV.filter with option +headers+ and +out_write_headers+ to filter an \IO stream to another \IO stream including header row: in_path = 't.csv' in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" File.write(in_path, in_string) out_path = 'u.csv' File.open(in_path) do |in_io| File.open(out_path, 'w') do |out_io| CSV.filter(in_io, out_io, headers: true, out_write_headers: true) do |row| unless row.is_a?(Array) row['Name'] = row['Name'].upcase row['Value'] *= 4 end end end end p File.read(out_path) # => "Name,Value\nFOO,0000\nBAR,1111\nBAZ,2222\n" ===== Recipe: Filter \IO Stream to \IO Stream Without Headers Use class method CSV.filter without option +headers+ to filter an \IO stream to another \IO stream: in_path = 't.csv' in_string = "foo,0\nbar,1\nbaz,2\n" File.write(in_path, in_string) out_path = 'u.csv' File.open(in_path) do |in_io| File.open(out_path, 'w') do |out_io| CSV.filter(in_io, out_io) do |row| row[0] = row[0].upcase row[1] *= 4 end end end p File.read(out_path) # => "FOO,0000\nBAR,1111\nBAZ,2222\n" csv-3.3.5/doc/csv/recipes/generating.rdoc000066400000000000000000000244631501670011600203110ustar00rootroot00000000000000== Recipes for Generating \CSV These recipes are specific code examples for specific \CSV generating tasks. For other recipes, see {Recipes for CSV}[./recipes_rdoc.html]. 
All code snippets on this page assume that the following has been executed: require 'csv' === Contents - {Output Formats}[#label-Output+Formats] - {Generating to a String}[#label-Generating+to+a+String] - {Recipe: Generate to String with Headers}[#label-Recipe-3A+Generate+to+String+with+Headers] - {Recipe: Generate to String Without Headers}[#label-Recipe-3A+Generate+to+String+Without+Headers] - {Generating to a File}[#label-Generating+to+a+File] - {Recipe: Generate to File with Headers}[#label-Recipe-3A+Generate+to+File+with+Headers] - {Recipe: Generate to File Without Headers}[#label-Recipe-3A+Generate+to+File+Without+Headers] - {Generating to an IO Stream}[#label-Generating+to+an+IO+Stream] - {Recipe: Generate to IO Stream with Headers}[#label-Recipe-3A+Generate+to+IO+Stream+with+Headers] - {Recipe: Generate to IO Stream Without Headers}[#label-Recipe-3A+Generate+to+IO+Stream+Without+Headers] - {Converting Fields}[#label-Converting+Fields] - {Recipe: Filter Generated Field Strings}[#label-Recipe-3A+Filter+Generated+Field+Strings] - {Recipe: Specify Multiple Write Converters}[#label-Recipe-3A+Specify+Multiple+Write+Converters] - {RFC 4180 Compliance}[#label-RFC+4180+Compliance] - {Row Separator}[#label-Row+Separator] - {Recipe: Generate Compliant Row Separator}[#label-Recipe-3A+Generate+Compliant+Row+Separator] - {Recipe: Generate Non-Compliant Row Separator}[#label-Recipe-3A+Generate+Non-Compliant+Row+Separator] - {Column Separator}[#label-Column+Separator] - {Recipe: Generate Compliant Column Separator}[#label-Recipe-3A+Generate+Compliant+Column+Separator] - {Recipe: Generate Non-Compliant Column Separator}[#label-Recipe-3A+Generate+Non-Compliant+Column+Separator] - {Quotes}[#label-Quotes] - {Recipe: Quote All Fields}[#label-Recipe-3A+Quote+All+Fields] - {Recipe: Quote Empty Fields}[#label-Recipe-3A+Quote+Empty+Fields] - {Recipe: Generate Compliant Quote Character}[#label-Recipe-3A+Generate+Compliant+Quote+Character] - {Recipe: Generate Non-Compliant Quote
Character}[#label-Recipe-3A+Generate+Non-Compliant+Quote+Character] === Output Formats You can generate \CSV output to a \String, to a \File (via its path), or to an \IO stream. ==== Generating to a \String You can generate \CSV output to a \String, with or without headers. ===== Recipe: Generate to \String with Headers Use class method CSV.generate with option +headers+ to generate to a \String. This example uses method CSV#<< to append the rows that are to be generated: output_string = CSV.generate('', headers: ['Name', 'Value'], write_headers: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" ===== Recipe: Generate to \String Without Headers Use class method CSV.generate without option +headers+ to generate to a \String. This example uses method CSV#<< to append the rows that are to be generated: output_string = CSV.generate do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Foo,0\nBar,1\nBaz,2\n" ==== Generating to a \File You can generate \CSV data to a \File, with or without headers. ===== Recipe: Generate to \File with Headers Use class method CSV.open with option +headers+ to generate to a \File. This example uses method CSV#<< to append the rows that are to be generated: path = 't.csv' CSV.open(path, 'w', headers: ['Name', 'Value'], write_headers: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end p File.read(path) # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" ===== Recipe: Generate to \File Without Headers Use class method CSV.open without option +headers+ to generate to a \File. This example uses method CSV#<< to append the rows that are to be generated: path = 't.csv' CSV.open(path, 'w') do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end p File.read(path) # => "Foo,0\nBar,1\nBaz,2\n" ==== Generating to an \IO Stream You can generate \CSV data to an \IO stream, with or without headers.
===== Recipe: Generate to \IO Stream with Headers Use class method CSV.new with option +headers+ to generate \CSV data to an \IO stream: path = 't.csv' File.open(path, 'w') do |file| csv = CSV.new(file, headers: ['Name', 'Value'], write_headers: true) csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end p File.read(path) # => "Name,Value\nFoo,0\nBar,1\nBaz,2\n" ===== Recipe: Generate to \IO Stream Without Headers Use class method CSV.new without option +headers+ to generate \CSV data to an \IO stream: path = 't.csv' File.open(path, 'w') do |file| csv = CSV.new(file) csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end p File.read(path) # => "Foo,0\nBar,1\nBaz,2\n" === Converting Fields You can use _write_ _converters_ to convert fields when generating \CSV. ==== Recipe: Filter Generated Field Strings Use option +:write_converters+ and a custom converter to convert field values when generating \CSV. This example defines and uses a custom write converter to strip whitespace from generated fields: strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } output_string = CSV.generate(write_converters: strip_converter) do |csv| csv << [' foo ', 0] csv << [' bar ', 1] csv << [' baz ', 2] end output_string # => "foo,0\nbar,1\nbaz,2\n" ==== Recipe: Specify Multiple Write Converters Use option +:write_converters+ and multiple custom converters to convert field values when generating \CSV. This example defines and uses two custom write converters to strip and upcase generated fields: strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } upcase_converter = proc {|field| field.respond_to?(:upcase) ?
field.upcase : field } converters = [strip_converter, upcase_converter] output_string = CSV.generate(write_converters: converters) do |csv| csv << [' foo ', 0] csv << [' bar ', 1] csv << [' baz ', 2] end output_string # => "FOO,0\nBAR,1\nBAZ,2\n" === RFC 4180 Compliance By default, \CSV generates data that is compliant with {RFC 4180}[https://www.rfc-editor.org/rfc/rfc4180] with respect to: - Column separator. - Quote character. ==== Row Separator RFC 4180 specifies the row separator CRLF (Ruby "\r\n"). ===== Recipe: Generate Compliant Row Separator For strict compliance, use option +:row_sep+ to specify row separator "\r\n": output_string = CSV.generate('', row_sep: "\r\n") do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Foo,0\r\nBar,1\r\nBaz,2\r\n" ===== Recipe: Generate Non-Compliant Row Separator For data with non-compliant row separators, use option +:row_sep+ with a different value. This example source uses semicolon (";") as its row separator: output_string = CSV.generate('', row_sep: ";") do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Foo,0;Bar,1;Baz,2;" ==== Column Separator RFC 4180 specifies column separator COMMA (Ruby ","). ===== Recipe: Generate Compliant Column Separator Because the \CSV default column separator is ",", you need not specify option +:col_sep+ for compliant data: output_string = CSV.generate('') do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Foo,0\nBar,1\nBaz,2\n" ===== Recipe: Generate Non-Compliant Column Separator For data with non-compliant column separators, use option +:col_sep+. This example source uses TAB ("\t") as its column separator: output_string = CSV.generate('', col_sep: "\t") do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "Foo\t0\nBar\t1\nBaz\t2\n" ==== Quotes RFC 4180 allows most fields to be quoted or not. By default, \CSV does not quote most fields.
However, a field containing the current row separator, column separator, or quote character is automatically quoted, producing RFC 4180 compliance: # Field contains row separator. output_string = CSV.generate('') do |csv| row_sep = csv.row_sep csv << ["Foo#{row_sep}Foo", 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "\"Foo\nFoo\",0\nBar,1\nBaz,2\n" # Field contains column separator. output_string = CSV.generate('') do |csv| col_sep = csv.col_sep csv << ["Foo#{col_sep}Foo", 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "\"Foo,Foo\",0\nBar,1\nBaz,2\n" # Field contains quote character. output_string = CSV.generate('') do |csv| quote_char = csv.quote_char csv << ["Foo#{quote_char}Foo", 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "\"Foo\"\"Foo\",0\nBar,1\nBaz,2\n" ===== Recipe: Quote All Fields Use option +:force_quotes+ to force quoted fields: output_string = CSV.generate('', force_quotes: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "\"Foo\",\"0\"\n\"Bar\",\"1\"\n\"Baz\",\"2\"\n" ===== Recipe: Quote Empty Fields Use option +:quote_empty+ to force quoting for empty fields: output_string = CSV.generate('', quote_empty: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['', 2] end output_string # => "Foo,0\nBar,1\n\"\",2\n" ===== Recipe: Generate Compliant Quote Character RFC 4180 specifies quote character DQUOTE (Ruby "\""). Because the \CSV default quote character is also "\"", you need not specify option +:quote_char+ for compliant data: output_string = CSV.generate('', force_quotes: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "\"Foo\",\"0\"\n\"Bar\",\"1\"\n\"Baz\",\"2\"\n" ===== Recipe: Generate Non-Compliant Quote Character For data with non-compliant quote characters, use option +:quote_char+.
This example source uses SQUOTE ("'") as its quote character: output_string = CSV.generate('', quote_char: "'", force_quotes: true) do |csv| csv << ['Foo', 0] csv << ['Bar', 1] csv << ['Baz', 2] end output_string # => "'Foo','0'\n'Bar','1'\n'Baz','2'\n" csv-3.3.5/doc/csv/recipes/parsing.rdoc000066400000000000000000000535641501670011600176350ustar00rootroot00000000000000== Recipes for Parsing \CSV These recipes are specific code examples for specific \CSV parsing tasks. For other recipes, see {Recipes for CSV}[./recipes_rdoc.html]. All code snippets on this page assume that the following has been executed: require 'csv' === Contents - {Source Formats}[#label-Source+Formats] - {Parsing from a String}[#label-Parsing+from+a+String] - {Recipe: Parse from String with Headers}[#label-Recipe-3A+Parse+from+String+with+Headers] - {Recipe: Parse from String Without Headers}[#label-Recipe-3A+Parse+from+String+Without+Headers] - {Parsing from a File}[#label-Parsing+from+a+File] - {Recipe: Parse from File with Headers}[#label-Recipe-3A+Parse+from+File+with+Headers] - {Recipe: Parse from File Without Headers}[#label-Recipe-3A+Parse+from+File+Without+Headers] - {Parsing from an IO Stream}[#label-Parsing+from+an+IO+Stream] - {Recipe: Parse from IO Stream with Headers}[#label-Recipe-3A+Parse+from+IO+Stream+with+Headers] - {Recipe: Parse from IO Stream Without Headers}[#label-Recipe-3A+Parse+from+IO+Stream+Without+Headers] - {RFC 4180 Compliance}[#label-RFC+4180+Compliance] - {Row Separator}[#label-Row+Separator] - {Recipe: Handle Compliant Row Separator}[#label-Recipe-3A+Handle+Compliant+Row+Separator] - {Recipe: Handle Non-Compliant Row Separator}[#label-Recipe-3A+Handle+Non-Compliant+Row+Separator] - {Column Separator}[#label-Column+Separator] - {Recipe: Handle Compliant Column Separator}[#label-Recipe-3A+Handle+Compliant+Column+Separator] - {Recipe: Handle Non-Compliant Column Separator}[#label-Recipe-3A+Handle+Non-Compliant+Column+Separator] - {Quote 
Character}[#label-Quote+Character] - {Recipe: Handle Compliant Quote Character}[#label-Recipe-3A+Handle+Compliant+Quote+Character] - {Recipe: Handle Non-Compliant Quote Character}[#label-Recipe-3A+Handle+Non-Compliant+Quote+Character] - {Recipe: Allow Liberal Parsing}[#label-Recipe-3A+Allow+Liberal+Parsing] - {Special Handling}[#label-Special+Handling] - {Special Line Handling}[#label-Special+Line+Handling] - {Recipe: Ignore Blank Lines}[#label-Recipe-3A+Ignore+Blank+Lines] - {Recipe: Ignore Selected Lines}[#label-Recipe-3A+Ignore+Selected+Lines] - {Special Field Handling}[#label-Special+Field+Handling] - {Recipe: Strip Fields}[#label-Recipe-3A+Strip+Fields] - {Recipe: Handle Null Fields}[#label-Recipe-3A+Handle+Null+Fields] - {Recipe: Handle Empty Fields}[#label-Recipe-3A+Handle+Empty+Fields] - {Converting Fields}[#label-Converting+Fields] - {Converting Fields to Objects}[#label-Converting+Fields+to+Objects] - {Recipe: Convert Fields to Integers}[#label-Recipe-3A+Convert+Fields+to+Integers] - {Recipe: Convert Fields to Floats}[#label-Recipe-3A+Convert+Fields+to+Floats] - {Recipe: Convert Fields to Numerics}[#label-Recipe-3A+Convert+Fields+to+Numerics] - {Recipe: Convert Fields to Dates}[#label-Recipe-3A+Convert+Fields+to+Dates] - {Recipe: Convert Fields to DateTimes}[#label-Recipe-3A+Convert+Fields+to+DateTimes] - {Recipe: Convert Fields to Times}[#label-Recipe-3A+Convert+Fields+to+Times] - {Recipe: Convert Assorted Fields to Objects}[#label-Recipe-3A+Convert+Assorted+Fields+to+Objects] - {Recipe: Convert Fields to Other Objects}[#label-Recipe-3A+Convert+Fields+to+Other+Objects] - {Recipe: Filter Field Strings}[#label-Recipe-3A+Filter+Field+Strings] - {Recipe: Register Field Converters}[#label-Recipe-3A+Register+Field+Converters] - {Using Multiple Field Converters}[#label-Using+Multiple+Field+Converters] - {Recipe: Specify Multiple Field Converters in Option :converters}[#label-Recipe-3A+Specify+Multiple+Field+Converters+in+Option+-3Aconverters] - {Recipe: Specify 
Multiple Field Converters in a Custom Converter List}[#label-Recipe-3A+Specify+Multiple+Field+Converters+in+a+Custom+Converter+List] - {Converting Headers}[#label-Converting+Headers] - {Recipe: Convert Headers to Lowercase}[#label-Recipe-3A+Convert+Headers+to+Lowercase] - {Recipe: Convert Headers to Symbols}[#label-Recipe-3A+Convert+Headers+to+Symbols] - {Recipe: Filter Header Strings}[#label-Recipe-3A+Filter+Header+Strings] - {Recipe: Register Header Converters}[#label-Recipe-3A+Register+Header+Converters] - {Using Multiple Header Converters}[#label-Using+Multiple+Header+Converters] - {Recipe: Specify Multiple Header Converters in Option :header_converters}[#label-Recipe-3A+Specify+Multiple+Header+Converters+in+Option+-3Aheader_converters] - {Recipe: Specify Multiple Header Converters in a Custom Header Converter List}[#label-Recipe-3A+Specify+Multiple+Header+Converters+in+a+Custom+Header+Converter+List] - {Diagnostics}[#label-Diagnostics] - {Recipe: Capture Unconverted Fields}[#label-Recipe-3A+Capture+Unconverted+Fields] - {Recipe: Capture Field Info}[#label-Recipe-3A+Capture+Field+Info] === Source Formats You can parse \CSV data from a \String, from a \File (via its path), or from an \IO stream. ==== Parsing from a \String You can parse \CSV data from a \String, with or without headers. 
===== Recipe: Parse from \String with Headers Use class method CSV.parse with option +headers+ to read a source \String all at once (may have memory resource implications): string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" CSV.parse(string, headers: true) # => #<CSV::Table mode:col_or_row row_count:4> Use instance method CSV#each with option +headers+ to read a source \String one row at a time: CSV.new(string, headers: true).each do |row| p row end Output: #<CSV::Row "Name":"foo" "Value":"0"> #<CSV::Row "Name":"bar" "Value":"1"> #<CSV::Row "Name":"baz" "Value":"2"> ===== Recipe: Parse from \String Without Headers Use class method CSV.parse without option +headers+ to read a source \String all at once (may have memory resource implications): string = "foo,0\nbar,1\nbaz,2\n" CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Use instance method CSV#each without option +headers+ to read a source \String one row at a time: CSV.new(string).each do |row| p row end Output: ["foo", "0"] ["bar", "1"] ["baz", "2"] ==== Parsing from a \File You can parse \CSV data from a \File, with or without headers. ===== Recipe: Parse from \File with Headers Use class method CSV.read with option +headers+ to read a file all at once: string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, string) CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4> Use class method CSV.foreach with option +headers+ to read one row at a time: CSV.foreach(path, headers: true) do |row| p row end Output: #<CSV::Row "Name":"foo" "Value":"0"> #<CSV::Row "Name":"bar" "Value":"1"> #<CSV::Row "Name":"baz" "Value":"2"> ===== Recipe: Parse from \File Without Headers Use class method CSV.read without option +headers+ to read a file all at once: string = "foo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, string) CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Use class method CSV.foreach without option +headers+ to read one row at a time: CSV.foreach(path) do |row| p row end Output: ["foo", "0"] ["bar", "1"] ["baz", "2"] ==== Parsing from an \IO Stream You can parse \CSV data from an \IO stream, with or without headers.
===== Recipe: Parse from \IO Stream with Headers Use class method CSV.parse with option +headers+ to read an \IO stream all at once: string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, string) File.open(path) do |file| CSV.parse(file, headers: true) end # => #<CSV::Table mode:col_or_row row_count:4> Use class method CSV.foreach with option +headers+ to read one row at a time: File.open(path) do |file| CSV.foreach(file, headers: true) do |row| p row end end Output: #<CSV::Row "Name":"foo" "Value":"0"> #<CSV::Row "Name":"bar" "Value":"1"> #<CSV::Row "Name":"baz" "Value":"2"> ===== Recipe: Parse from \IO Stream Without Headers Use class method CSV.parse without option +headers+ to read an \IO stream all at once: string = "foo,0\nbar,1\nbaz,2\n" path = 't.csv' File.write(path, string) File.open(path) do |file| CSV.parse(file) end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Use class method CSV.foreach without option +headers+ to read one row at a time: File.open(path) do |file| CSV.foreach(file) do |row| p row end end Output: ["foo", "0"] ["bar", "1"] ["baz", "2"] === RFC 4180 Compliance By default, \CSV parses data that is compliant with {RFC 4180}[https://www.rfc-editor.org/rfc/rfc4180] with respect to: - Row separator. - Column separator. - Quote character. ==== Row Separator RFC 4180 specifies the row separator CRLF (Ruby "\r\n"). Although the \CSV default row separator is "\n", the parser also by default handles row separator "\r" and the RFC-compliant "\r\n".
===== Recipe: Handle Compliant Row Separator For strict compliance, use option +:row_sep+ to specify row separator "\r\n", which allows the compliant row separator: source = "foo,1\r\nbar,1\r\nbaz,2\r\n" CSV.parse(source, row_sep: "\r\n") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] But rejects other row separators: source = "foo,1\nbar,1\nbaz,2\n" CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError source = "foo,1\rbar,1\rbaz,2\r" CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError source = "foo,1\n\rbar,1\n\rbaz,2\n\r" CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError ===== Recipe: Handle Non-Compliant Row Separator For data with non-compliant row separators, use option +:row_sep+. This example source uses semicolon (";") as its row separator: source = "foo,1;bar,1;baz,2;" CSV.parse(source, row_sep: ';') # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] ==== Column Separator RFC 4180 specifies column separator COMMA (Ruby ","). ===== Recipe: Handle Compliant Column Separator Because the \CSV default column separator is ',', you need not specify option +:col_sep+ for compliant data: source = "foo,1\nbar,1\nbaz,2\n" CSV.parse(source) # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] ===== Recipe: Handle Non-Compliant Column Separator For data with non-compliant column separators, use option +:col_sep+. This example source uses TAB ("\t") as its column separator: source = "foo\t1\nbar\t1\nbaz\t2\n" CSV.parse(source, col_sep: "\t") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] ==== Quote Character RFC 4180 specifies quote character DQUOTE (Ruby "\"").
===== Recipe: Handle Compliant Quote Character Because the \CSV default quote character is "\"", you need not specify option +:quote_char+ for compliant data: source = "\"foo\",\"1\"\n\"bar\",\"1\"\n\"baz\",\"2\"\n" CSV.parse(source) # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] ===== Recipe: Handle Non-Compliant Quote Character For data with non-compliant quote characters, use option +:quote_char+. This example source uses SQUOTE ("'") as its quote character: source = "'foo','1'\n'bar','1'\n'baz','2'\n" CSV.parse(source, quote_char: "'") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]] ==== Recipe: Allow Liberal Parsing Use option +:liberal_parsing+ to specify that \CSV should attempt to parse input not conformant with RFC 4180, such as double quotes in unquoted fields: source = 'is,this "three, or four",fields' CSV.parse(source) # Raises MalformedCSVError CSV.parse(source, liberal_parsing: true) # => [["is", "this \"three", " or four\"", "fields"]] === Special Handling You can use parsing options to specify special handling for certain lines and fields. ==== Special Line Handling Use parsing options to specify special handling for blank lines, or for other selected lines. ===== Recipe: Ignore Blank Lines Use option +:skip_blanks+ to ignore blank lines: source = <<-EOT foo,0 bar,1 baz,2 , EOT parsed = CSV.parse(source, skip_blanks: true) parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]] ===== Recipe: Ignore Selected Lines Use option +:skip_lines+ to ignore selected lines. source = <<-EOT # Comment foo,0 bar,1 baz,2 # Another comment EOT parsed = CSV.parse(source, skip_lines: /^#/) parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] ==== Special Field Handling Use parsing options to specify special handling for certain field values. 
===== Recipe: Strip Fields Use option +:strip+ to strip parsed field values: CSV.parse_line(' a , b ', strip: true) # => ["a", "b"] ===== Recipe: Handle Null Fields Use option +:nil_value+ to specify a value that will replace each field that is null (no text): CSV.parse_line('a,,b,,c', nil_value: 0) # => ["a", 0, "b", 0, "c"] ===== Recipe: Handle Empty Fields Use option +:empty_value+ to specify a value that will replace each field that is empty (\String of length 0): CSV.parse_line('a,"",b,"",c', empty_value: 'x') # => ["a", "x", "b", "x", "c"] === Converting Fields You can use field converters to change parsed \String fields into other objects, or to otherwise modify the \String fields. ==== Converting Fields to Objects Use field converters to change parsed \String objects into other, more specific, objects. There are built-in field converters for converting to objects of certain classes: - \Float - \Integer - \Date - \DateTime - \Time Other built-in field converters include: - +:numeric+: converts to \Integer and \Float. - +:all+: converts to \DateTime, \Integer, \Float. You can also define field converters to convert to objects of other classes.
===== Recipe: Convert Fields to Integers Convert fields to \Integer objects using built-in converter +:integer+: source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, converters: :integer) parsed.map {|row| row['Value'].class} # => [Integer, Integer, Integer] ===== Recipe: Convert Fields to Floats Convert fields to \Float objects using built-in converter +:float+: source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, converters: :float) parsed.map {|row| row['Value'].class} # => [Float, Float, Float] ===== Recipe: Convert Fields to Numerics Convert fields to \Integer and \Float objects using built-in converter +:numeric+: source = "Name,Value\nfoo,0\nbar,1.1\nbaz,2.2\n" parsed = CSV.parse(source, headers: true, converters: :numeric) parsed.map {|row| row['Value'].class} # => [Integer, Float, Float] ===== Recipe: Convert Fields to Dates Convert fields to \Date objects using built-in converter +:date+: source = "Name,Date\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2001-02-03\n" parsed = CSV.parse(source, headers: true, converters: :date) parsed.map {|row| row['Date'].class} # => [Date, Date, Date] ===== Recipe: Convert Fields to DateTimes Convert fields to \DateTime objects using built-in converter +:date_time+: source = "Name,DateTime\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n" parsed = CSV.parse(source, headers: true, converters: :date_time) parsed.map {|row| row['DateTime'].class} # => [DateTime, DateTime, DateTime] ===== Recipe: Convert Fields to Times Convert fields to \Time objects using built-in converter +:time+: source = "Name,Time\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n" parsed = CSV.parse(source, headers: true, converters: :time) parsed.map {|row| row['Time'].class} # => [Time, Time, Time] ===== Recipe: Convert Assorted Fields to Objects Convert assorted fields to objects using built-in converter +:all+: source = 
"Type,Value\nInteger,0\nFloat,1.0\nDateTime,2001-02-04\n" parsed = CSV.parse(source, headers: true, converters: :all) parsed.map {|row| row['Value'].class} # => [Integer, Float, DateTime] ===== Recipe: Convert Fields to Other Objects Define a custom field converter to convert \String fields into other objects. This example defines and uses a custom field converter that converts each column-1 value to a \Rational object: rational_converter = proc do |field, field_context| field_context.index == 1 ? field.to_r : field end source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, converters: rational_converter) parsed.map {|row| row['Value'].class} # => [Rational, Rational, Rational] ==== Recipe: Filter Field Strings Define a custom field converter to modify \String fields. This example defines and uses a custom field converter that strips whitespace from each field value: strip_converter = proc {|field| field.strip } source = "Name,Value\n foo , 0 \n bar , 1 \n baz , 2 \n" parsed = CSV.parse(source, headers: true, converters: strip_converter) parsed['Name'] # => ["foo", "bar", "baz"] parsed['Value'] # => ["0", "1", "2"] ==== Recipe: Register Field Converters Register a custom field converter, assigning it a name; then refer to the converter by its name: rational_converter = proc do |field, field_context| field_context.index == 1 ? field.to_r : field end CSV::Converters[:rational] = rational_converter source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, converters: :rational) parsed['Value'] # => [(0/1), (1/1), (2/1)] ==== Using Multiple Field Converters You can use multiple field converters in either of these ways: - Specify converters in option +:converters+. - Specify converters in a custom converter list. 
===== Recipe: Specify Multiple Field Converters in Option +:converters+ Apply multiple field converters by specifying them in option +:converters+: source = "Name,Value\nfoo,0\nbar,1.0\nbaz,2.0\n" parsed = CSV.parse(source, headers: true, converters: [:integer, :float]) parsed['Value'] # => [0, 1.0, 2.0] ===== Recipe: Specify Multiple Field Converters in a Custom Converter List Apply multiple field converters by defining and registering a custom converter list: strip_converter = proc {|field| field.strip } CSV::Converters[:strip] = strip_converter CSV::Converters[:my_converters] = [:integer, :float, :strip] source = "Name,Value\n foo , 0 \n bar , 1.0 \n baz , 2.0 \n" parsed = CSV.parse(source, headers: true, converters: :my_converters) parsed['Name'] # => ["foo", "bar", "baz"] parsed['Value'] # => [0, 1.0, 2.0] === Converting Headers You can use header converters to modify parsed \String headers. Built-in header converters include: - +:symbol+: converts \String header to \Symbol. - +:downcase+: converts \String header to lowercase. You can also define header converters to otherwise modify header \Strings. ==== Recipe: Convert Headers to Lowercase Convert headers to lowercase using built-in converter +:downcase+: source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, header_converters: :downcase) parsed.headers # => ["name", "value"] ==== Recipe: Convert Headers to Symbols Convert headers to downcased Symbols using built-in converter +:symbol+: source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, header_converters: :symbol) parsed.headers # => [:name, :value] parsed.headers.map {|header| header.class} # => [Symbol, Symbol] ==== Recipe: Filter Header Strings Define a custom header converter to modify \String headers.
This example defines and uses a custom header converter that capitalizes each header \String: capitalize_converter = proc {|header| header.capitalize } source = "NAME,VALUE\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, header_converters: capitalize_converter) parsed.headers # => ["Name", "Value"] ==== Recipe: Register Header Converters Register a custom header converter, assigning it a name; then refer to the converter by its name: capitalize_converter = proc {|header| header.capitalize } CSV::HeaderConverters[:capitalize] = capitalize_converter source = "NAME,VALUE\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, headers: true, header_converters: :capitalize) parsed.headers # => ["Name", "Value"] ==== Using Multiple Header Converters You can use multiple header converters in either of these ways: - Specify header converters in option +:header_converters+. - Specify header converters in a custom header converter list. ===== Recipe: Specify Multiple Header Converters in Option :header_converters Apply multiple header converters by specifying them in option +:header_converters+: source = "Name,Value\nfoo,0\nbar,1.0\nbaz,2.0\n" parsed = CSV.parse(source, headers: true, header_converters: [:downcase, :symbol]) parsed.headers # => [:name, :value] ===== Recipe: Specify Multiple Header Converters in a Custom Header Converter List Apply multiple header converters by defining and registering a custom header converter list: CSV::HeaderConverters[:my_header_converters] = [:symbol, :downcase] source = "NAME,VALUE\nfoo,0\nbar,1.0\nbaz,2.0\n" parsed = CSV.parse(source, headers: true, header_converters: :my_header_converters) parsed.headers # => [:name, :value] === Diagnostics ==== Recipe: Capture Unconverted Fields To capture unconverted field values, use option +:unconverted_fields+: source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" parsed = CSV.parse(source, converters: :integer, unconverted_fields: true) parsed # => [["Name", "Value"], ["foo", 0], ["bar", 1], 
["baz", 2]] parsed.each {|row| p row.unconverted_fields } Output: ["Name", "Value"] ["foo", "0"] ["bar", "1"] ["baz", "2"] ==== Recipe: Capture Field Info To capture field info in a custom converter, accept two block arguments. The first is the field value; the second is a +CSV::FieldInfo+ object: strip_converter = proc {|field, field_info| p field_info; field.strip } source = " foo , 0 \n bar , 1 \n baz , 2 \n" parsed = CSV.parse(source, converters: strip_converter) parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] Output: # # # # # # csv-3.3.5/doc/csv/recipes/recipes.rdoc000066400000000000000000000003621501670011600176100ustar00rootroot00000000000000== Recipes for \CSV The recipes are specific code examples for specific tasks. See: - {Recipes for Parsing CSV}[./parsing_rdoc.html] - {Recipes for Generating CSV}[./generating_rdoc.html] - {Recipes for Filtering CSV}[./filtering_rdoc.html] csv-3.3.5/lib/000077500000000000000000000000001501670011600130605ustar00rootroot00000000000000csv-3.3.5/lib/csv.rb000066400000000000000000003035151501670011600142070ustar00rootroot00000000000000# encoding: US-ASCII # frozen_string_literal: true # = csv.rb -- CSV Reading and Writing # # Created by James Edward Gray II on 2005-10-31. # # See CSV for documentation. # # == Description # # Welcome to the new and improved CSV. # # This version of the CSV library began its life as FasterCSV. FasterCSV was # intended as a replacement to Ruby's then standard CSV library. It was # designed to address concerns users of that library had and it had three # primary goals: # # 1. Be significantly faster than CSV while remaining a pure Ruby library. # 2. Use a smaller and easier to maintain code base. (FasterCSV eventually # grew larger, was also but considerably richer in features. The parsing # core remains quite small.) # 3. Improve on the CSV interface. # # Obviously, the last one is subjective. 
I did try to defer to the original # interface whenever I didn't have a compelling reason to change it though, so # hopefully this won't be too radically different. # # We must have met our goals because FasterCSV was renamed to CSV and replaced # the original library as of Ruby 1.9. If you are migrating code from 1.8 or # earlier, you may have to change your code to comply with the new interface. # # == What's Different From the Old CSV? # # I'm sure I'll miss something, but I'll try to mention most of the major # differences I am aware of, to help others quickly get up to speed: # # === \CSV Parsing # # * This parser is m17n aware. See CSV for full details. # * This library has a stricter parser and will raise MalformedCSVErrors on # problematic data. # * This library has a less liberal idea of a line ending than CSV. What you # set as the :row_sep is law. It can auto-detect your line endings # though. # * The old library returned empty lines as [nil]. This library calls # them []. # * This library has a much faster parser. # # === Interface # # * CSV now uses keyword parameters to set options. # * CSV no longer has generate_row() or parse_row(). # * The old CSV's Reader and Writer classes have been dropped. # * CSV::open() is now more like Ruby's open(). # * CSV objects now support most standard IO methods. # * CSV now has a new() method used to wrap objects like String and IO for # reading and writing. # * CSV::generate() is different from the old method. # * CSV no longer supports partial reads. It works line-by-line. # * CSV no longer allows the instance methods to override the separators for # performance reasons. They must be set in the constructor. # # If you use this library and find yourself missing any functionality I have # trimmed, please {let me know}[mailto:james@grayproductions.net]. # # == Documentation # # See CSV for documentation. # # == What is CSV, really?
# # CSV maintains a pretty strict definition of CSV taken directly from # {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one # place and that is to make using this library easier. CSV will parse all valid # CSV. # # What you don't want to do is to feed CSV invalid data. Because of the way the # CSV format works, it's common for a parser to need to read until the end of # the file to be sure a field is invalid. This consumes a lot of time and memory. # # Luckily, when working with invalid CSV, Ruby's built-in methods will almost # always be superior in every way. For example, parsing non-quoted fields is as # easy as: # # data.split(",") # # == Questions and/or Comments # # Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] # with any questions. require "forwardable" require "date" require "time" require "stringio" require_relative "csv/fields_converter" require_relative "csv/input_record_separator" require_relative "csv/parser" require_relative "csv/row" require_relative "csv/table" require_relative "csv/writer" # == \CSV # # === \CSV Data # # \CSV (comma-separated values) data is a text representation of a table: # - A _row_ _separator_ delimits table rows. # A common row separator is the newline character "\n". # - A _column_ _separator_ delimits fields in a row. # A common column separator is the comma character ",". # # This \CSV \String, with row separator "\n" # and column separator ",", # has three rows and two columns: # "foo,0\nbar,1\nbaz,2\n" # # Despite the name \CSV, a \CSV representation can use different separators. # # For more about tables, see the Wikipedia article # "{Table (information)}[https://en.wikipedia.org/wiki/Table_(information)]", # especially its section # "{Simple table}[https://en.wikipedia.org/wiki/Table_(information)#Simple_table]" # # == \Class \CSV # # Class \CSV provides methods for: # - Parsing \CSV data from a \String object, a \File (via its file path), or an \IO object. 
# - Generating \CSV data to a \String object. # # To make \CSV available: # require 'csv' # # All examples here assume that this has been done. # # == Keeping It Simple # # A \CSV object has dozens of instance methods that offer fine-grained control # of parsing and generating \CSV data. # For many needs, though, simpler approaches will do. # # This section summarizes the singleton methods in \CSV # that allow you to parse and generate without explicitly # creating \CSV objects. # For details, follow the links. # # === Simple Parsing # # Parsing methods commonly return either of: # - An \Array of Arrays of Strings: # - The outer \Array is the entire "table". # - Each inner \Array is a row. # - Each \String is a field. # - A CSV::Table object. For details, see # {\CSV with Headers}[#class-CSV-label-CSV+with+Headers]. # # ==== Parsing a \String # # The input to be parsed can be a string: # string = "foo,0\nbar,1\nbaz,2\n" # # \Method CSV.parse returns the entire \CSV data: # CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # \Method CSV.parse_line returns only the first row: # CSV.parse_line(string) # => ["foo", "0"] # # \CSV extends class \String with instance method String#parse_csv, # which also returns only the first row: # string.parse_csv # => ["foo", "0"] # # ==== Parsing Via a \File Path # # The input to be parsed can be in a file: # string = "foo,0\nbar,1\nbaz,2\n" # path = 't.csv' # File.write(path, string) # # \Method CSV.read returns the entire \CSV data: # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # \Method CSV.foreach iterates, passing each row to the given block: # CSV.foreach(path) do |row| # p row # end # Output: # ["foo", "0"] # ["bar", "1"] # ["baz", "2"] # # \Method CSV.table returns the entire \CSV data as a CSV::Table object: # CSV.table(path) # => # # # ==== Parsing from an Open \IO Stream # # The input to be parsed can be in an open \IO stream: # # \Method CSV.read returns the entire \CSV data: # 
File.open(path) do |file| # CSV.read(file) # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # As does method CSV.parse: # File.open(path) do |file| # CSV.parse(file) # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # \Method CSV.parse_line returns only the first row: # File.open(path) do |file| # CSV.parse_line(file) # end # => ["foo", "0"] # # \Method CSV.foreach iterates, passing each row to the given block: # File.open(path) do |file| # CSV.foreach(file) do |row| # p row # end # end # Output: # ["foo", "0"] # ["bar", "1"] # ["baz", "2"] # # \Method CSV.table returns the entire \CSV data as a CSV::Table object: # File.open(path) do |file| # CSV.table(file) # end # => # # # === Simple Generating # # \Method CSV.generate returns a \String; # this example uses method CSV#<< to append the rows # that are to be generated: # output_string = CSV.generate do |csv| # csv << ['foo', 0] # csv << ['bar', 1] # csv << ['baz', 2] # end # output_string # => "foo,0\nbar,1\nbaz,2\n" # # \Method CSV.generate_line returns a \String containing the single row # constructed from an \Array: # CSV.generate_line(['foo', '0']) # => "foo,0\n" # # \CSV extends class \Array with instance method Array#to_csv, # which forms an \Array into a \String: # ['foo', '0'].to_csv # => "foo,0\n" # # === "Filtering" \CSV # # \Method CSV.filter provides a Unix-style filter for \CSV data. # The input data is processed to form the output data: # in_string = "foo,0\nbar,1\nbaz,2\n" # out_string = '' # CSV.filter(in_string, out_string) do |row| # row[0] = row[0].upcase # row[1] *= 4 # end # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" # # == \CSV Objects # # There are three ways to create a \CSV object: # - \Method CSV.new returns a new \CSV object. # - \Method CSV.instance returns a new or cached \CSV object. # - \Method \CSV() also returns a new or cached \CSV object. # # === Instance Methods # # \CSV has three groups of instance methods: # - Its own internally defined instance methods. 
# - Methods included by module Enumerable. # - Methods delegated to class IO. See below. # # ==== Delegated Methods # # For convenience, a CSV object will delegate to many methods in class IO. # (A few have wrapper "guard code" in \CSV.) You may call: # * IO#binmode # * #binmode? # * IO#close # * IO#close_read # * IO#close_write # * IO#closed? # * #eof # * #eof? # * IO#external_encoding # * IO#fcntl # * IO#fileno # * #flock # * IO#flush # * IO#fsync # * IO#internal_encoding # * #ioctl # * IO#isatty # * #path # * IO#pid # * IO#pos # * IO#pos= # * IO#reopen # * #rewind # * IO#seek # * #stat # * IO#string # * IO#sync # * IO#sync= # * IO#tell # * #to_i # * #to_io # * IO#truncate # * IO#tty? # # === Options # # The default values for options are: # DEFAULT_OPTIONS = { # # For both parsing and generating. # col_sep: ",", # row_sep: :auto, # quote_char: '"', # # For parsing. # field_size_limit: nil, # converters: nil, # unconverted_fields: nil, # headers: false, # return_headers: false, # header_converters: nil, # skip_blanks: false, # skip_lines: nil, # liberal_parsing: false, # nil_value: nil, # empty_value: "", # strip: false, # # For generating. # write_headers: nil, # quote_empty: true, # force_quotes: false, # write_converters: nil, # write_nil_value: nil, # write_empty_value: "", # } # # ==== Options for Parsing # # Options for parsing, described in detail below, include: # - +row_sep+: Specifies the row separator; used to delimit rows. # - +col_sep+: Specifies the column separator; used to delimit fields. # - +quote_char+: Specifies the quote character; used to quote fields. # - +field_size_limit+: Specifies the maximum field size + 1 allowed. # Deprecated since 3.2.3. Use +max_field_size+ instead. # - +max_field_size+: Specifies the maximum field size allowed. # - +converters+: Specifies the field converters to be used. # - +unconverted_fields+: Specifies whether unconverted fields are to be available. 
# - +headers+: Specifies whether data contains headers, # or specifies the headers themselves. # - +return_headers+: Specifies whether headers are to be returned. # - +header_converters+: Specifies the header converters to be used. # - +skip_blanks+: Specifies whether blank lines are to be ignored. # - +skip_lines+: Specifies how comment lines are to be recognized. # - +strip+: Specifies whether leading and trailing whitespace are to be # stripped from fields. This must be compatible with +col_sep+; if it is not, # then an +ArgumentError+ exception will be raised. # - +liberal_parsing+: Specifies whether \CSV should attempt to parse # non-compliant data. # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field. # - +empty_value+: Specifies the object that is to be substituted for each empty field. # # :include: ../doc/csv/options/common/row_sep.rdoc # # :include: ../doc/csv/options/common/col_sep.rdoc # # :include: ../doc/csv/options/common/quote_char.rdoc # # :include: ../doc/csv/options/parsing/field_size_limit.rdoc # # :include: ../doc/csv/options/parsing/converters.rdoc # # :include: ../doc/csv/options/parsing/unconverted_fields.rdoc # # :include: ../doc/csv/options/parsing/headers.rdoc # # :include: ../doc/csv/options/parsing/return_headers.rdoc # # :include: ../doc/csv/options/parsing/header_converters.rdoc # # :include: ../doc/csv/options/parsing/skip_blanks.rdoc # # :include: ../doc/csv/options/parsing/skip_lines.rdoc # # :include: ../doc/csv/options/parsing/strip.rdoc # # :include: ../doc/csv/options/parsing/liberal_parsing.rdoc # # :include: ../doc/csv/options/parsing/nil_value.rdoc # # :include: ../doc/csv/options/parsing/empty_value.rdoc # # ==== Options for Generating # # Options for generating, described in detail below, include: # - +row_sep+: Specifies the row separator; used to delimit rows. # - +col_sep+: Specifies the column separator; used to delimit fields.
# - +quote_char+: Specifies the quote character; used to quote fields. # - +write_headers+: Specifies whether headers are to be written. # - +force_quotes+: Specifies whether each output field is to be quoted. # - +quote_empty+: Specifies whether each empty output field is to be quoted. # - +write_converters+: Specifies the field converters to be used in writing. # - +write_nil_value+: Specifies the object that is to be substituted for each +nil+-valued field. # - +write_empty_value+: Specifies the object that is to be substituted for each empty field. # # :include: ../doc/csv/options/common/row_sep.rdoc # # :include: ../doc/csv/options/common/col_sep.rdoc # # :include: ../doc/csv/options/common/quote_char.rdoc # # :include: ../doc/csv/options/generating/write_headers.rdoc # # :include: ../doc/csv/options/generating/force_quotes.rdoc # # :include: ../doc/csv/options/generating/quote_empty.rdoc # # :include: ../doc/csv/options/generating/write_converters.rdoc # # :include: ../doc/csv/options/generating/write_nil_value.rdoc # # :include: ../doc/csv/options/generating/write_empty_value.rdoc # # === \CSV with Headers # # CSV allows to specify column names of CSV file, whether they are in data, or # provided separately. If headers are specified, reading methods return an instance # of CSV::Table, consisting of CSV::Row. # # # Headers are part of data # data = CSV.parse(<<~ROWS, headers: true) # Name,Department,Salary # Bob,Engineering,1000 # Jane,Sales,2000 # John,Management,5000 # ROWS # # data.class #=> CSV::Table # data.first #=> # # data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"} # # # Headers provided by developer # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary]) # data.first #=> # # # === \Converters # # By default, each value (field or header) parsed by \CSV is formed into a \String. 
# You can use a _field_ _converter_ or _header_ _converter_ # to intercept and modify the parsed values: # - See {Field Converters}[#class-CSV-label-Field+Converters]. # - See {Header Converters}[#class-CSV-label-Header+Converters]. # # Also by default, each value to be written during generation is written 'as-is'. # You can use a _write_ _converter_ to modify values before writing. # - See {Write Converters}[#class-CSV-label-Write+Converters]. # # ==== Specifying \Converters # # You can specify converters for parsing or generating in the +options+ # argument to various \CSV methods: # - Option +converters+ for converting parsed field values. # - Option +header_converters+ for converting parsed header values. # - Option +write_converters+ for converting values to be written (generated). # # There are three forms for specifying converters: # - A converter proc: executable code to be used for conversion. # - A converter name: the name of a stored converter. # - A converter list: an array of converter procs, converter names, and converter lists. # # ===== Converter Procs # # This converter proc, +strip_converter+, accepts a value +field+ # and returns field.strip: # strip_converter = proc {|field| field.strip } # In this call to CSV.parse, # the keyword argument converters: strip_converter # specifies that: # - \Proc +strip_converter+ is to be called for each parsed field. # - The converter's return value is to replace the +field+ value. # Example: # string = " foo , 0 \n bar , 1 \n baz , 2 \n" # array = CSV.parse(string, converters: strip_converter) # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # A converter proc can receive a second argument, +field_info+, # that contains details about the field.
# This modified +strip_converter+ displays its arguments: # strip_converter = proc do |field, field_info| # p [field, field_info] # field.strip # end # string = " foo , 0 \n bar , 1 \n baz , 2 \n" # array = CSV.parse(string, converters: strip_converter) # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # Output: # [" foo ", #<struct CSV::FieldInfo index=0, line=1, header=nil>] # [" 0 ", #<struct CSV::FieldInfo index=1, line=1, header=nil>] # [" bar ", #<struct CSV::FieldInfo index=0, line=2, header=nil>] # [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>] # [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>] # [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>] # Each CSV::FieldInfo object shows: # - The 0-based field index. # - The 1-based line index. # - The field header, if any. # # ===== Stored \Converters # # A converter may be given a name and stored in a structure where # the parsing methods can find it by name. # # The storage structure for field converters is the \Hash CSV::Converters. # It has several built-in converter procs: # - :integer: converts each \String-embedded integer into a true \Integer. # - :float: converts each \String-embedded float into a true \Float. # - :date: converts each \String-embedded date into a true \Date. # - :date_time: converts each \String-embedded date-time into a true \DateTime. # - :time: converts each \String-embedded time into a true \Time. # # This example creates a converter proc, then stores it: # strip_converter = proc {|field| field.strip } # CSV::Converters[:strip] = strip_converter # Then the parsing method call can refer to the converter # by its name, :strip: # string = " foo , 0 \n bar , 1 \n baz , 2 \n" # array = CSV.parse(string, converters: :strip) # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # The storage structure for header converters is the \Hash CSV::HeaderConverters, # which works in the same way. # It also has built-in converter procs: # - :downcase: Downcases each header. # - :symbol: Converts each header to a \Symbol. # # There is no such storage structure for write converters. # # In order for the parsing methods to access stored converters in non-main-Ractors, the # storage structure must be made shareable first.
# Therefore, Ractor.make_shareable(CSV::Converters) and # Ractor.make_shareable(CSV::HeaderConverters) must be called before the creation # of Ractors that use the converters stored in these structures. (Since making the storage # structures shareable involves freezing them, any custom converters that are to be used # must be added first.) # # ===== Converter Lists # # A _converter_ _list_ is an \Array that may include any assortment of: # - Converter procs. # - Names of stored converters. # - Nested converter lists. # # Examples: # numeric_converters = [:integer, :float] # date_converters = [:date, :date_time] # [numeric_converters, strip_converter] # [strip_converter, date_converters, :float] # # Like a converter proc, a converter list may be named and stored in either # \CSV::Converters or CSV::HeaderConverters: # CSV::Converters[:custom] = [strip_converter, date_converters, :float] # CSV::HeaderConverters[:custom] = [:downcase, :symbol] # # There are two built-in converter lists: # CSV::Converters[:numeric] # => [:integer, :float] # CSV::Converters[:all] # => [:date_time, :numeric] # # ==== Field \Converters # # With no conversion, all parsed fields in all rows become Strings: # string = "foo,0\nbar,1\nbaz,2\n" # ary = CSV.parse(string) # ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # When you specify a field converter, each parsed field is passed to the converter; # its return value becomes the stored value for the field. # A converter might, for example, convert an integer embedded in a \String # into a true \Integer. # (In fact, that's what built-in field converter +:integer+ does.) # # There are three ways to use field \converters.
# # - Using option {converters}[#class-CSV-label-Option+converters] with a parsing method: # ary = CSV.parse(string, converters: :integer) # ary # => [["foo", 0], ["bar", 1], ["baz", 2]] # - Using option {converters}[#class-CSV-label-Option+converters] with a new \CSV instance: # csv = CSV.new(string, converters: :integer) # # Field converters in effect: # csv.converters # => [:integer] # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]] # - Using method #convert to add a field converter to a \CSV instance: # csv = CSV.new(string) # # Add a converter. # csv.convert(:integer) # csv.converters # => [:integer] # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]] # # Installing a field converter does not affect already-read rows: # csv = CSV.new(string) # csv.shift # => ["foo", "0"] # # Add a converter. # csv.convert(:integer) # csv.converters # => [:integer] # csv.read # => [["bar", 1], ["baz", 2]] # # There are additional built-in \converters, and custom \converters are also supported. # # ===== Built-In Field \Converters # # The built-in field converters are in \Hash CSV::Converters: # - Each key is a field converter name. # - Each value is one of: # - A \Proc field converter. # - An \Array of field converter names. # # Display: # CSV::Converters.each_pair do |name, value| # if value.kind_of?(Proc) # p [name, value.class] # else # p [name, value] # end # end # Output: # [:integer, Proc] # [:float, Proc] # [:numeric, [:integer, :float]] # [:date, Proc] # [:date_time, Proc] # [:time, Proc] # [:all, [:date_time, :numeric]] # # Each of these converters transcodes values to UTF-8 before attempting conversion. # If a value cannot be transcoded to UTF-8 the conversion will # fail and the value will remain unconverted.
# # Converter +:integer+ converts each field that Integer() accepts: # data = '0,1,2,x' # # Without the converter # csv = CSV.parse_line(data) # csv # => ["0", "1", "2", "x"] # # With the converter # csv = CSV.parse_line(data, converters: :integer) # csv # => [0, 1, 2, "x"] # # Converter +:float+ converts each field that Float() accepts: # data = '1.0,3.14159,x' # # Without the converter # csv = CSV.parse_line(data) # csv # => ["1.0", "3.14159", "x"] # # With the converter # csv = CSV.parse_line(data, converters: :float) # csv # => [1.0, 3.14159, "x"] # # Converter +:numeric+ converts with both +:integer+ and +:float+. # # Converter +:date+ converts each field that Date::parse accepts: # data = '2001-02-03,x' # # Without the converter # csv = CSV.parse_line(data) # csv # => ["2001-02-03", "x"] # # With the converter # csv = CSV.parse_line(data, converters: :date) # csv # => [#<Date: 2001-02-03 ((2451944j,0s,0n),+0s,2299161j)>, "x"] # # Converter +:date_time+ converts each field that DateTime::parse accepts: # data = '2020-05-07T14:59:00-05:00,x' # # Without the converter # csv = CSV.parse_line(data) # csv # => ["2020-05-07T14:59:00-05:00", "x"] # # With the converter # csv = CSV.parse_line(data, converters: :date_time) # csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"] # # Converter +:time+ converts each field that Time::parse accepts: # data = '2020-05-07T14:59:00-05:00,x' # # Without the converter # csv = CSV.parse_line(data) # csv # => ["2020-05-07T14:59:00-05:00", "x"] # # With the converter # csv = CSV.parse_line(data, converters: :time) # csv # => [2020-05-07 14:59:00 -0500, "x"] # # Converter +:all+ converts with both +:date_time+ and +:numeric+.
# # As seen above, method #convert adds \converters to a \CSV instance, # and method #converters returns an \Array of the \converters in effect: # csv = CSV.new('0,1,2') # csv.converters # => [] # csv.convert(:integer) # csv.converters # => [:integer] # csv.convert(:date) # csv.converters # => [:integer, :date] # # ===== Custom Field \Converters # # You can define a custom field converter: # strip_converter = proc {|field| field.strip } # string = " foo , 0 \n bar , 1 \n baz , 2 \n" # array = CSV.parse(string, converters: strip_converter) # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # You can register the converter in \Converters \Hash, # which allows you to refer to it by name: # CSV::Converters[:strip] = strip_converter # string = " foo , 0 \n bar , 1 \n baz , 2 \n" # array = CSV.parse(string, converters: :strip) # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] # # ==== Header \Converters # # Header converters operate only on headers (and not on other rows). # # There are three ways to use header \converters; # these examples use built-in header converter +:downcase+, # which downcases each parsed header. # # - Option +header_converters+ with a singleton parsing method: # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" # tbl = CSV.parse(string, headers: true, header_converters: :downcase) # tbl.class # => CSV::Table # tbl.headers # => ["name", "count"] # # - Option +header_converters+ with a new \CSV instance: # csv = CSV.new(string, header_converters: :downcase) # # Header converters in effect: # csv.header_converters # => [:downcase] # tbl = CSV.parse(string, headers: true) # tbl.headers # => ["Name", "Count"] # # - Method #header_convert adds a header converter to a \CSV instance: # csv = CSV.new(string) # # Add a header converter. 
# csv.header_convert(:downcase) # csv.header_converters # => [:downcase] # tbl = CSV.parse(string, headers: true) # tbl.headers # => ["Name", "Count"] # # ===== Built-In Header \Converters # # The built-in header \converters are in \Hash CSV::HeaderConverters. # The keys there are the names of the \converters: # CSV::HeaderConverters.keys # => [:downcase, :symbol] # # Converter +:downcase+ converts each header by downcasing it: # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" # tbl = CSV.parse(string, headers: true, header_converters: :downcase) # tbl.class # => CSV::Table # tbl.headers # => ["name", "count"] # # Converter +:symbol+ converts each header by making it into a \Symbol: # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2" # tbl = CSV.parse(string, headers: true, header_converters: :symbol) # tbl.headers # => [:name, :count] # Details: # - Strips leading and trailing whitespace. # - Downcases the header. # - Replaces embedded spaces with underscores. # - Removes non-word characters. # - Makes the string into a \Symbol. # # ===== Custom Header \Converters # # You can define a custom header converter: # upcase_converter = proc {|header| header.upcase } # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(string, headers: true, header_converters: upcase_converter) # table # => # # table.headers # => ["NAME", "VALUE"] # You can register the converter in \HeaderConverters \Hash, # which allows you to refer to it by name: # CSV::HeaderConverters[:upcase] = upcase_converter # table = CSV.parse(string, headers: true, header_converters: :upcase) # table # => # # table.headers # => ["NAME", "VALUE"] # # ===== Write \Converters # # When you specify a write converter for generating \CSV, # each field to be written is passed to the converter; # its return value becomes the new value for the field. # A converter might, for example, strip whitespace from a field. 
# # Using no write converter (all fields unmodified): # output_string = CSV.generate do |csv| # csv << [' foo ', 0] # csv << [' bar ', 1] # csv << [' baz ', 2] # end # output_string # => " foo ,0\n bar ,1\n baz ,2\n" # Using option +write_converters+ with two custom write converters: # strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field } # upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field } # write_converters = [strip_converter, upcase_converter] # output_string = CSV.generate(write_converters: write_converters) do |csv| # csv << [' foo ', 0] # csv << [' bar ', 1] # csv << [' baz ', 2] # end # output_string # => "FOO,0\nBAR,1\nBAZ,2\n" # # === Character Encodings (M17n or Multilingualization) # # This new CSV parser is m17n savvy. The parser works in the Encoding of the IO # or String object being read from or written to. Your data is never transcoded # (unless you ask Ruby to transcode it for you) and will literally be parsed in # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the # Encoding of your data. This is accomplished by transcoding the parser itself # into your Encoding. # # Some transcoding must take place, of course, to accomplish this multiencoding # support. For example, :col_sep, :row_sep, and # :quote_char must be transcoded to match your data. Hopefully this # makes the entire process feel transparent, since CSV's defaults should just # magically work for your data. However, you can set these values manually in # the target Encoding to avoid the translation. # # It's also important to note that while all of CSV's core parser is now # Encoding agnostic, some features are not. For example, the built-in # converters will try to transcode data to UTF-8 before making conversions. # Again, you can provide custom converters that are aware of your Encodings to # avoid this translation. It's just too hard for me to support native # conversions in all of Ruby's Encodings. 
# # Anyway, the practical side of this is simple: make sure IO and String objects # passed into CSV have the proper Encoding set and everything should just work. # CSV methods that allow you to open IO objects (CSV::foreach(), CSV::open(), # CSV::read(), and CSV::readlines()) do allow you to specify the Encoding. # # One minor exception comes when generating CSV into a String with an Encoding # that is not ASCII compatible. There's no existing data for CSV to use to # prepare itself and thus you will probably need to manually specify the desired # Encoding for most of those cases. It will try to guess using the fields in a # row of output though, when using CSV::generate_line() or Array#to_csv(). # # I try to point out any other Encoding issues in the documentation of methods # as they come up. # # This has been tested to the best of my ability with all non-"dummy" Encodings # Ruby ships with. However, it is brave new code and may have some bugs. # Please feel free to {report}[mailto:james@grayproductions.net] any issues you # find with it. # class CSV # The error thrown when the parser encounters illegal CSV formatting. class MalformedCSVError < RuntimeError attr_reader :line_number alias_method :lineno, :line_number def initialize(message, line_number) @line_number = line_number super("#{message} in line #{line_number}.") end end # The error thrown when the parser encounters invalid encoding in CSV. class InvalidEncodingError < MalformedCSVError attr_reader :encoding def initialize(encoding, line_number) @encoding = encoding super("Invalid byte sequence in #{encoding}", line_number) end end # # A FieldInfo Struct contains details about a field's position in the data # source it was read from. CSV will pass this Struct to some blocks that make # decisions based on field structure. See CSV.convert_fields() for an # example. # # index:: The zero-based index of the field in its row. # line:: The line of the data source this row is from. 
# header::   The header for the column, when available.
# quoted?::  True or false, whether the original value is quoted or not.
#
FieldInfo = Struct.new(:index, :line, :header, :quoted?)

# A Regexp used to find and convert some common Date formats.
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
                      \d{4}-\d{2}-\d{2} )\z /x

# A Regexp used to find and convert some common (Date)Time formats.
DateTimeMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
                          # ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse
                          \d{4}-\d{2}-\d{2}
                            (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
                      )\z /x

# The encoding used by all converters.
ConverterEncoding = Encoding.find("UTF-8")

# A \Hash containing the names and \Procs for the built-in field converters.
# See {Built-In Field Converters}[#class-CSV-label-Built-In+Field+Converters].
#
# This \Hash is intentionally left unfrozen, and may be extended with
# custom field converters.
# See {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters].
#
# Every built-in \Proc transcodes the field to ConverterEncoding first and
# hands back the original field untouched when the conversion raises.
Converters = {
  integer: lambda { |field|
    begin
      Integer(field.encode(ConverterEncoding))
    rescue
      field
    end
  },
  float: lambda { |field|
    begin
      Float(field.encode(ConverterEncoding))
    rescue
      field
    end
  },
  # Composite entry: names of the converters to apply, in order.
  numeric: [:integer, :float],
  date: lambda { |field|
    begin
      utf8 = field.encode(ConverterEncoding)
      if utf8.match?(DateMatcher)
        Date.parse(utf8)
      else
        field
      end
    rescue  # encoding conversion or date parse errors
      field
    end
  },
  date_time: lambda { |field|
    begin
      utf8 = field.encode(ConverterEncoding)
      if utf8.match?(DateTimeMatcher)
        DateTime.parse(utf8)
      else
        field
      end
    rescue  # encoding conversion or date parse errors
      field
    end
  },
  time: lambda { |field|
    begin
      utf8 = field.encode(ConverterEncoding)
      if utf8.match?(DateTimeMatcher)
        Time.parse(utf8)
      else
        field
      end
    rescue  # encoding conversion or parse errors
      field
    end
  },
  # Composite entry: names of the converters to apply, in order.
  all: [:date_time, :numeric],
}

# A \Hash containing the names and \Procs for the built-in header converters.
# See {Built-In Header Converters}[#class-CSV-label-Built-In+Header+Converters].
# # This \Hash is intentionally left unfrozen, and may be extended with # custom field converters. # See {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters]. HeaderConverters = { downcase: lambda { |h| h.encode(ConverterEncoding).downcase }, symbol: lambda { |h| h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip. gsub(/\s+/, "_").to_sym }, symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym } } # Default values for method options. DEFAULT_OPTIONS = { # For both parsing and generating. col_sep: ",", row_sep: :auto, quote_char: '"', # For parsing. field_size_limit: nil, max_field_size: nil, converters: nil, unconverted_fields: nil, headers: false, return_headers: false, header_converters: nil, skip_blanks: false, skip_lines: nil, liberal_parsing: false, nil_value: nil, empty_value: "", strip: false, # For generating. write_headers: nil, quote_empty: true, force_quotes: false, write_converters: nil, write_nil_value: nil, write_empty_value: "", }.freeze class << self # :call-seq: # instance(string, **options) # instance(io = $stdout, **options) # instance(string, **options) {|csv| ... } # instance(io = $stdout, **options) {|csv| ... } # # Creates or retrieves cached \CSV objects. # For arguments and options, see CSV.new. # # This API is not Ractor-safe. # # --- # # With no block given, returns a \CSV object. # # The first call to +instance+ creates and caches a \CSV object: # s0 = 's0' # csv0 = CSV.instance(s0) # csv0.class # => CSV # # Subsequent calls to +instance+ with that _same_ +string+ or +io+ # retrieve that same cached object: # csv1 = CSV.instance(s0) # csv1.class # => CSV # csv1.equal?(csv0) # => true # Same CSV object # # A subsequent call to +instance+ with a _different_ +string+ or +io+ # creates and caches a _different_ \CSV object. 
#   s1 = 's1'
#   csv2 = CSV.instance(s1)
#   csv2.equal?(csv0) # => false # Different CSV object
#
# All the cached objects remain available:
#   csv3 = CSV.instance(s0)
#   csv3.equal?(csv0) # true # Same CSV object
#   csv4 = CSV.instance(s1)
#   csv4.equal?(csv2) # true # Same CSV object
#
# ---
#
# When a block is given, calls the block with the created or retrieved
# \CSV object; returns the block's return value:
#   CSV.instance(s0) {|csv| :foo } # => :foo
def instance(data = $stdout, **options)
  # The cache key is the identity of the data object followed by the value
  # given for every known option, in DEFAULT_OPTIONS key order.
  option_values = options.values_at(*DEFAULT_OPTIONS.keys)
  signature = [data.object_id, *option_values]

  # Look the signature up, building and remembering a new object on a miss.
  @@instances ||= Hash.new
  cached = (@@instances[signature] ||= new(data, **options))

  # Without a block the object itself is the result.
  return cached unless block_given?

  yield cached
end

# :call-seq:
#   filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
#   filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
#   filter(**options) {|row| ... } -> array_of_arrays or csv_table
#
# - Parses \CSV from a source (\String, \IO stream, or ARGF).
# - Calls the given block with each parsed row:
#   - Without headers, each row is an \Array.
#   - With headers, each row is a CSV::Row.
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
# - Returns the parsed source:
#   - Without headers, an \Array of \Arrays.
#   - With headers, a CSV::Table.
#
# When +in_string_or_io+ is given, but not +out_string_or_io+,
# parses from the given +in_string_or_io+
# and generates to STDOUT.
#
# \String input without headers:
#
#   in_string = "foo,0\nbar,1\nbaz,2"
#   CSV.filter(in_string) do |row|
#     row[0].upcase!
# row[1] = - row[1].to_i # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] # # Output (to STDOUT): # # FOO,0 # BAR,-1 # BAZ,-2 # # \String input with headers: # # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" # CSV.filter(in_string, headers: true) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # => # # # Output (to STDOUT): # # Name,Value # FOO,0 # BAR,-1 # BAZ,-2 # # \IO stream input without headers: # # File.write('t.csv', "foo,0\nbar,1\nbaz,2") # File.open('t.csv') do |in_io| # CSV.filter(in_io) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] # # Output (to STDOUT): # # FOO,0 # BAR,-1 # BAZ,-2 # # \IO stream input with headers: # # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") # File.open('t.csv') do |in_io| # CSV.filter(in_io, headers: true) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # end # => # # # Output (to STDOUT): # # Name,Value # FOO,0 # BAR,-1 # BAZ,-2 # # When both +in_string_or_io+ and +out_string_or_io+ are given, # parses from +in_string_or_io+ and generates to +out_string_or_io+. # # \String output without headers: # # in_string = "foo,0\nbar,1\nbaz,2" # out_string = '' # CSV.filter(in_string, out_string) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] # out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n" # # \String output with headers: # # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" # out_string = '' # CSV.filter(in_string, out_string, headers: true) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # => # # out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" # # \IO stream output without headers: # # in_string = "foo,0\nbar,1\nbaz,2" # File.open('t.csv', 'w') do |out_io| # CSV.filter(in_string, out_io) do |row| # row[0].upcase! 
# row[1] = - row[1].to_i # end # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] # File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n" # # \IO stream output with headers: # # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" # File.open('t.csv', 'w') do |out_io| # CSV.filter(in_string, out_io, headers: true) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # end # => # # File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" # # When neither +in_string_or_io+ nor +out_string_or_io+ is given, # parses from {ARGF}[rdoc-ref:ARGF] # and generates to STDOUT. # # Without headers: # # # Put Ruby code into a file. # ruby = <<-EOT # require 'csv' # CSV.filter do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # EOT # File.write('t.rb', ruby) # # Put some CSV into a file. # File.write('t.csv', "foo,0\nbar,1\nbaz,2") # # Run the Ruby code with CSV filename as argument. # system(Gem.ruby, "t.rb", "t.csv") # # Output (to STDOUT): # # FOO,0 # BAR,-1 # BAZ,-2 # # With headers: # # # Put Ruby code into a file. # ruby = <<-EOT # require 'csv' # CSV.filter(headers: true) do |row| # row[0].upcase! # row[1] = - row[1].to_i # end # EOT # File.write('t.rb', ruby) # # Put some CSV into a file. # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") # # Run the Ruby code with CSV filename as argument. # system(Gem.ruby, "t.rb", "t.csv") # # Output (to STDOUT): # # Name,Value # FOO,0 # BAR,-1 # BAZ,-2 # # Arguments: # # * Argument +in_string_or_io+ must be a \String or an \IO stream. # * Argument +out_string_or_io+ must be a \String or an \IO stream. # * Arguments **options must be keyword options. # # - Each option defined as an {option for parsing}[#class-CSV-label-Options+for+Parsing] # is used for parsing the filter input. # - Each option defined as an {option for generating}[#class-CSV-label-Options+for+Generating] # is used for generating the filter output.
# # However, there are three options that may be used for both parsing and generating: # +col_sep+, +quote_char+, and +row_sep+. # # Therefore for method +filter+ (and method +filter+ only), # there are special options that allow these parsing and generating options # to be specified separately: # # - Options +input_col_sep+ and +output_col_sep+ # (and their aliases +in_col_sep+ and +out_col_sep+) # specify the column separators for parsing and generating. # - Options +input_quote_char+ and +output_quote_char+ # (and their aliases +in_quote_char+ and +out_quote_char+) # specify the quote characters for parsing and generating. # - Options +input_row_sep+ and +output_row_sep+ # (and their aliases +in_row_sep+ and +out_row_sep+) # specify the row separators for parsing and generating. # # Example options (for column separators): # # CSV.filter # Default for both parsing and generating. # CSV.filter(in_col_sep: ';') # ';' for parsing, default for generating. # CSV.filter(out_col_sep: '|') # Default for parsing, '|' for generating. # CSV.filter(in_col_sep: ';', out_col_sep: '|') # ';' for parsing, '|' for generating. # # Note that for a special option (e.g., +input_col_sep+) # and its corresponding "regular" option (e.g., +col_sep+), # the two are mutually overriding. # # Another example (possibly surprising): # # CSV.filter(in_col_sep: ';', col_sep: '|') # '|' for both parsing(!) and generating.
#
def filter(input=nil, output=nil, **options)
  # Sort every option into the parsing side, the generating side, or both.
  # An "in_"/"input_" or "out_"/"output_" prefix targets one side only and
  # is removed from the key.
  in_options = Hash.new
  out_options = {row_sep: InputRecordSeparator.value}
  options.each do |key, value|
    case key
    when /\Ain(?:put)?_(.+)\Z/
      in_options[$1.to_sym] = value
    when /\Aout(?:put)?_(.+)\Z/
      out_options[$1.to_sym] = value
    else
      [in_options, out_options].each {|side| side[key] = value}
    end
  end

  # Wrap the source (ARGF by default) and the destination ($stdout by default).
  reader = new(input || ARGF, **in_options)
  writer = new(output || $stdout, **out_options)

  # When headers are parsed and must also be written, the first row is
  # taken by hand so that the header row can be yielded and emitted before it.
  need_manual_header_output =
    in_options[:headers] &&
    out_options[:headers] == true &&
    out_options[:write_headers]
  if need_manual_header_output && (first_row = reader.shift)
    leading_rows =
      if first_row.is_a?(Row)
        [first_row.headers, first_row]
      else
        [first_row]
      end
    leading_rows.each do |leading_row|
      yield leading_row
      writer << leading_row
    end
  end

  # Every remaining row: let the block modify it, then write it out.
  reader.each do |row|
    yield row
    writer << row
  end
end

#
# :call-seq:
#   foreach(path_or_io, mode='r', **options) {|row| ... }
#   foreach(path_or_io, mode='r', **options) -> new_enumerator
#
# Calls the block with each row read from source +path_or_io+.
# # \Path input without headers: # # string = "foo,0\nbar,1\nbaz,2\n" # in_path = 't.csv' # File.write(in_path, string) # CSV.foreach(in_path) {|row| p row } # # Output: # # ["foo", "0"] # ["bar", "1"] # ["baz", "2"] # # \Path input with headers: # # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # in_path = 't.csv' # File.write(in_path, string) # CSV.foreach(in_path, headers: true) {|row| p row } # # Output: # # # # # # \IO stream input without headers: # # string = "foo,0\nbar,1\nbaz,2\n" # path = 't.csv' # File.write(path, string) # File.open('t.csv') do |in_io| # CSV.foreach(in_io) {|row| p row } # end # # Output: # # ["foo", "0"] # ["bar", "1"] # ["baz", "2"] # # \IO stream input with headers: # # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # path = 't.csv' # File.write(path, string) # File.open('t.csv') do |in_io| # CSV.foreach(in_io, headers: true) {|row| p row } # end # # Output: # # # # # # With no block given, returns an \Enumerator: # # string = "foo,0\nbar,1\nbaz,2\n" # path = 't.csv' # File.write(path, string) # CSV.foreach(path) # => # # # Arguments: # * Argument +path_or_io+ must be a file path or an \IO stream. # * Argument +mode+, if given, must be a \File mode. # See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes]. # * Arguments **options must be keyword options. # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. # * This method optionally accepts an additional :encoding option # that you can use to specify the Encoding of the data read from +path+ or +io+. # You must provide this unless your data is in the encoding # given by Encoding::default_external. # Parsing will use this to determine how to parse the data. # You may provide a second Encoding to # have the data transcoded as it is read. For example, # encoding: 'UTF-32BE:UTF-8' # would read +UTF-32BE+ data from the file # but transcode it to +UTF-8+ before parsing. 
def foreach(path, mode="r", **options, &block)
  # Without a block, hand back an Enumerator that replays this same call.
  unless block
    return to_enum(__method__, path, mode, **options)
  end

  # CSV.open closes the source once the block has finished.
  open(path, mode, **options) {|csv| csv.each(&block)}
end

#
# :call-seq:
#   generate(csv_string, **options) {|csv| ... }
#   generate(**options) {|csv| ... }
#
# * Argument +csv_string+, if given, must be a \String object;
#   defaults to a new empty \String.
# * Arguments +options+, if given, should be generating options.
#   See {Options for Generating}[#class-CSV-label-Options+for+Generating].
#
# ---
#
# Creates a new \CSV object via CSV.new(csv_string, **options);
# calls the block with the \CSV object, which the block may modify;
# returns the \String generated from the \CSV object.
#
# Note that a passed \String *is* modified by this method.
# Pass csv_string.dup if the \String must be preserved.
#
# This method has one additional option: :encoding,
# which sets the base Encoding for the output if no +str+ is specified.
# CSV needs this hint if you plan to output non-ASCII compatible data.
#
# ---
#
# Add lines:
#   input_string = "foo,0\nbar,1\nbaz,2\n"
#   output_string = CSV.generate(input_string) do |csv|
#     csv << ['bat', 3]
#     csv << ['bam', 4]
#   end
#   output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
#   input_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
#   output_string.equal?(input_string) # => true # Same string, modified
#
# Add lines into new string, preserving old string:
#   input_string = "foo,0\nbar,1\nbaz,2\n"
#   output_string = CSV.generate(input_string.dup) do |csv|
#     csv << ['bat', 3]
#     csv << ['bam', 4]
#   end
#   output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
#   input_string # => "foo,0\nbar,1\nbaz,2\n"
#   output_string.equal?(input_string) # => false # Different strings
#
# Create lines from nothing:
#   output_string = CSV.generate do |csv|
#     csv << ['foo', 0]
#     csv << ['bar', 1]
#     csv << ['baz', 2]
#   end
#   output_string # => "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# Raises an exception if +csv_string+ is not a \String object:
#   # Raises TypeError (no implicit conversion of Integer into String)
#   CSV.generate(0)
#
def generate(str=nil, **options)
  encoding = options[:encoding]
  target =
    if str
      # Wrap the caller's String and move to its end so new rows are appended.
      io = StringIO.new(str)
      io.seek(0, IO::SEEK_END)
      io.set_encoding(encoding) if encoding
      io
    else
      # No String given: start from a fresh, mutable, empty one.
      buffer = +""
      buffer.force_encoding(encoding) if encoding
      buffer
    end

  csv = new(target, **options)  # wrap
  yield csv                     # let the caller append rows
  csv.string                    # hand back the final String
end

# :call-seq:
#   CSV.generate_line(ary)
#   CSV.generate_line(ary, **options)
#
# Returns the \String created by generating \CSV from +ary+
# using the specified +options+.
#
# Argument +ary+ must be an \Array.
#
# Special options:
# * Option :row_sep defaults to "\n" on Ruby 3.0 or later
#   and $INPUT_RECORD_SEPARATOR ($/) otherwise:
#     $INPUT_RECORD_SEPARATOR # => "\n"
# * This method accepts an additional option, :encoding, which sets the base
#   Encoding for the output.
#   This method will try to guess your Encoding from the first \String field
#   in +row+ that is not ASCII-only (falling back to the first \String
#   field), if possible, but you may need to use this parameter as a
#   backup plan.
#
# For other +options+,
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
#
# ---
#
# Returns the \String generated from an \Array:
#   CSV.generate_line(['foo', '0']) # => "foo,0\n"
#
# ---
#
# Raises an exception if +ary+ is not an \Array:
#   # Raises NoMethodError (undefined method `each' for :foo:Symbol)
#   CSV.generate_line(:foo)
#
def generate_line(row, **options)
  options = {row_sep: InputRecordSeparator.value}.merge(options)
  str = +""
  if options[:encoding]
    str.force_encoding(options[:encoding])
  else
    # Guess: prefer the encoding of the first String that is not ASCII-only;
    # otherwise settle for the encoding of the first String seen.
    first_string_encoding = nil
    non_ascii_encoding = nil
    row.each do |field|
      next unless field.is_a?(String)
      first_string_encoding ||= field.encoding
      unless field.ascii_only?
        non_ascii_encoding = field.encoding
        break
      end
    end
    guessed_encoding = non_ascii_encoding || first_string_encoding
    str.force_encoding(guessed_encoding) if guessed_encoding
  end
  csv = new(str, **options)
  csv << row
  csv.string
end

# :call-seq:
#   CSV.generate_lines(rows)
#   CSV.generate_lines(rows, **options)
#
# Returns the \String created by generating \CSV from +rows+
# using the specified +options+.
#
# Argument +rows+ must be an \Array of rows. Each row is an \Array of \Strings or a \CSV::Row.
#
# Special options:
# * Option :row_sep defaults to "\n" on Ruby 3.0 or later
#   and $INPUT_RECORD_SEPARATOR ($/) otherwise:
#     $INPUT_RECORD_SEPARATOR # => "\n"
# * This method accepts an additional option, :encoding, which sets the base
#   Encoding for the output. This method will try to guess your Encoding from
#   the first non-+nil+ field in +row+, if possible, but you may need to use
#   this parameter as a backup plan.
#
# For other +options+,
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
#
# ---
#
# Returns the \String generated from an \Array of \Arrays:
#   CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# Raises an exception if +rows+ does not respond to +each+:
#   # Raises NoMethodError (undefined method `each' for :foo:Symbol)
#   CSV.generate_lines(:foo)
#
def generate_lines(rows, **options)
  # CSV.generate supplies the output String; each row is appended in order.
  self.generate(**options) do |csv|
    rows.each {|row| csv << row}
  end
end

#
# :call-seq:
#   open(path_or_io, mode = "r", **options ) -> new_csv
#   open(path_or_io, mode = "r", **options ) { |csv| ... } -> object
#
# possible options elements:
#   keyword form:
#     :invalid => nil      # raise error on invalid byte sequence (default)
#     :invalid => :replace # replace invalid byte sequence
#     :undef => :replace   # replace undefined conversion
#     :replace => string   # replacement string ("?" or "\uFFFD" if not specified)
#
# * Argument +path_or_io+ must be a file path or an \IO stream.
#   :include: ../doc/csv/arguments/io.rdoc
# * Argument +mode+, if given, must be a \File mode.
#   See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
# * Arguments **options must be keyword options.
#   See {Options for Generating}[#class-CSV-label-Options+for+Generating].
# * This method optionally accepts an additional :encoding option
#   that you can use to specify the Encoding of the data read from +path+ or +io+.
#   You must provide this unless your data is in the encoding
#   given by Encoding::default_external.
#   Parsing will use this to determine how to parse the data.
#   You may provide a second Encoding to
#   have the data transcoded as it is read. For example,
#     encoding: 'UTF-32BE:UTF-8'
#   would read +UTF-32BE+ data from the file
#   but transcode it to +UTF-8+ before parsing.
#
# ---
#
# These examples assume prior execution of:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
#   string_io = StringIO.new
#   string_io << "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# With no block given, returns a new \CSV object.
#
# Create a \CSV object using a file path:
#   csv = CSV.open(path)
#   csv # => #
#
# Create a \CSV object using an open \File:
#   csv = CSV.open(File.open(path))
#   csv # => #
#
# Create a \CSV object using a \StringIO:
#   csv = CSV.open(string_io)
#   csv # => #
# ---
#
# With a block given, calls the block with the created \CSV object;
# returns the block's return value:
#
# Using a file path:
#   csv = CSV.open(path) {|csv| p csv}
#   csv # => #
# Output:
#   #
#
# Using an open \File:
#   csv = CSV.open(File.open(path)) {|csv| p csv}
#   csv # => #
# Output:
#   #
#
# Using a \StringIO:
#   csv = CSV.open(string_io) {|csv| p csv}
#   csv # => #
# Output:
#   #
# ---
#
# Raises an exception if the argument is not a \String object or \IO object:
#   # Raises TypeError (no implicit conversion of Symbol into String)
#   CSV.open(:foo)
def open(filename_or_io, mode="r", **options)
  # wrap a File opened with the remaining +args+ with no newline
  # decorator
  file_opts = {}
  may_enable_bom_detection_automatically(filename_or_io,
                                         mode,
                                         options,
                                         file_opts)
  file_opts.merge!(options)
  file_opts[:universal_newline] ||= false unless file_opts.key?(:newline)

  # Transcoding and newline keys are only for the underlying stream
  # (they were copied into file_opts above), so drop them before the
  # remaining options reach CSV.new.
  [:invalid, :undef, :replace].each {|key| options.delete(key)}
  options.delete_if {|key, _| /newline\z/.match?(key)}

  if filename_or_io.is_a?(StringIO)
    stream = create_stringio(filename_or_io.string, mode, **file_opts)
  else
    begin
      stream = File.open(filename_or_io, mode, **file_opts)
    rescue ArgumentError => error
      # Only a "needs binmode" failure with the plain "r" mode is retried,
      # this time in binary mode with an explicit default encoding.
      binmode_needed = /needs binmode/.match?(error.message)
      raise unless binmode_needed && mode == "r"
      mode = "rb"
      file_opts = {encoding: Encoding.default_external}.merge(file_opts)
      retry
    end
  end

  begin
    csv = new(stream, **options)
  rescue Exception
    # Do not leak the stream when the CSV object cannot be built.
    stream.close
    raise
  end

  # handle blocks like Ruby's open(), not like the CSV library
  return csv unless block_given?

  begin
    yield csv
  ensure
    csv.close
  end
end

#
# :call-seq:
#   parse(string) -> array_of_arrays
#   parse(io) -> array_of_arrays
#   parse(string, headers: ..., **options) -> csv_table
#   parse(io, headers: ..., **options) -> csv_table
#   parse(string, **options) {|row| ... }
#   parse(io, **options) {|row| ... }
#
# Parses +string+ or +io+ using the specified +options+.
#
# - Argument +string+ should be a \String object;
#   it will be put into a new StringIO object positioned at the beginning.
# :include: ../doc/csv/arguments/io.rdoc
# - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
#
# ====== Without Option +headers+
#
# Without {option +headers+}[#class-CSV-label-Option+headers] case.
#
# These examples assume prior execution of:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# ---
#
# With no block given, returns an \Array of Arrays formed from the source.
#
# Parse a \String:
#   a_of_a = CSV.parse(string)
#   a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# Parse an open \File:
#   a_of_a = File.open(path) do |file|
#     CSV.parse(file)
#   end
#   a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# ---
#
# With a block given, calls the block with each parsed row:
#
# Parse a \String:
#   CSV.parse(string) {|row| p row }
#
# Output:
#   ["foo", "0"]
#   ["bar", "1"]
#   ["baz", "2"]
#
# Parse an open \File:
#   File.open(path) do |file|
#     CSV.parse(file) {|row| p row }
#   end
#
# Output:
#   ["foo", "0"]
#   ["bar", "1"]
#   ["baz", "2"]
#
# ====== With Option +headers+
#
# With {option +headers+}[#class-CSV-label-Option+headers] case.
#
# These examples assume prior execution of:
#   string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# ---
#
# With no block given, returns a CSV::Table object formed from the source.
#
# Parse a \String:
#   csv_table = CSV.parse(string, headers: ['Name', 'Count'])
#   csv_table # => #
#
# Parse an open \File:
#   csv_table = File.open(path) do |file|
#     CSV.parse(file, headers: ['Name', 'Count'])
#   end
#   csv_table # => #
#
# ---
#
# With a block given, calls the block with each parsed row,
# which has been formed into a CSV::Row object:
#
# Parse a \String:
#   CSV.parse(string, headers: ['Name', 'Count']) {|row| p row }
#
# Output:
#   #
#   #
#   #
#
# Parse an open \File:
#   File.open(path) do |file|
#     CSV.parse(file, headers: ['Name', 'Count']) {|row| p row }
#   end
#
# Output:
#   #
#   #
#   #
#
# ---
#
# Raises an exception if the argument is not a \String object or \IO object:
#   # Raises NoMethodError (undefined method `close' for :foo:Symbol)
#   CSV.parse(:foo)
#
# ---
#
# Please make sure if your text contains \BOM or not. CSV.parse will not remove
# \BOM automatically. You might want to remove \BOM before calling CSV.parse :
#   # remove BOM on calling File.open
#   File.open(path, encoding: 'bom|utf-8') do |file|
#     CSV.parse(file, headers: true) do |row|
#       # you can get value by column name because BOM is removed
#       p row['Name']
#     end
#   end
#
# Output:
#   # "foo"
#   # "bar"
#   # "baz"
def parse(str, **options, &block)
  csv = new(str, **options)

  if block
    # Streaming mode: pass each row to the caller's block.
    csv.each(&block)
  else
    # slurp contents, if no block is given
    begin
      csv.read
    ensure
      csv.close
    end
  end
end

# :call-seq:
#   CSV.parse_line(string) -> new_array or nil
#   CSV.parse_line(io) -> new_array or nil
#   CSV.parse_line(string, **options) -> new_array or nil
#   CSV.parse_line(io, **options) -> new_array or nil
#   CSV.parse_line(string, headers: true, **options) -> csv_row or nil
#   CSV.parse_line(io, headers: true, **options) -> csv_row or nil
#
# Returns the data created by parsing the first line of +string+ or +io+
# using the specified +options+.
#
# - Argument +string+ should be a \String object;
#   it will be put into a new StringIO object positioned at the beginning.
# :include: ../doc/csv/arguments/io.rdoc
# - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
#
# ====== Without Option +headers+
#
# Without option +headers+, returns the first row as a new \Array.
#
# These examples assume prior execution of:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# Parse the first line from a \String object:
#   CSV.parse_line(string) # => ["foo", "0"]
#
# Parse the first line from a File object:
#   File.open(path) do |file|
#     CSV.parse_line(file) # => ["foo", "0"]
#   end # => ["foo", "0"]
#
# Returns +nil+ if the argument is an empty \String:
#   CSV.parse_line('') # => nil
#
# ====== With Option +headers+
#
# With {option +headers+}[#class-CSV-label-Option+headers],
# returns the first row as a CSV::Row object.
#
# These examples assume prior execution of:
#   string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# Parse the first line from a \String object:
#   CSV.parse_line(string, headers: true) # => #
#
# Parse the first line from a File object:
#   File.open(path) do |file|
#     CSV.parse_line(file, headers: true)
#   end # => #
#
# ---
#
# Raises an exception if the argument is +nil+:
#   # Raises ArgumentError (Cannot parse nil as CSV):
#   CSV.parse_line(nil)
#
def parse_line(line, **options)
  # Only the first row produced by the row enumerator is wanted.
  csv = new(line, **options)
  rows = csv.each
  rows.first
end

#
# :call-seq:
#   read(source, **options) -> array_of_arrays
#   read(source, headers: true, **options) -> csv_table
#
# Opens the given +source+ with the given +options+ (see CSV.open),
# reads the source (see CSV#read), and returns the result,
# which will be either an \Array of Arrays or a CSV::Table.
#
# Without headers:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# With headers:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   CSV.read(path, headers: true) # => #
def read(path, **options)
  open(path, **options) { |csv| csv.read }
end

# :call-seq:
#   CSV.readlines(source, **options)
#
# Alias for CSV.read.
def readlines(path, **options)
  read(path, **options)
end

# :call-seq:
#   CSV.table(source, **options)
#
# Calls CSV.read with +source+, +options+, and certain default options:
# - +headers+: +true+
# - +converters+: +:numeric+
# - +header_converters+: +:symbol+
#
# Returns a CSV::Table object.
#
# Example:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   CSV.table(path) # => #
def table(path, **options)
  default_options = {
    headers:           true,
    converters:        :numeric,
    header_converters: :symbol,
  }
  # Caller-supplied options win over the defaults.
  options = default_options.merge(options)
  read(path, **options)
end

# True when RUBY_PLATFORM names a mingw or mswin build.
ON_WINDOWS = /mingw|mswin/.match?(RUBY_PLATFORM)
private_constant :ON_WINDOWS

# NOTE(review): the line break after +private+ was reconstructed from
# whitespace-collapsed text; confirm whether it applies to every following
# method or only to the next definition.
private
# Sets file_opts[:encoding] to "bom|utf-8" unless one of the guards below
# returns first. Only +file_opts+ is modified.
def may_enable_bom_detection_automatically(filename_or_io,
                                           mode,
                                           options,
                                           file_opts)
  if filename_or_io.is_a?(StringIO)
    # Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support:
    # https://github.com/ruby/stringio/pull/47
    return if RUBY_VERSION < "2.7"
  else
    # "bom|utf-8" may be buggy on Windows:
    # https://bugs.ruby-lang.org/issues/20526
    return if ON_WINDOWS
  end
  return unless Encoding.default_external == Encoding::UTF_8
  # An encoding chosen by the caller, by option or inside the mode string
  # ("r:..."), is left alone.
  return if options.key?(:encoding)
  return if options.key?(:external_encoding)
  return if mode.is_a?(String) and mode.include?(":")
  file_opts[:encoding] = "bom|utf-8"
end

if RUBY_VERSION < "2.7"
  # Builds a StringIO without passing any options: +:universal_newline+ and
  # the keys known to DEFAULT_OPTIONS are discarded, and anything left over
  # raises ArgumentError.
  def create_stringio(str, mode, opts)
    opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)}
    raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
    StringIO.new(str, mode)
  end
else
  # Builds a StringIO, forwarding +opts+ as keyword arguments.
  def create_stringio(str, mode, opts)
    StringIO.new(str, mode, **opts)
  end
end
end # class << self

# :call-seq:
#   CSV.new(string)
#   CSV.new(io)
#   CSV.new(string, **options)
#   CSV.new(io, **options)
#
# Returns the new \CSV object created using +string+ or +io+
# and the specified +options+.
#
# - Argument +string+ should be a \String object;
#   it will be put into a new StringIO object positioned at the beginning.
# :include: ../doc/csv/arguments/io.rdoc
# - Argument +options+: See:
#   * {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
#   * {Options for Generating}[#class-CSV-label-Options+for+Generating]
#   For performance reasons, the options cannot be overridden
#   in a \CSV object, so those specified here will endure.
#
# In addition to the \CSV instance methods, several \IO methods are delegated.
# See {Delegated Methods}[#class-CSV-label-Delegated+Methods].
#
# ---
#
# Create a \CSV object from a \String object:
#   csv = CSV.new('foo,0')
#   csv # => #
#
# Create a \CSV object from a \File object:
#   File.write('t.csv', 'foo,0')
#   csv = CSV.new(File.open('t.csv'))
#   csv # => #
#
# ---
#
# Raises an exception if the argument is +nil+:
#   # Raises ArgumentError (Cannot parse nil as CSV):
#   CSV.new(nil)
#
def initialize(data,
               col_sep: ",",
               row_sep: :auto,
               quote_char: '"',
               field_size_limit: nil,
               max_field_size: nil,
               converters: nil,
               unconverted_fields: nil,
               headers: false,
               return_headers: false,
               write_headers: nil,
               header_converters: nil,
               skip_blanks: false,
               force_quotes: false,
               skip_lines: nil,
               liberal_parsing: false,
               internal_encoding: nil,
               external_encoding: nil,
               encoding: nil,
               nil_value: nil,
               empty_value: "",
               strip: false,
               quote_empty: true,
               write_converters: nil,
               write_nil_value: nil,
               write_empty_value: "")
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?

  if data.is_a?(String)
    if encoding
      if encoding.is_a?(String)
        # "EXTERNAL:INTERNAL" requests transcoding; a single name only
        # relabels a copy of the data.
        data_external_encoding, data_internal_encoding = encoding.split(":", 2)
        if data_internal_encoding
          data = data.encode(data_internal_encoding, data_external_encoding)
        else
          data = data.dup.force_encoding(data_external_encoding)
        end
      else
        data = data.dup.force_encoding(encoding)
      end
    end
    @io = StringIO.new(data)
  else
    @io = data
  end
  # NOTE(review): +external_encoding+ is accepted but not referenced in
  # this method body.
  @encoding = determine_encoding(encoding, internal_encoding)

  @base_fields_converter_options = {
    nil_value: nil_value,
    empty_value: empty_value,
  }
  @write_fields_converter_options = {
    nil_value: write_nil_value,
    empty_value: write_empty_value,
  }
  @initial_converters = converters
  @initial_header_converters = header_converters
  @initial_write_converters = write_converters

  # A +field_size_limit+ is translated to +max_field_size+ (one less)
  # when +max_field_size+ itself was not given.
  if max_field_size.nil? and field_size_limit
    max_field_size = field_size_limit - 1
  end
  @parser_options = {
    column_separator: col_sep,
    row_separator: row_sep,
    quote_character: quote_char,
    max_field_size: max_field_size,
    unconverted_fields: unconverted_fields,
    headers: headers,
    return_headers: return_headers,
    skip_blanks: skip_blanks,
    skip_lines: skip_lines,
    liberal_parsing: liberal_parsing,
    encoding: @encoding,
    nil_value: nil_value,
    empty_value: empty_value,
    strip: strip,
  }
  @parser = nil
  @parser_enumerator = nil
  @eof_error = nil

  @writer_options = {
    encoding: @encoding,
    force_encoding: (not encoding.nil?),
    force_quotes: force_quotes,
    headers: headers,
    write_headers: write_headers,
    column_separator: col_sep,
    row_separator: row_sep,
    quote_character: quote_char,
    quote_empty: quote_empty,
  }

  @writer = nil
  # Calls +writer+ right away when headers are to be written.
  writer if @writer_options[:write_headers]
end

# A CSV subclass whose column separator defaults to a tab; an explicit
# +col_sep+ option still takes precedence.
class TSV < CSV
  def initialize(data, **options)
    super(data, **({col_sep: "\t"}.merge(options)))
  end
end

# :call-seq:
#   csv.col_sep -> string
#
# Returns the encoded column separator; used for parsing and writing;
# see {Option +col_sep+}[#class-CSV-label-Option+col_sep]:
#   CSV.new('').col_sep # => ","
def col_sep
  parser.column_separator
end

# :call-seq:
#   csv.row_sep -> string
#
# Returns the encoded row separator; used for parsing and writing;
# see {Option +row_sep+}[#class-CSV-label-Option+row_sep]:
#   CSV.new('').row_sep # => "\n"
def row_sep
  parser.row_separator
end

# :call-seq:
#   csv.quote_char -> character
#
# Returns the encoded quote character; used for parsing and writing;
# see {Option +quote_char+}[#class-CSV-label-Option+quote_char]:
#   CSV.new('').quote_char # => "\""
def quote_char
  parser.quote_character
end

# :call-seq:
#   csv.field_size_limit -> integer or nil
#
# Returns the limit for field size; used for parsing;
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
#   CSV.new('').field_size_limit # => nil
#
# Deprecated since 3.2.3. Use +max_field_size+ instead.
def field_size_limit
  parser.field_size_limit
end

# :call-seq:
#   csv.max_field_size -> integer or nil
#
# Returns the limit for field size; used for parsing;
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
#   CSV.new('').max_field_size # => nil
#
# Since 3.2.3.
def max_field_size
  parser.max_field_size
end

# :call-seq:
#   csv.skip_lines -> regexp or nil
#
# Returns the \Regexp used to identify comment lines; used for parsing;
# see {Option +skip_lines+}[#class-CSV-label-Option+skip_lines]:
#   CSV.new('').skip_lines # => nil
def skip_lines
  parser.skip_lines
end

# :call-seq:
#   csv.converters -> array
#
# Returns an \Array containing field converters;
# see {Field Converters}[#class-CSV-label-Field+Converters]:
#   csv = CSV.new('')
#   csv.converters # => []
#   csv.convert(:integer)
#   csv.converters # => [:integer]
#   csv.convert(proc {|x| x.to_s })
#   csv.converters
#
# Note that you need to call
# +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use
# this method.
def converters
  parser_fields_converter.map do |converter|
    # Report a built-in converter by its name, anything else as itself.
    registered = Converters.rassoc(converter)
    registered ? registered.first : converter
  end
end

# :call-seq:
#   csv.unconverted_fields? -> object
#
# Returns the value that determines whether unconverted fields are to be
# available; used for parsing;
# see {Option +unconverted_fields+}[#class-CSV-label-Option+unconverted_fields]:
#   CSV.new('').unconverted_fields? # => nil
def unconverted_fields?
  parser.unconverted_fields?
end

# :call-seq:
#   csv.headers -> object
#
# Returns the value that determines whether headers are used; used for parsing;
# see {Option +headers+}[#class-CSV-label-Option+headers]:
#   CSV.new('').headers # => nil
def headers
  return @writer.headers if @writer

  parsed_headers = parser.headers
  return parsed_headers if parsed_headers

  # Nothing parsed yet: fall back to the option as given, with +false+
  # reported as +nil+.
  configured_headers = @parser_options[:headers]
  configured_headers == false ? nil : configured_headers
end

# :call-seq:
#   csv.return_headers? -> true or false
#
# Returns the value that determines whether headers are to be returned; used for parsing;
# see {Option +return_headers+}[#class-CSV-label-Option+return_headers]:
#   CSV.new('').return_headers? # => false
def return_headers?
  parser.return_headers?
end

# :call-seq:
#   csv.write_headers? -> true or false
#
# Returns the value that determines whether headers are to be written; used for generating;
# see {Option +write_headers+}[#class-CSV-label-Option+write_headers]:
#   CSV.new('').write_headers? # => nil
def write_headers?
  @writer_options[:write_headers]
end

# :call-seq:
#   csv.header_converters -> array
#
# Returns an \Array containing header converters; used for parsing;
# see {Header Converters}[#class-CSV-label-Header+Converters]:
#   CSV.new('').header_converters # => []
#
# Note that you need to call
# +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor
# to use this method.
def header_converters
  header_fields_converter.map do |converter|
    # Report a built-in header converter by its name, anything else as itself.
    registered = HeaderConverters.rassoc(converter)
    registered ? registered.first : converter
  end
end

# :call-seq:
#   csv.skip_blanks? -> true or false
#
# Returns the value that determines whether blank lines are to be ignored; used for parsing;
# see {Option +skip_blanks+}[#class-CSV-label-Option+skip_blanks]:
#   CSV.new('').skip_blanks? # => false
def skip_blanks?
  parser.skip_blanks?
end

# :call-seq:
#   csv.force_quotes? -> true or false
#
# Returns the value that determines whether all output fields are to be quoted;
# used for generating;
# see {Option +force_quotes+}[#class-CSV-label-Option+force_quotes]:
#   CSV.new('').force_quotes? # => false
def force_quotes?
  @writer_options[:force_quotes]
end

# :call-seq:
#   csv.liberal_parsing? -> true or false
#
# Returns the value that determines whether illegal input is to be handled; used for parsing;
# see {Option +liberal_parsing+}[#class-CSV-label-Option+liberal_parsing]:
#   CSV.new('').liberal_parsing? # => false
def liberal_parsing?
  parser.liberal_parsing?
end

# :call-seq:
#   csv.encoding -> encoding
#
# Returns the encoding used for parsing and generating;
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
#   CSV.new('').encoding # => #<Encoding:UTF-8>
attr_reader :encoding

# :call-seq:
#   csv.lineno -> integer
#
# Returns the count of the rows parsed or generated.
#
# Parsing:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   CSV.open(path) do |csv|
#     csv.each do |row|
#       p [csv.lineno, row]
#     end
#   end
# Output:
#   [1, ["foo", "0"]]
#   [2, ["bar", "1"]]
#   [3, ["baz", "2"]]
#
# Generating:
#   CSV.generate do |csv|
#     p csv.lineno; csv << ['foo', 0]
#     p csv.lineno; csv << ['bar', 1]
#     p csv.lineno; csv << ['baz', 2]
#   end
# Output:
#   0
#   1
#   2
def lineno
  # Once a writer exists it is the source of the count.
  @writer ? @writer.lineno : parser.lineno
end

# :call-seq:
#   csv.line -> array
#
# Returns the line most recently read:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   CSV.open(path) do |csv|
#     csv.each do |row|
#       p [csv.lineno, csv.line]
#     end
#   end
# Output:
#   [1, "foo,0\n"]
#   [2, "bar,1\n"]
#   [3, "baz,2\n"]
def line
  parser.line
end

### IO and StringIO Delegation ###

extend Forwardable
def_delegators :@io, *%i[
  binmode close close_read close_write
  closed? external_encoding fcntl
  fileno flush fsync internal_encoding
  isatty pid pos pos= reopen
  seek string sync sync= tell
  truncate tty?
]

# Returns the wrapped IO's answer, or +false+ when it has no #binmode?.
def binmode?
  @io.respond_to?(:binmode?) ? @io.binmode? : false
end

def flock(*args)
  return @io.flock(*args) if @io.respond_to?(:flock)
  raise NotImplementedError
end

def ioctl(*args)
  return @io.ioctl(*args) if @io.respond_to?(:ioctl)
  raise NotImplementedError
end

# Returns the wrapped IO's path, or +nil+ when it has no #path.
def path
  @io.respond_to?(:path) ? @io.path : nil
end

def stat(*args)
  return @io.stat(*args) if @io.respond_to?(:stat)
  raise NotImplementedError
end

def to_i
  return @io.to_i if @io.respond_to?(:to_i)
  raise NotImplementedError
end

def to_io
  if @io.respond_to?(:to_io)
    @io.to_io
  else
    @io
  end
end

# Peeks at the next row to decide whether input is exhausted.
# A MalformedCSVError hit while peeking is stored in @eof_error rather than
# raised here, and +false+ is returned.
def eof?
  return false if @eof_error

  parser_enumerator.peek
  false
rescue MalformedCSVError => error
  @eof_error = error
  false
rescue StopIteration
  true
end
alias_method :eof, :eof?

# Rewinds the underlying IO object and resets CSV's lineno() counter.
def rewind
  # Drop all cached parse state so the next read starts from scratch.
  @parser = @parser_enumerator = @eof_error = nil
  @writer&.rewind
  @io.rewind
end

### End Delegation ###

# :call-seq:
#   csv << row -> self
#
# Appends a row to +self+.
#
# - Argument +row+ must be an \Array object or a CSV::Row object.
# - The output stream must be open for writing.
#
# ---
#
# Append Arrays:
#   CSV.generate do |csv|
#     csv << ['foo', 0]
#     csv << ['bar', 1]
#     csv << ['baz', 2]
#   end # => "foo,0\nbar,1\nbaz,2\n"
#
# Append CSV::Rows:
#   headers = []
#   CSV.generate do |csv|
#     csv << CSV::Row.new(headers, ['foo', 0])
#     csv << CSV::Row.new(headers, ['bar', 1])
#     csv << CSV::Row.new(headers, ['baz', 2])
#   end # => "foo,0\nbar,1\nbaz,2\n"
#
# Headers in CSV::Row objects are not appended:
#   headers = ['Name', 'Count']
#   CSV.generate do |csv|
#     csv << CSV::Row.new(headers, ['foo', 0])
#     csv << CSV::Row.new(headers, ['bar', 1])
#     csv << CSV::Row.new(headers, ['baz', 2])
#   end # => "foo,0\nbar,1\nbaz,2\n"
#
# ---
#
# Raises an exception if +row+ is not an \Array or \CSV::Row:
#   CSV.generate do |csv|
#     # Raises NoMethodError (undefined method `collect' for :foo:Symbol)
#     csv << :foo
#   end
#
# Raises an exception if the output stream is not opened for writing:
#   path = 't.csv'
#   File.write(path, '')
#   File.open(path) do |file|
#     CSV.open(file) do |csv|
#       # Raises IOError (not opened for writing)
#       csv << ['foo', 0]
#     end
#   end
def <<(row)
  writer << row
  self
end
alias_method :add_row, :<<
alias_method :puts,    :<<

# :call-seq:
#   convert(converter_name) -> array_of_procs
#   convert {|field, field_info| ... } -> array_of_procs
#
# - With no block, installs a field converter (a \Proc).
# - With a block, defines and installs a custom field converter.
# - Returns the \Array of installed field converters.
#
# - Argument +converter_name+, if given, should be the name
#   of an existing field converter.
#
# See {Field Converters}[#class-CSV-label-Field+Converters].
# ---
#
# With no block, installs a field converter:
#   csv = CSV.new('')
#   csv.convert(:integer)
#   csv.convert(:float)
#   csv.convert(:date)
#   csv.converters # => [:integer, :float, :date]
#
# ---
#
# The block, if given, is called for each field:
# - Argument +field+ is the field value.
# - Argument +field_info+ is a CSV::FieldInfo object
#   containing details about the field.
#
# The examples here assume the prior execution of:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# Example giving a block:
#   csv = CSV.open(path)
#   csv.convert {|field, field_info| p [field, field_info]; field.upcase }
#   csv.read # => [["FOO", "0"], ["BAR", "1"], ["BAZ", "2"]]
#
# Output:
#   ["foo", #<struct CSV::FieldInfo ...>]
#   ["0", #<struct CSV::FieldInfo ...>]
#   ["bar", #<struct CSV::FieldInfo ...>]
#   ["1", #<struct CSV::FieldInfo ...>]
#   ["baz", #<struct CSV::FieldInfo ...>]
#   ["2", #<struct CSV::FieldInfo ...>]
#
# The block need not return a \String object:
#   csv = CSV.open(path)
#   csv.convert {|field, field_info| field.to_sym }
#   csv.read # => [[:foo, :"0"], [:bar, :"1"], [:baz, :"2"]]
#
# If +converter_name+ is given, the block is not called:
#   csv = CSV.open(path)
#   csv.convert(:integer) {|field, field_info| fail 'Cannot happen' }
#   csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
#
# ---
#
# Raises a parse-time exception if +converter_name+ is not the name of a built-in
# field converter:
#   csv = CSV.open(path)
#   csv.convert(:nosuch) => [nil]
#   # Raises NoMethodError (undefined method `arity' for nil:NilClass)
#   csv.read
def convert(name = nil, &converter)
  parser_fields_converter.add_converter(name, &converter)
end

# :call-seq:
#   header_convert(converter_name) -> array_of_procs
#   header_convert {|header, field_info| ... } -> array_of_procs
#
# - With no block, installs a header converter (a \Proc).
# - With a block, defines and installs a custom header converter.
# - Returns the \Array of installed header converters.
#
# - Argument +converter_name+, if given, should be the name
#   of an existing header converter.
#
# See {Header Converters}[#class-CSV-label-Header+Converters].
# ---
#
# With no block, installs a header converter:
#   csv = CSV.new('')
#   csv.header_convert(:symbol)
#   csv.header_convert(:downcase)
#   csv.header_converters # => [:symbol, :downcase]
#
# ---
#
# The block, if given, is called for each header:
# - Argument +header+ is the header value.
# - Argument +field_info+ is a CSV::FieldInfo object
#   containing details about the header.
#
# The examples here assume the prior execution of:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#
# Example giving a block:
#   csv = CSV.open(path, headers: true)
#   csv.header_convert {|header, field_info| p [header, field_info]; header.upcase }
#   table = csv.read
#   table # => #<CSV::Table mode:col_or_row row_count:4>
#   table.headers # => ["NAME", "VALUE"]
#
# Output:
#   ["Name", #<struct CSV::FieldInfo ...>]
#   ["Value", #<struct CSV::FieldInfo ...>]
#
# The block need not return a \String object:
#   csv = CSV.open(path, headers: true)
#   csv.header_convert {|header, field_info| header.to_sym }
#   table = csv.read
#   table.headers # => [:Name, :Value]
#
# If +converter_name+ is given, the block is not called:
#   csv = CSV.open(path, headers: true)
#   csv.header_convert(:downcase) {|header, field_info| fail 'Cannot happen' }
#   table = csv.read
#   table.headers # => ["name", "value"]
# ---
#
# Raises a parse-time exception if +converter_name+ is not the name of a built-in
# header converter:
#   csv = CSV.open(path, headers: true)
#   csv.header_convert(:nosuch)
#   # Raises NoMethodError (undefined method `arity' for nil:NilClass)
#   csv.read
def header_convert(name = nil, &converter)
  header_fields_converter.add_converter(name, &converter)
end

include Enumerable

# :call-seq:
#   csv.each -> enumerator
#   csv.each {|row| ...}
#
# Calls the block with each successive row.
# The data source must be opened for reading.
#
# Without headers:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string)
#   csv.each do |row|
#     p row
#   end
# Output:
#   ["foo", "0"]
#   ["bar", "1"]
#   ["baz", "2"]
#
# With headers:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string, headers: true)
#   csv.each do |row|
#     p row
#   end
# Output (one CSV::Row per data row):
#   #<CSV::Row ...>
#   #<CSV::Row ...>
#   #<CSV::Row ...>
#
# ---
#
# Raises an exception if the source is not opened for reading:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string)
#   csv.close
#   # Raises IOError (not opened for reading)
#   csv.each do |row|
#     p row
#   end
def each(&block)
  return to_enum(__method__) unless block_given?

  # Runs until the parser enumerator signals exhaustion with StopIteration.
  yield(parser_enumerator.next) while true
rescue StopIteration
end

# :call-seq:
#   csv.read -> array or csv_table
#
# Forms the remaining rows from +self+ into:
# - A CSV::Table object, if headers are in use.
# - An \Array of Arrays, otherwise.
#
# The data source must be opened for reading.
#
# Without headers:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   csv = CSV.open(path)
#   csv.read # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# With headers:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   path = 't.csv'
#   File.write(path, string)
#   csv = CSV.open(path, headers: true)
#   csv.read # => #<CSV::Table mode:col_or_row row_count:4>
#
# ---
#
# Raises an exception if the source is not opened for reading:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string)
#   csv.close
#   # Raises IOError (not opened for reading)
#   csv.read
def read
  remaining_rows = to_a
  return remaining_rows unless parser.use_headers?

  Table.new(remaining_rows, headers: parser.headers)
end
alias_method :readlines, :read

# :call-seq:
#   csv.header_row? -> true or false
#
# Returns +true+ if the next row to be read is a header row\;
# +false+ otherwise.
#
# Without headers:
#   string = "foo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string)
#   csv.header_row? # => false
#
# With headers:
#   string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   csv = CSV.new(string, headers: true)
#   csv.header_row?
# => true # csv.shift # => # # csv.header_row? # => false # # --- # # Raises an exception if the source is not opened for reading: # string = "foo,0\nbar,1\nbaz,2\n" # csv = CSV.new(string) # csv.close # # Raises IOError (not opened for reading) # csv.header_row? def header_row? parser.header_row? end # :call-seq: # csv.shift -> array, csv_row, or nil # # Returns the next row of data as: # - An \Array if no headers are used. # - A CSV::Row object if headers are used. # # The data source must be opened for reading. # # Without headers: # string = "foo,0\nbar,1\nbaz,2\n" # csv = CSV.new(string) # csv.shift # => ["foo", "0"] # csv.shift # => ["bar", "1"] # csv.shift # => ["baz", "2"] # csv.shift # => nil # # With headers: # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # csv = CSV.new(string, headers: true) # csv.shift # => # # csv.shift # => # # csv.shift # => # # csv.shift # => nil # # --- # # Raises an exception if the source is not opened for reading: # string = "foo,0\nbar,1\nbaz,2\n" # csv = CSV.new(string) # csv.close # # Raises IOError (not opened for reading) # csv.shift def shift if @eof_error eof_error, @eof_error = @eof_error, nil raise eof_error end begin parser_enumerator.next rescue StopIteration nil end end alias_method :gets, :shift alias_method :readline, :shift # :call-seq: # csv.inspect -> string # # Returns a \String showing certain properties of +self+: # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # csv = CSV.new(string, headers: true) # s = csv.inspect # s # => "#" def inspect str = ["#<", self.class.to_s, " io_type:"] # show type of wrapped IO if @io == $stdout then str << "$stdout" elsif @io == $stdin then str << "$stdin" elsif @io == $stderr then str << "$stderr" else str << @io.class.to_s end # show IO.path(), if available if @io.respond_to?(:path) and (p = @io.path) str << " io_path:" << p.inspect end # show encoding str << " encoding:" << @encoding.name # show other attributes ["lineno", "col_sep", "row_sep", "quote_char"].each do 
|attr_name| if a = __send__(attr_name) str << " " << attr_name << ":" << a.inspect end end ["skip_blanks", "liberal_parsing"].each do |attr_name| if a = __send__("#{attr_name}?") str << " " << attr_name << ":" << a.inspect end end _headers = headers str << " headers:" << _headers.inspect if _headers str << ">" begin str.join('') rescue # any encoding error str.map do |s| e = Encoding::Converter.asciicompat_encoding(s.encoding) e ? s.encode(e) : s.force_encoding("ASCII-8BIT") end.join('') end end private def determine_encoding(encoding, internal_encoding) # honor the IO encoding if we can, otherwise default to ASCII-8BIT io_encoding = raw_encoding return io_encoding if io_encoding return Encoding.find(internal_encoding) if internal_encoding if encoding encoding, = encoding.split(":", 2) if encoding.is_a?(String) return Encoding.find(encoding) end Encoding.default_internal || Encoding.default_external end def normalize_converters(converters) converters ||= [] unless converters.is_a?(Array) converters = [converters] end converters.collect do |converter| case converter when Proc # custom code block [nil, converter] else # by name [converter, nil] end end end # # Processes +fields+ with @converters, or @header_converters # if +headers+ is passed as +true+, returning the converted field set. Any # converter that changes the field into something other than a String halts # the pipeline of conversion for that field. This is primarily an efficiency # shortcut. # def convert_fields(fields, headers = false) if headers header_fields_converter.convert(fields, nil, 0) else parser_fields_converter.convert(fields, @headers, lineno) end end # # Returns the encoding of the internal IO object. # def raw_encoding if @io.respond_to? :internal_encoding @io.internal_encoding || @io.external_encoding elsif @io.respond_to? 
:encoding @io.encoding else nil end end def parser_fields_converter @parser_fields_converter ||= build_parser_fields_converter end def build_parser_fields_converter specific_options = { builtin_converters_name: :Converters, } options = @base_fields_converter_options.merge(specific_options) build_fields_converter(@initial_converters, options) end def header_fields_converter @header_fields_converter ||= build_header_fields_converter end def build_header_fields_converter specific_options = { builtin_converters_name: :HeaderConverters, accept_nil: true, } options = @base_fields_converter_options.merge(specific_options) build_fields_converter(@initial_header_converters, options) end def writer_fields_converter @writer_fields_converter ||= build_writer_fields_converter end def build_writer_fields_converter build_fields_converter(@initial_write_converters, @write_fields_converter_options) end def build_fields_converter(initial_converters, options) fields_converter = FieldsConverter.new(options) normalize_converters(initial_converters).each do |name, converter| fields_converter.add_converter(name, &converter) end fields_converter end def parser @parser ||= Parser.new(@io, parser_options) end def parser_options @parser_options.merge(header_fields_converter: header_fields_converter, fields_converter: parser_fields_converter) end def parser_enumerator @parser_enumerator ||= parser.parse end def writer @writer ||= Writer.new(@io, writer_options) end def writer_options @writer_options.merge(header_fields_converter: header_fields_converter, fields_converter: writer_fields_converter) end end # Passes +args+ to CSV::instance. # # CSV("CSV,data").read # #=> [["CSV", "data"]] # # If a block is given, the instance is passed the block and the return value # becomes the return value of the block. # # CSV("CSV,data") { |c| # c.read.any? { |a| a.include?("data") } # } #=> true # # CSV("CSV,data") { |c| # c.read.any? 
#       { |a| a.include?("zombies") }
#   } #=> false
#
# CSV options may also be given.
#
#   io = StringIO.new
#   CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
#
# This API is not Ractor-safe.
#
def CSV(*args, **options, &block)
  # Thin wrapper: everything, including the block, is forwarded unchanged.
  CSV.instance(*args, **options, &block)
end

require_relative "csv/version"
require_relative "csv/core_ext/array"
require_relative "csv/core_ext/string"
csv-3.3.5/lib/csv/000077500000000000000000000000001501670011600136535ustar00rootroot00000000000000csv-3.3.5/lib/csv/core_ext/000077500000000000000000000000001501670011600154635ustar00rootroot00000000000000csv-3.3.5/lib/csv/core_ext/array.rb000066400000000000000000000003031501670011600171220ustar00rootroot00000000000000
class Array
  # Equivalent to CSV::generate_line(self, options)
  #
  #   ["CSV", "data"].to_csv
  #     #=> "CSV,data\n"
  def to_csv(**options)
    CSV.generate_line(self, **options)
  end
end
csv-3.3.5/lib/csv/core_ext/string.rb000066400000000000000000000003021501670011600173110ustar00rootroot00000000000000
class String
  # Equivalent to CSV::parse_line(self, options)
  #
  #   "CSV,data".parse_csv
  #     #=> ["CSV", "data"]
  def parse_csv(**options)
    CSV.parse_line(self, **options)
  end
end
csv-3.3.5/lib/csv/fields_converter.rb000066400000000000000000000053051501670011600175400ustar00rootroot00000000000000
# frozen_string_literal: true

class CSV
  # Note: Don't use this class directly. This is an internal class.
  class FieldsConverter
    include Enumerable

    # Default for the +quoted_fields+ argument of #convert: an empty, frozen
    # Array whose #[] always answers +false+.
    NO_QUOTED_FIELDS = [] # :nodoc:
    def NO_QUOTED_FIELDS.[](_index)
      false
    end
    NO_QUOTED_FIELDS.freeze

    #
    # A CSV::FieldsConverter is a data structure for storing the
    # fields converter properties to be passed as a parameter
    # when parsing a new file (e.g.
CSV::Parser.new(@io, parser_options)) # def initialize(options={}) @converters = [] @nil_value = options[:nil_value] @empty_value = options[:empty_value] @empty_value_is_empty_string = (@empty_value == "") @accept_nil = options[:accept_nil] @builtin_converters_name = options[:builtin_converters_name] @need_static_convert = need_static_convert? end def add_converter(name=nil, &converter) if name.nil? # custom converter @converters << converter else # named converter combo = builtin_converters[name] case combo when Array # combo converter combo.each do |sub_name| add_converter(sub_name) end else # individual named converter @converters << combo end end end def each(&block) @converters.each(&block) end def empty? @converters.empty? end def convert(fields, headers, lineno, quoted_fields=NO_QUOTED_FIELDS) return fields unless need_convert? fields.collect.with_index do |field, index| if field.nil? field = @nil_value elsif field.is_a?(String) and field.empty? field = @empty_value unless @empty_value_is_empty_string end @converters.each do |converter| break if field.nil? and @accept_nil if converter.arity == 1 # straight field converter field = converter[field] else # FieldInfo converter if headers header = headers[index] else header = nil end quoted = quoted_fields[index] field = converter[field, FieldInfo.new(index, lineno, header, quoted)] end break unless field.is_a?(String) # short-circuit pipeline for speed end field # final state of each field, converted or original end end private def need_static_convert? not (@nil_value.nil? and @empty_value_is_empty_string) end def need_convert? @need_static_convert or (not @converters.empty?) 
end def builtin_converters @builtin_converters ||= ::CSV.const_get(@builtin_converters_name) end end end csv-3.3.5/lib/csv/input_record_separator.rb000066400000000000000000000004251501670011600207560ustar00rootroot00000000000000require "English" require "stringio" class CSV module InputRecordSeparator class << self if RUBY_VERSION >= "3.0.0" def value "\n" end else def value $INPUT_RECORD_SEPARATOR end end end end end csv-3.3.5/lib/csv/parser.rb000066400000000000000000001123621501670011600155010ustar00rootroot00000000000000# frozen_string_literal: true require "strscan" require_relative "input_record_separator" require_relative "row" require_relative "table" class CSV # Note: Don't use this class directly. This is an internal class. class Parser # # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO # or String object being read from or written to. Your data is never transcoded # (unless you ask Ruby to transcode it for you) and will literally be parsed in # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the # Encoding of your data. This is accomplished by transcoding the parser itself # into your Encoding. # class << self ARGF_OBJECT_ID = ARGF.object_id # Convenient method to check whether the give input reached EOF # or not. def eof?(input) # We can't use input != ARGF in Ractor. Because ARGF isn't a # shareable object. input.object_id != ARGF_OBJECT_ID and input.respond_to?(:eof) and input.eof? end end # Raised when encoding is invalid. class InvalidEncoding < StandardError end # Raised when unexpected case is happen. class UnexpectedError < StandardError end # # CSV::Scanner receives a CSV output, scans it and return the content. # It also controls the life cycle of the object with its methods +keep_start+, # +keep_end+, +keep_back+, +keep_drop+. # # Uses StringScanner (the official strscan gem). Strscan provides lexical # scanning operations on a String. We inherit its object and take advantage # on the methods. 
# For more information, please visit:
# https://docs.ruby-lang.org/en/master/StringScanner.html
#
class Scanner < StringScanner
  alias_method :scan_all, :scan

  def initialize(*args)
    super
    # Stack of byte positions pushed by keep_start.
    @keeps = []
  end

  # Yields each line of the unscanned rest, moving the scan pointer to the
  # end of that line before the block is called.
  def each_line(row_separator)
    position = pos
    rest.each_line(row_separator) do |line|
      position += line.bytesize
      self.pos = position
      yield(line)
    end
  end

  # Remembers the current position.
  def keep_start
    @keeps.push(pos)
  end

  # Returns the bytes scanned since the matching keep_start.
  def keep_end
    start = @keeps.pop
    string.byteslice(start, pos - start)
  end

  # Moves the scan pointer back to the matching keep_start.
  def keep_back
    self.pos = @keeps.pop
  end

  # Discards the matching keep_start without moving the scan pointer.
  def keep_drop
    @keeps.pop
  end
end

#
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
# It also controls the life cycle of the object with its methods +keep_start+,
# +keep_end+, +keep_back+, +keep_drop+.
#
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
# If there's a match, the scanner advances the "scan pointer" and returns the matched string.
# Otherwise, the scanner returns nil.
#
# CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
# If there is no more data (eos? = true), it returns "".
#
class InputsScanner
  def initialize(inputs, encoding, row_separator, chunk_size: 8192)
    # Work on a copy: inputs are shifted off as they are consumed.
    @inputs = inputs.dup
    @encoding = encoding
    @row_separator = row_separator
    @chunk_size = chunk_size
    @last_scanner = @inputs.empty?
    # Stack of [scanner, start position, buffer] entries; the buffer holds
    # kept data from chunks that have already been replaced.
    @keeps = []
    read_chunk
  end

  # Yields each +row_separator+ terminated line, reading further chunks as
  # needed. A line that is not yet terminated is carried over in +buffer+
  # and completed from the next chunk.
  def each_line(row_separator)
    return enum_for(__method__, row_separator) unless block_given?
    buffer = nil
    input = @scanner.rest
    position = @scanner.pos
    offset = 0
    n_row_separator_chars = row_separator.size
    # trace(__method__, :start, input)
    while true
      input.each_line(row_separator) do |line|
        @scanner.pos += line.bytesize
        if buffer
          # A two-character separator may be split across two chunks:
          # its first character ends +buffer+, its second starts +line+.
          if n_row_separator_chars == 2 and
            buffer.end_with?(row_separator[0]) and
            line.start_with?(row_separator[1])
            buffer << line[0]
            line = line[1..-1]
            position += buffer.bytesize + offset
            @scanner.pos = position
            offset = 0
            yield(buffer)
            buffer = nil
            next if line.empty?
          else
            buffer << line
            line = buffer
            buffer = nil
          end
        end
        if line.end_with?(row_separator)
          position += line.bytesize + offset
          @scanner.pos = position
          offset = 0
          yield(line)
        else
          buffer = line
        end
      end
      break unless read_chunk
      input = @scanner.rest
      position = @scanner.pos
      # The carried-over bytes belong to the previous chunk, not this one.
      offset = -buffer.bytesize if buffer
    end
    # Whatever is left has no trailing separator.
    yield(buffer) if buffer
  end

  # Scans +pattern+ in the current chunk; when the match consumed the chunk
  # to its end, the next chunk is loaded.
  def scan(pattern)
    # trace(__method__, pattern, :start)
    value = @scanner.scan(pattern)
    # trace(__method__, pattern, :done, :last, value) if @last_scanner
    return value if @last_scanner

    read_chunk if value and @scanner.eos?
    # trace(__method__, pattern, :done, value)
    value
  end

  # Like #scan, but keeps matching into following chunks and returns the
  # concatenated match.
  def scan_all(pattern)
    # trace(__method__, pattern, :start)
    value = @scanner.scan(pattern)
    # trace(__method__, pattern, :done, :last, value) if @last_scanner
    return value if @last_scanner

    # trace(__method__, pattern, :done, :nil) if value.nil?
    return nil if value.nil?
    while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
      # trace(__method__, pattern, :sub, sub_value)
      value << sub_value
    end
    # trace(__method__, pattern, :done, value)
    value
  end

  def eos?
    @scanner.eos?
  end

  # Pushes a new keep that starts at the current position of the current chunk.
  def keep_start
    # trace(__method__, :start)
    adjust_last_keep
    @keeps.push([@scanner, @scanner.pos, nil])
    # trace(__method__, :done)
  end

  # Pops the latest keep and returns the data scanned since it was started,
  # including anything buffered from earlier chunks.
  def keep_end
    # trace(__method__, :start)
    scanner, start, buffer = @keeps.pop
    if scanner == @scanner
      keep = @scanner.string.byteslice(start, @scanner.pos - start)
    else
      # The keep was started in an earlier chunk: take this chunk from its start.
      keep = @scanner.string.byteslice(0, @scanner.pos)
    end
    if buffer
      buffer << keep
      keep = buffer
    end
    # trace(__method__, :done, keep)
    keep
  end

  # Pops the latest keep and rewinds to where it was started.
  def keep_back
    # trace(__method__, :start)
    scanner, start, buffer = @keeps.pop
    if buffer
      # trace(__method__, :rescan, start, buffer)
      string = @scanner.string
      if scanner == @scanner
        keep = string.byteslice(start,
                                string.bytesize - @scanner.pos - start)
      else
        keep = string
      end
      if keep and not keep.empty?
        # Queue the data again as an input so that it is read after the buffer.
        @inputs.unshift(StringIO.new(keep))
        @last_scanner = false
      end
      @scanner = StringScanner.new(buffer)
    else
      if @scanner != scanner
        message = "scanners are different but no buffer: "
        message += "#{@scanner.inspect}(#{@scanner.object_id}): "
        message += "#{scanner.inspect}(#{scanner.object_id})"
        raise UnexpectedError, message
      end
      # trace(__method__, :repos, start, buffer)
      @scanner.pos = start
      last_scanner, last_start, last_buffer = @keeps.last
      # Drop the last buffer when the last buffer is the same data
      # in the last keep. If we keep it, we have duplicated data
      # by the next keep_back.
      if last_scanner == @scanner and
        last_buffer and
        last_buffer == last_scanner.string.byteslice(last_start, start)
        @keeps.last[2] = nil
      end
    end
    read_chunk if @scanner.eos?
  end

  # Pops the latest keep; any data it had buffered is handed on to the
  # keep below it on the stack.
  def keep_drop
    _, _, buffer = @keeps.pop
    # trace(__method__, :done, :empty) unless buffer
    return unless buffer

    last_keep = @keeps.last
    # trace(__method__, :done, :no_last_keep) unless last_keep
    return unless last_keep

    if last_keep[2]
      last_keep[2] << buffer
    else
      last_keep[2] = buffer
    end
    # trace(__method__, :done)
  end

  def rest
    @scanner.rest
  end

  def check(pattern)
    @scanner.check(pattern)
  end

  private
  # Debug helper; its call sites are commented out.
  def trace(*args)
    pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
  end

  # Copies the data covered by the latest keep into that keep's buffer.
  # Called before a new keep is pushed and before the chunk is replaced.
  def adjust_last_keep
    # trace(__method__, :start)

    keep = @keeps.last
    # trace(__method__, :done, :empty) if keep.nil?
    return if keep.nil?

    scanner, start, buffer = keep
    string = @scanner.string
    if @scanner != scanner
      # The keep was started in an earlier chunk.
      start = 0
    end
    if start == 0 and @scanner.eos?
      keep_data = string
    else
      keep_data = string.byteslice(start, @scanner.pos - start)
    end
    if keep_data
      if buffer
        buffer << keep_data
      else
        keep[2] = keep_data.dup
      end
    end

    # trace(__method__, :done)
  end

  # Replaces @scanner with a scanner over the next piece of input.
  # Returns +true+ when new data was loaded, +false+ when there is no more.
  # Raises InvalidEncoding when the data read is not valid in its encoding.
  def read_chunk
    return false if @last_scanner

    adjust_last_keep

    input = @inputs.first
    case input
    when StringIO
      # A StringIO is read in one go.
      string = input.read
      raise InvalidEncoding unless string.valid_encoding?
      # trace(__method__, :stringio, string)
      @scanner = StringScanner.new(string)
      @inputs.shift
      @last_scanner = @inputs.empty?
      true
    else
      chunk = input.gets(@row_separator, @chunk_size)
      if chunk
        raise InvalidEncoding unless chunk.valid_encoding?
        # trace(__method__, :chunk, chunk)
        @scanner = StringScanner.new(chunk)
        if Parser.eof?(input)
          @inputs.shift
          @last_scanner = @inputs.empty?
        end
        true
      else
        # trace(__method__, :no_chunk)
        @scanner = StringScanner.new("".encode(@encoding))
        @inputs.shift
        @last_scanner = @inputs.empty?
        if @last_scanner
          false
        else
          # This input is exhausted: continue with the next one.
          read_chunk
        end
      end
    end
  end
end

def initialize(input, options)
  @input = input
  @options = options
  @samples = []

  prepare
end

def column_separator
  @column_separator
end

def row_separator
  @row_separator
end

def quote_character
  @quote_character
end

# Returns +max_field_size+ plus one, or +nil+ when no maximum is set.
def field_size_limit
  @max_field_size&.succ
end

def max_field_size
  @max_field_size
end

def skip_lines
  @skip_lines
end

def unconverted_fields?
  @unconverted_fields
end

def headers
  @headers
end

# True while headers are in use but have not been read yet.
def header_row?
  @use_headers and @headers.nil?
end

def return_headers?
  @return_headers
end

def skip_blanks?
  @skip_blanks
end

def liberal_parsing?
  @liberal_parsing
end

def lineno
  @lineno
end

def line
  last_line
end

# Yields each parsed row to the block, or returns an Enumerator when no
# block is given. InvalidEncoding and UnexpectedError raised while parsing
# are re-raised as InvalidEncodingError and MalformedCSVError together
# with a line number.
def parse(&block)
  return to_enum(__method__) unless block_given?

  # Headers that are already known are yielded first when they were requested.
  if @return_headers and @headers and @raw_headers
    headers = Row.new(@headers, @raw_headers, true)
    if @unconverted_fields
      headers = add_unconverted_fields(headers, [])
    end
    yield headers
  end

  begin
    @scanner ||= build_scanner
    __send__(@parse_method, &block)
  rescue InvalidEncoding
    if @scanner
      ignore_broken_line
      lineno = @lineno
    else
      # No scanner yet, so no line has been counted.
      lineno = @lineno + 1
    end
    raise InvalidEncodingError.new(@encoding, lineno)
  rescue UnexpectedError => error
    if @scanner
      ignore_broken_line
      lineno = @lineno
    else
      lineno = @lineno + 1
    end
    message = "This should not be happen: #{error.message}: "
    message += "Please report this to https://github.com/ruby/csv/issues"
    raise MalformedCSVError.new(message, lineno)
  end
end

def use_headers?
@use_headers end private # A set of tasks to prepare the file in order to parse it def prepare prepare_variable prepare_quote_character prepare_backslash prepare_skip_lines prepare_strip prepare_separators validate_strip_and_col_sep_options prepare_quoted prepare_unquoted prepare_line prepare_header prepare_parser end def prepare_variable @encoding = @options[:encoding] liberal_parsing = @options[:liberal_parsing] if liberal_parsing @liberal_parsing = true if liberal_parsing.is_a?(Hash) @double_quote_outside_quote = liberal_parsing[:double_quote_outside_quote] @backslash_quote = liberal_parsing[:backslash_quote] else @double_quote_outside_quote = false @backslash_quote = false end else @liberal_parsing = false @backslash_quote = false end @unconverted_fields = @options[:unconverted_fields] @max_field_size = @options[:max_field_size] @skip_blanks = @options[:skip_blanks] @fields_converter = @options[:fields_converter] @header_fields_converter = @options[:header_fields_converter] end def prepare_quote_character @quote_character = @options[:quote_character] if @quote_character.nil? @escaped_quote_character = nil @escaped_quote = nil else @quote_character = @quote_character.to_s.encode(@encoding) if @quote_character.length != 1 message = ":quote_char has to be nil or a single character String" raise ArgumentError, message end @escaped_quote_character = Regexp.escape(@quote_character) @escaped_quote = Regexp.new(@escaped_quote_character) end end def prepare_backslash return unless @backslash_quote @backslash_character = "\\".encode(@encoding) @escaped_backslash_character = Regexp.escape(@backslash_character) @escaped_backslash = Regexp.new(@escaped_backslash_character) if @quote_character.nil? 
@backslash_quote_character = nil else @backslash_quote_character = @backslash_character + @escaped_quote_character end end def prepare_skip_lines skip_lines = @options[:skip_lines] case skip_lines when String @skip_lines = skip_lines.encode(@encoding) when Regexp, nil @skip_lines = skip_lines else unless skip_lines.respond_to?(:match) message = ":skip_lines has to respond to \#match: #{skip_lines.inspect}" raise ArgumentError, message end @skip_lines = skip_lines end end def prepare_strip @strip = @options[:strip] @escaped_strip = nil @strip_value = nil @rstrip_value = nil if @strip.is_a?(String) case @strip.length when 0 raise ArgumentError, ":strip must not be an empty String" when 1 # ok else raise ArgumentError, ":strip doesn't support 2 or more characters yet" end @strip = @strip.encode(@encoding) @escaped_strip = Regexp.escape(@strip) if @quote_character @strip_value = Regexp.new(@escaped_strip + "+".encode(@encoding)) @rstrip_value = Regexp.new(@escaped_strip + "+\\z".encode(@encoding)) end elsif @strip strip_values = " \t\f\v" @escaped_strip = strip_values.encode(@encoding) if @quote_character @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding)) @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding)) end end end begin StringScanner.new("x").scan("x") rescue TypeError STRING_SCANNER_SCAN_ACCEPT_STRING = false else STRING_SCANNER_SCAN_ACCEPT_STRING = true end def prepare_separators column_separator = @options[:column_separator] @column_separator = column_separator.to_s.encode(@encoding) if @column_separator.size < 1 message = ":col_sep must be 1 or more characters: " message += column_separator.inspect raise ArgumentError, message end @row_separator = resolve_row_separator(@options[:row_separator]).encode(@encoding) @escaped_column_separator = Regexp.escape(@column_separator) @escaped_first_column_separator = Regexp.escape(@column_separator[0]) if @column_separator.size > 1 @column_end = Regexp.new(@escaped_column_separator) 
@column_ends = @column_separator.each_char.collect do |char| Regexp.new(Regexp.escape(char)) end @first_column_separators = Regexp.new(@escaped_first_column_separator + "+".encode(@encoding)) else if STRING_SCANNER_SCAN_ACCEPT_STRING @column_end = @column_separator else @column_end = Regexp.new(@escaped_column_separator) end @column_ends = nil @first_column_separators = nil end escaped_row_separator = Regexp.escape(@row_separator) @row_end = Regexp.new(escaped_row_separator) if @row_separator.size > 1 @row_ends = @row_separator.each_char.collect do |char| Regexp.new(Regexp.escape(char)) end else @row_ends = nil end @cr = "\r".encode(@encoding) @lf = "\n".encode(@encoding) @line_end = Regexp.new("\r\n|\n|\r".encode(@encoding)) @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding)) end # This method verifies that there are no (obvious) ambiguities with the # provided +col_sep+ and +strip+ parsing options. For example, if +col_sep+ # and +strip+ were both equal to +\t+, then there would be no clear way to # parse the input. def validate_strip_and_col_sep_options return unless @strip if @strip.is_a?(String) if @column_separator.start_with?(@strip) || @column_separator.end_with?(@strip) raise ArgumentError, "The provided strip (#{@escaped_strip}) and " \ "col_sep (#{@escaped_column_separator}) options are incompatible." end else if Regexp.new("\\A[#{@escaped_strip}]|[#{@escaped_strip}]\\z").match?(@column_separator) raise ArgumentError, "The provided strip (true) and " \ "col_sep (#{@escaped_column_separator}) options are incompatible." 
end end end def prepare_quoted if @quote_character @quotes = Regexp.new(@escaped_quote_character + "+".encode(@encoding)) no_quoted_values = @escaped_quote_character.dup if @backslash_quote no_quoted_values << @escaped_backslash_character end @quoted_value = Regexp.new("[^".encode(@encoding) + no_quoted_values + "]+".encode(@encoding)) end if @escaped_strip @split_column_separator = Regexp.new(@escaped_strip + "*".encode(@encoding) + @escaped_column_separator + @escaped_strip + "*".encode(@encoding)) else if @column_separator == " ".encode(@encoding) @split_column_separator = Regexp.new(@escaped_column_separator) else @split_column_separator = @column_separator end end end def prepare_unquoted return if @quote_character.nil? no_unquoted_values = "\r\n".encode(@encoding) no_unquoted_values << @escaped_first_column_separator unless @liberal_parsing no_unquoted_values << @escaped_quote_character end @unquoted_value = Regexp.new("[^".encode(@encoding) + no_unquoted_values + "]+".encode(@encoding)) end def resolve_row_separator(separator) if separator == :auto cr = "\r".encode(@encoding) lf = "\n".encode(@encoding) if @input.is_a?(StringIO) pos = @input.pos separator = detect_row_separator(@input.read, cr, lf) @input.seek(pos) elsif @input.respond_to?(:gets) if @input.is_a?(File) chunk_size = 32 * 1024 else chunk_size = 1024 end begin while separator == :auto # # if we run out of data, it's probably a single line # (ensure will set default value) # break unless sample = @input.gets(nil, chunk_size) # extend sample if we're unsure of the line ending if sample.end_with?(cr) sample << (@input.gets(nil, 1) || "") end @samples << sample separator = detect_row_separator(sample, cr, lf) end rescue IOError # do nothing: ensure will set default end end separator = InputRecordSeparator.value if separator == :auto end separator.to_s.encode(@encoding) end def detect_row_separator(sample, cr, lf) lf_index = sample.index(lf) if lf_index cr_index = sample[0, lf_index].index(cr) else 
cr_index = sample.index(cr) end if cr_index and lf_index if cr_index + 1 == lf_index cr + lf elsif cr_index < lf_index cr else lf end elsif cr_index cr elsif lf_index lf else :auto end end def prepare_line @lineno = 0 @last_line = nil @scanner = nil end def last_line if @scanner @last_line ||= @scanner.keep_end else @last_line end end def prepare_header @return_headers = @options[:return_headers] headers = @options[:headers] case headers when Array @raw_headers = headers quoted_fields = FieldsConverter::NO_QUOTED_FIELDS @use_headers = true when String @raw_headers, quoted_fields = parse_headers(headers) @use_headers = true when nil, false @raw_headers = nil @use_headers = false else @raw_headers = nil @use_headers = true end if @raw_headers @headers = adjust_headers(@raw_headers, quoted_fields) else @headers = nil end end def parse_headers(row) quoted_fields = [] converter = lambda do |field, info| quoted_fields << info.quoted? field end headers = CSV.parse_line(row, col_sep: @column_separator, row_sep: @row_separator, quote_char: @quote_character, converters: [converter]) [headers, quoted_fields] end def adjust_headers(headers, quoted_fields) adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields) adjusted_headers.each {|h| h.freeze if h.is_a? String} adjusted_headers end def prepare_parser @may_quoted = may_quoted? if @quote_character.nil? @parse_method = :parse_no_quote elsif @liberal_parsing or @strip @parse_method = :parse_quotable_robust else @parse_method = :parse_quotable_loose end end def may_quoted? return false if @quote_character.nil? if @input.is_a?(StringIO) pos = @input.pos sample = @input.read @input.seek(pos) else return false if @samples.empty? 
sample = @samples.first end sample[0, 128].index(@quote_character) end class UnoptimizedStringIO # :nodoc: def initialize(string) @io = StringIO.new(string, "rb:#{string.encoding}") end def gets(*args) @io.gets(*args) end def each_line(*args, &block) @io.each_line(*args, &block) end def eof? @io.eof? end end SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") if SCANNER_TEST SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] def build_scanner inputs = @samples.collect do |sample| UnoptimizedStringIO.new(sample) end if @input.is_a?(StringIO) inputs << UnoptimizedStringIO.new(@input.read) else inputs << @input end begin chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] rescue # Ractor::IsolationError # Ractor on Ruby 3.0 can't read ENV value. chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE end chunk_size = Integer((chunk_size_value || "1"), 10) InputsScanner.new(inputs, @encoding, @row_separator, chunk_size: chunk_size) end else def build_scanner string = nil if @samples.empty? and @input.is_a?(StringIO) string = @input.read elsif @samples.size == 1 and Parser.eof?(@input) string = @samples[0] end if string unless string.valid_encoding? index = string.lines(@row_separator).index do |line| !line.valid_encoding? end if index raise InvalidEncodingError.new(@encoding, @lineno + index + 1) end end Scanner.new(string) else inputs = @samples.collect do |sample| StringIO.new(sample) end inputs << @input InputsScanner.new(inputs, @encoding, @row_separator) end end end def skip_needless_lines return unless @skip_lines until @scanner.eos? 
@scanner.keep_start line = @scanner.scan_all(@not_line_end) || "".encode(@encoding) line << @row_separator if parse_row_end if skip_line?(line) @lineno += 1 @scanner.keep_drop else @scanner.keep_back return end end end def skip_line?(line) line = line.delete_suffix(@row_separator) case @skip_lines when String line.include?(@skip_lines) when Regexp @skip_lines.match?(line) else @skip_lines.match(line) end end def validate_field_size(field) return unless @max_field_size return if field.size <= @max_field_size ignore_broken_line message = "Field size exceeded: #{field.size} > #{@max_field_size}" raise MalformedCSVError.new(message, @lineno) end def parse_no_quote(&block) @scanner.each_line(@row_separator) do |line| next if @skip_lines and skip_line?(line) original_line = line line = line.delete_suffix(@row_separator) if line.empty? next if @skip_blanks row = [] else line = strip_value(line) row = line.split(@split_column_separator, -1) if @max_field_size row.each do |column| validate_field_size(column) end end n_columns = row.size i = 0 while i < n_columns row[i] = nil if row[i].empty? i += 1 end end @last_line = original_line emit_row(row, &block) end end def parse_quotable_loose(&block) @scanner.keep_start @scanner.each_line(@row_separator) do |line| if @skip_lines and skip_line?(line) @scanner.keep_drop @scanner.keep_start next end original_line = line line = line.delete_suffix(@row_separator) if line.empty? if @skip_blanks @scanner.keep_drop @scanner.keep_start next end row = [] quoted_fields = FieldsConverter::NO_QUOTED_FIELDS elsif line.include?(@cr) or line.include?(@lf) @scanner.keep_back @parse_method = :parse_quotable_robust return parse_quotable_robust(&block) else row = line.split(@split_column_separator, -1) quoted_fields = [] n_columns = row.size i = 0 while i < n_columns column = row[i] if column.empty? quoted_fields << false row[i] = nil else n_quotes = column.count(@quote_character) if n_quotes.zero? 
quoted_fields << false # no quote elsif n_quotes == 2 and column.start_with?(@quote_character) and column.end_with?(@quote_character) quoted_fields << true row[i] = column[1..-2] else @scanner.keep_back @parse_method = :parse_quotable_robust return parse_quotable_robust(&block) end validate_field_size(row[i]) end i += 1 end end @scanner.keep_drop @scanner.keep_start @last_line = original_line emit_row(row, quoted_fields, &block) end @scanner.keep_drop end def parse_quotable_robust(&block) row = [] quoted_fields = [] skip_needless_lines start_row while true @quoted_column_value = false @unquoted_column_value = false @scanner.scan_all(@strip_value) if @strip_value value = parse_column_value if value @scanner.scan_all(@strip_value) if @strip_value validate_field_size(value) end if parse_column_end row << value quoted_fields << @quoted_column_value elsif parse_row_end if row.empty? and value.nil? emit_row([], &block) unless @skip_blanks else row << value quoted_fields << @quoted_column_value emit_row(row, quoted_fields, &block) row = [] quoted_fields.clear end skip_needless_lines start_row elsif @scanner.eos? break if row.empty? and value.nil? row << value quoted_fields << @quoted_column_value emit_row(row, quoted_fields, &block) break else if @quoted_column_value if liberal_parsing? 
and (new_line = @scanner.check(@line_end)) message = "Illegal end-of-line sequence outside of a quoted field " + "<#{new_line.inspect}>" else message = "Any value after quoted field isn't allowed" end ignore_broken_line raise MalformedCSVError.new(message, @lineno) elsif @unquoted_column_value and (new_line = @scanner.scan(@line_end)) ignore_broken_line message = "Unquoted fields do not allow new line " + "<#{new_line.inspect}>" raise MalformedCSVError.new(message, @lineno) elsif @scanner.rest.start_with?(@quote_character) ignore_broken_line message = "Illegal quoting" raise MalformedCSVError.new(message, @lineno) elsif (new_line = @scanner.scan(@line_end)) ignore_broken_line message = "New line must be <#{@row_separator.inspect}> " + "not <#{new_line.inspect}>" raise MalformedCSVError.new(message, @lineno) else ignore_broken_line raise MalformedCSVError.new("TODO: Meaningful message", @lineno) end end end end def parse_column_value if @liberal_parsing quoted_value = parse_quoted_column_value if quoted_value @scanner.scan_all(@strip_value) if @strip_value unquoted_value = parse_unquoted_column_value if unquoted_value if @double_quote_outside_quote unquoted_value = unquoted_value.gsub(@quote_character * 2, @quote_character) if quoted_value.empty? # %Q{""...} case return @quote_character + unquoted_value end end @quote_character + quoted_value + @quote_character + unquoted_value else quoted_value end else parse_unquoted_column_value end elsif @may_quoted parse_quoted_column_value || parse_unquoted_column_value else parse_unquoted_column_value || parse_quoted_column_value end end def parse_unquoted_column_value value = @scanner.scan_all(@unquoted_value) return nil unless value @unquoted_column_value = true if @first_column_separators while true @scanner.keep_start is_column_end = @column_ends.all? 
do |column_end| @scanner.scan(column_end) end @scanner.keep_back break if is_column_end sub_separator = @scanner.scan_all(@first_column_separators) break if sub_separator.nil? value << sub_separator sub_value = @scanner.scan_all(@unquoted_value) break if sub_value.nil? value << sub_value end end value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote if @rstrip_value value.gsub!(@rstrip_value, "") end value end def parse_quoted_column_value quotes = @scanner.scan_all(@quotes) return nil unless quotes @quoted_column_value = true n_quotes = quotes.size if (n_quotes % 2).zero? quotes[0, (n_quotes - 2) / 2] else value = quotes[0, n_quotes / 2] while true quoted_value = @scanner.scan_all(@quoted_value) value << quoted_value if quoted_value if @backslash_quote if @scanner.scan(@escaped_backslash) if @scanner.scan(@escaped_quote) value << @quote_character else value << @backslash_character end next end end quotes = @scanner.scan_all(@quotes) unless quotes ignore_broken_line message = "Unclosed quoted field" raise MalformedCSVError.new(message, @lineno) end n_quotes = quotes.size if n_quotes == 1 break else value << quotes[0, n_quotes / 2] break if (n_quotes % 2) == 1 end end value end end def parse_column_end return true if @scanner.scan(@column_end) return false unless @column_ends @scanner.keep_start if @column_ends.all? {|column_end| @scanner.scan(column_end)} @scanner.keep_drop true else @scanner.keep_back false end end def parse_row_end return true if @scanner.scan(@row_end) return false unless @row_ends @scanner.keep_start if @row_ends.all? {|row_end| @scanner.scan(row_end)} @scanner.keep_drop true else @scanner.keep_back false end end def strip_value(value) return value unless @strip return value if value.nil? case @strip when String while value.delete_prefix!(@strip) # do nothing end while value.delete_suffix!(@strip) # do nothing end else value.strip! 
end value end def ignore_broken_line @scanner.scan_all(@not_line_end) @scanner.scan_all(@line_end) @lineno += 1 end def start_row if @last_line @last_line = nil else @scanner.keep_drop end @scanner.keep_start end def emit_row(row, quoted_fields=FieldsConverter::NO_QUOTED_FIELDS, &block) @lineno += 1 raw_row = row if @use_headers if @headers.nil? @headers = adjust_headers(row, quoted_fields) return unless @return_headers row = Row.new(@headers, row, true) else row = Row.new(@headers, @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields)) end else # convert fields, if needed... row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields) end # inject unconverted fields and accessor, if requested... if @unconverted_fields and not row.respond_to?(:unconverted_fields) add_unconverted_fields(row, raw_row) end yield(row) end # This method injects an instance variable unconverted_fields into # +row+ and an accessor method for +row+ called unconverted_fields(). The # variable is set to the contents of +fields+. def add_unconverted_fields(row, fields) class << row attr_reader :unconverted_fields end row.instance_variable_set(:@unconverted_fields, fields) row end end end csv-3.3.5/lib/csv/row.rb000066400000000000000000000602771501670011600150230ustar00rootroot00000000000000# frozen_string_literal: true require "forwardable" class CSV # = \CSV::Row # A \CSV::Row instance represents a \CSV table row. # (see {class CSV}[../CSV.html]). # # The instance may have: # - Fields: each is an object, not necessarily a \String. # - Headers: each serves a key, and also need not be a \String. # # === Instance Methods # # \CSV::Row has three groups of instance methods: # - Its own internally defined instance methods. # - Methods included by module Enumerable. # - Methods delegated to class Array.: # * Array#empty? 
# * Array#length # * Array#size # # == Creating a \CSV::Row Instance # # Commonly, a new \CSV::Row instance is created by parsing \CSV source # that has headers: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.each {|row| p row } # Output: # # # # # # # # You can also create a row directly. See ::new. # # == Headers # # Like a \CSV::Table, a \CSV::Row has headers. # # A \CSV::Row that was created by parsing \CSV source # inherits its headers from the table: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # row = table.first # row.headers # => ["Name", "Value"] # # You can also create a new row with headers; # like the keys in a \Hash, the headers need not be Strings: # row = CSV::Row.new([:name, :value], ['foo', 0]) # row.headers # => [:name, :value] # # The new row retains its headers even if added to a table # that has headers: # table << row # => # # row.headers # => [:name, :value] # row[:name] # => "foo" # row['Name'] # => nil # # # # == Accessing Fields # # You may access a field in a \CSV::Row with either its \Integer index # (\Array-style) or its header (\Hash-style). 
# # Fetch a field using method #[]: # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) # row[1] # => 0 # row['Value'] # => 0 # # Set a field using method #[]=: # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) # row # => # # row[0] = 'bar' # row['Value'] = 1 # row # => # # class Row # :call-seq: # CSV::Row.new(headers, fields, header_row = false) -> csv_row # # Returns the new \CSV::Row instance constructed from # arguments +headers+ and +fields+; both should be Arrays; # note that the fields need not be Strings: # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) # row # => # # # If the \Array lengths are different, the shorter is +nil+-filled: # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0]) # row # => # # # Each \CSV::Row object is either a field row or a header row; # by default, a new row is a field row; for the row created above: # row.field_row? # => true # row.header_row? # => false # # If the optional argument +header_row+ is given as +true+, # the created row is a header row: # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true) # row # => # # row.field_row? # => false # row.header_row? # => true def initialize(headers, fields, header_row = false) @header_row = header_row headers.each { |h| h.freeze if h.is_a? String } # handle extra headers or fields @row = if headers.size >= fields.size headers.zip(fields) else fields.zip(headers).each(&:reverse!) end end # Internal data format used to compare equality. attr_reader :row protected :row ### Array Delegation ### extend Forwardable def_delegators :@row, :empty?, :length, :size # :call-seq: # row.initialize_copy(other_row) -> self # # Calls superclass method. def initialize_copy(other) super_return_value = super @row = @row.collect(&:dup) super_return_value end # :call-seq: # row.header_row? -> true or false # # Returns +true+ if this is a header row, +false+ otherwise. def header_row? @header_row end # :call-seq: # row.field_row? 
-> true or false # # Returns +true+ if this is a field row, +false+ otherwise. def field_row? not header_row? end # :call-seq: # row.headers -> array_of_headers # # Returns the headers for this row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # row = table.first # row.headers # => ["Name", "Value"] def headers @row.map(&:first) end # :call-seq: # field(index) -> value # field(header) -> value # field(header, offset) -> value # # Returns the field value for the given +index+ or +header+. # # --- # # Fetch field value by \Integer index: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.field(0) # => "foo" # row.field(1) # => "bar" # # Counts backward from the last column if +index+ is negative: # row.field(-1) # => "0" # row.field(-2) # => "foo" # # Returns +nil+ if +index+ is out of range: # row.field(2) # => nil # row.field(-3) # => nil # # --- # # Fetch field value by header (first found): # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.field('Name') # => "Foo" # # Fetch field value by header, ignoring +offset+ leading fields: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.field('Name', 2) # => "Baz" # # Returns +nil+ if the header does not exist. def field(header_or_index, minimum_index = 0) # locate the pair finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc pair = @row[minimum_index..-1].public_send(finder, header_or_index) # return the field if we have a pair if pair.nil? nil else header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last end end alias_method :[], :field # # :call-seq: # fetch(header) -> value # fetch(header, default) -> value # fetch(header) {|row| ... } -> value # # Returns the field value as specified by +header+. 
# # --- # # With the single argument +header+, returns the field value # for that header (first found): # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.fetch('Name') # => "Foo" # # Raises exception +KeyError+ if the header does not exist. # # --- # # With arguments +header+ and +default+ given, # returns the field value for the header (first found) # if the header exists, otherwise returns +default+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.fetch('Name', '') # => "Foo" # row.fetch(:nosuch, '') # => "" # # --- # # With argument +header+ and a block given, # returns the field value for the header (first found) # if the header exists; otherwise calls the block # and returns its return value: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.fetch('Name') {|header| fail 'Cannot happen' } # => "Foo" # row.fetch(:nosuch) {|header| "Header '#{header} not found'" } # => "Header 'nosuch not found'" def fetch(header, *varargs) raise ArgumentError, "Too many arguments" if varargs.length > 1 pair = @row.assoc(header) if pair pair.last else if block_given? yield header elsif varargs.empty? raise KeyError, "key not found: #{header}" else varargs.first end end end # :call-seq: # row.has_key?(header) -> true or false # # Returns +true+ if there is a field with the given +header+, # +false+ otherwise. def has_key?(header) !!@row.assoc(header) end alias_method :include?, :has_key? alias_method :key?, :has_key? alias_method :member?, :has_key? alias_method :header?, :has_key? # # :call-seq: # row[index] = value -> value # row[header, offset] = value -> value # row[header] = value -> value # # Assigns the field value for the given +index+ or +header+; # returns +value+. 
# # --- # # Assign field value by \Integer index: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # row = table[0] # row[0] = 'Bat' # row[1] = 3 # row # => # # # Counts backward from the last column if +index+ is negative: # row[-1] = 4 # row[-2] = 'Bam' # row # => # # # Extends the row with nil:nil if positive +index+ is not in the row: # row[4] = 5 # row # => # # # Raises IndexError if negative +index+ is too small (too far from zero). # # --- # # Assign field value by header (first found): # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row['Name'] = 'Bat' # row # => # # # Assign field value by header, ignoring +offset+ leading fields: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row['Name', 2] = 4 # row # => # # # Append new field by (new) header: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # row = table[0] # row['New'] = 6 # row# => # def []=(*args) value = args.pop if args.first.is_a? Integer if @row[args.first].nil? # extending past the end with index @row[args.first] = [nil, value] @row.map! { |pair| pair.nil? ? [nil, nil] : pair } else # normal index assignment @row[args.first][1] = value end else index = index(*args) if index.nil? # appending a field self << [args.first, value] else # normal header assignment @row[index][1] = value end end end # # :call-seq: # row << [header, value] -> self # row << hash -> self # row << value -> self # # Adds a field to +self+; returns +self+: # # If the argument is a 2-element \Array [header, value], # a field is added with the given +header+ and +value+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row << ['NAME', 'Bat'] # row # => # # # If the argument is a \Hash, each key-value pair is added # as a field with header +key+ and value +value+. 
# source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row << {NAME: 'Bat', name: 'Bam'} # row # => # # # Otherwise, the given +value+ is added as a field with no header. # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row << 'Bag' # row # => # def <<(arg) if arg.is_a?(Array) and arg.size == 2 # appending a header and name @row << arg elsif arg.is_a?(Hash) # append header and name pairs arg.each { |pair| @row << pair } else # append field value @row << [nil, arg] end self # for chaining end # :call-seq: # row.push(*values) -> self # # Appends each of the given +values+ to +self+ as a field; returns +self+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" # table = CSV.parse(source, headers: true) # row = table[0] # row.push('Bat', 'Bam') # row # => # def push(*args) args.each { |arg| self << arg } self # for chaining end # # :call-seq: # delete(index) -> [header, value] or nil # delete(header) -> [header, value] or empty_array # delete(header, offset) -> [header, value] or empty_array # # Removes a specified field from +self+; returns the 2-element \Array # [header, value] if the field exists. 
#
# If an \Integer argument +index+ is given,
# removes and returns the field at offset +index+,
# or returns +nil+ if the field does not exist:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.delete(1) # => ["Name", "Bar"]
#   row.delete(50) # => nil
#
# Otherwise, if the single argument +header+ is given,
# removes and returns the first-found field with the given header,
# or returns a new empty \Array if the field does not exist:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.delete('Name') # => ["Name", "Foo"]
#   row.delete('NAME') # => []
#
# If argument +header+ and \Integer argument +offset+ are given,
# removes and returns the first-found field with the given header
# whose +index+ is at least as large as +offset+:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.delete('Name', 1) # => ["Name", "Bar"]
#   row.delete('NAME', 1) # => []
def delete(header_or_index, minimum_index = 0)
  if header_or_index.is_a? Integer                 # by index
    @row.delete_at(header_or_index)
  elsif i = index(header_or_index, minimum_index)  # by header
    @row.delete_at(i)
  else
    [ ]
  end
end

# :call-seq:
#   row.delete_if {|header, value| ... } -> self
#
# Removes fields from +self+ as selected by the block; returns +self+.
#
# Removes each field for which the block returns a truthy value:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.delete_if {|header, value| value.start_with?('B') }
#   row # => #<CSV::Row "Name":"Foo">
#   row.delete_if {|header, value| header.start_with?('B') } # => #<CSV::Row "Name":"Foo">
#
# If no block is given, returns a new Enumerator:
#   row.delete_if # => #<Enumerator: #<CSV::Row "Name":"Foo">:delete_if>
def delete_if(&block)
  return enum_for(__method__) { size } unless block_given?

  @row.delete_if(&block)

  self # for chaining
end

# :call-seq:
#   self.fields(*specifiers) -> array_of_fields
#
# Returns field values per the given +specifiers+, which may be any mixture of:
# - \Integer index.
# - \Range of \Integer indexes.
# - 2-element \Array containing a header and offset.
# - Header.
# - \Range of headers.
#
# For +specifier+ in one of the first four cases above,
# returns the result of self.field(specifier); see #field.
#
# Although there may be any number of +specifiers+,
# the examples here will illustrate one at a time.
#
# When the specifier is an \Integer +index+,
# returns self.field(index):
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.fields(1) # => ["Bar"]
#
# When the specifier is a \Range of \Integers +range+,
# returns self.field(range):
#   row.fields(1..2) # => ["Bar", "Baz"]
#
# When the specifier is a 2-element \Array +array+,
# returns self.field(*array), i.e. the array is splatted into
# a header and an offset:
#   row.fields(['Name', 1]) # => ["Bar"]
#
# When the specifier is a header +header+,
# returns self.field(header):
#   row.fields('Name') # => ["Foo"]
#
# When the specifier is a \Range of headers +range+,
# forms a new \Range +new_range+ from the indexes of
# range.begin and range.end,
# and returns the fields at the indexes in +new_range+:
#   source = "Name,NAME,name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.fields('Name'..'NAME') # => ["Foo", "Bar"]
#
# Returns all fields if no argument given:
#   row.fields # => ["Foo", "Bar", "Baz"]
def fields(*headers_and_or_indices)
  if headers_and_or_indices.empty?  # return all fields--no arguments
    @row.map(&:last)
  else                              # or work like values_at()
    all = []
    headers_and_or_indices.each do |h_or_i|
      if h_or_i.is_a? Range
        # translate header endpoints to positions, keep Integer endpoints as is
        index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
                                                    index(h_or_i.begin)
        index_end   = h_or_i.end.is_a?(Integer)   ? h_or_i.end :
                                                    index(h_or_i.end)
        new_range   = h_or_i.exclude_end? ? (index_begin...index_end) :
                                            (index_begin..index_end)
        all.concat(fields.values_at(new_range))
      else
        # Array() lets a [header, offset] pair be splatted into #field
        all << field(*Array(h_or_i))
      end
    end
    return all
  end
end
alias_method :values_at, :fields

# :call-seq:
#   index(header) -> index
#   index(header, offset) -> index
#
# Returns the index for the given header, if it exists;
# otherwise returns +nil+.
#
# With the single argument +header+, returns the index
# of the first-found field with the given +header+:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.index('Name') # => 0
#   row.index('NAME') # => nil
#
# With arguments +header+ and +offset+,
# returns the index of the first-found field with given +header+,
# but ignoring the first +offset+ fields:
#   row.index('Name', 1) # => 1
#   row.index('Name', 3) # => nil
#
# Returns +nil+ as well when +offset+ lies beyond the end of the row:
#   row.index('Name', 4) # => nil
def index(header, minimum_index = 0)
  # Array#[] with a range answers nil (not an empty Array) when the range
  # starts beyond the end of the array; without this guard such an offset
  # would raise NoMethodError instead of reporting "not found".
  candidates = headers[minimum_index..-1]
  return nil if candidates.nil?

  # find the pair
  position = candidates.index(header)

  # return the index at the right offset, if we found one
  position.nil? ? nil : position + minimum_index
end

# :call-seq:
#   row.field?(value) -> true or false
#
# Returns +true+ if +value+ is a field in this row, +false+ otherwise:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.field?('Bar') # => true
#   row.field?('BAR') # => false
def field?(data)
  fields.include? data
end

include Enumerable

# :call-seq:
#   row.each {|header, value| ... } -> self
#
# Calls the block with each header-value pair; returns +self+:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.each {|header, value| p [header, value] }
# Output:
#   ["Name", "Foo"]
#   ["Name", "Bar"]
#   ["Name", "Baz"]
#
# If no block is given, returns a new Enumerator:
#   row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each>
def each(&block)
  return enum_for(__method__) { size } unless block_given?

  @row.each(&block)

  self # for chaining
end

alias_method :each_pair, :each

# :call-seq:
#   row == other -> true or false
#
# Returns +true+ if +other+ is a \CSV::Row that has the same
# fields (headers and values) in the same order as +self+;
# otherwise returns +false+.
# If +other+ is not a \CSV::Row, it is compared directly with the
# internal \Array of header-value pairs:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   other_row = table[0]
#   row == other_row # => true
#   other_row = table[1]
#   row == other_row # => false
def ==(other)
  return @row == other.row if other.is_a? CSV::Row
  @row == other
end

# :call-seq:
#   row.to_h -> hash
#
# Returns the new \Hash formed by adding each header-value pair in +self+
# as a key-value pair in the \Hash.
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.to_h # => {"Name"=>"foo", "Value"=>"0"}
#
# Header order is preserved, but repeated headers are ignored:
#   source = "Name,Name,Name\nFoo,Bar,Baz\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.to_h # => {"Name"=>"Foo"}
def to_h
  hash = {}
  each do |key, _value|
    # only the first occurrence of a header contributes to the Hash
    hash[key] = self[key] unless hash.key?(key)
  end
  hash
end
alias_method :to_hash, :to_h

# :call-seq:
#   row.deconstruct_keys(keys) -> hash
#
# Returns the new \Hash suitable for pattern matching containing only the
# keys specified as an argument.
# When +keys+ is +nil+, returns the same \Hash as #to_h.
def deconstruct_keys(keys)
  if keys.nil?
    to_h
  else
    keys.to_h { |key| [key, self[key]] }
  end
end

alias_method :to_ary, :to_a

# :call-seq:
#   row.deconstruct -> array
#
# Returns the new \Array suitable for pattern matching containing the values
# of the row.
def deconstruct
  fields
end

# :call-seq:
#   row.to_csv -> csv_string
#
# Returns the row as a \CSV String.
# Headers are not included:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.to_csv # => "foo,0\n"
#
# The +options+ are passed through unchanged to the +to_csv+ call
# made on the \Array of field values.
def to_csv(**options)
  fields.to_csv(**options)
end

alias_method :to_s, :to_csv

# :call-seq:
#   row.dig(index_or_header, *identifiers) -> object
#
# Finds and returns the object in nested object that is specified
# by +index_or_header+ and +identifiers+.
#
# The nested objects may be instances of various classes.
# See {Dig Methods}[rdoc-ref:dig_methods.rdoc].
#
# Returns +nil+ when the field selected by +index_or_header+ is +nil+;
# raises TypeError when further +identifiers+ are given but the field
# value does not respond to +dig+.
#
# Examples:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.dig(1) # => "0"
#   row.dig('Value') # => "0"
#   row.dig(5) # => nil
def dig(index_or_header, *indexes)
  value = field(index_or_header)
  if value.nil?
    nil
  elsif indexes.empty?
    value
  else
    unless value.respond_to?(:dig)
      raise TypeError, "#{value.class} does not have \#dig method"
    end
    value.dig(*indexes)
  end
end

# :call-seq:
#   row.inspect -> string
#
# Returns an ASCII-compatible \String showing:
# - Class \CSV::Row.
# - Header-value pairs.
# Example:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   row = table[0]
#   row.inspect # => "#<CSV::Row \"Name\":\"foo\" \"Value\":\"0\">"
def inspect
  str = ["#<", self.class.to_s]
  each do |header, field|
    # Symbol headers are shown bare, every other header through #inspect
    str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
           ":" << field.inspect
  end
  str << ">"
  begin
    str.join('')
  rescue  # any encoding error
    # joining failed, so bring every piece to an ASCII-compatible encoding
    # (or relabel it as binary when no such encoding exists) and join again
    str.map do |s|
      e = Encoding::Converter.asciicompat_encoding(s.encoding)
      e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
    end.join('')
  end
end
# NOTE(review): the two +end+ keywords below close scopes opened before this
# chunk -- presumably class Row and class CSV; confirm against the file head.
end
end
csv-3.3.5/lib/csv/table.rb000066400000000000000000001124201501670011600152670ustar00rootroot00000000000000# frozen_string_literal: true

require "forwardable"

class CSV
# = \CSV::Table
# A \CSV::Table instance represents \CSV data.
# (see {class CSV}[../CSV.html]).
#
# The instance may have:
# - Rows: each is a Table::Row object.
# - Headers: names for the columns.
#
# === Instance Methods
#
# \CSV::Table has three groups of instance methods:
# - Its own internally defined instance methods.
# - Methods included by module Enumerable.
# - Methods delegated to class Array:
#   * Array#empty?
#   * Array#length
#   * Array#size
#
# == Creating a \CSV::Table Instance
#
# Commonly, a new \CSV::Table instance is created by parsing \CSV source
# using headers:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.class # => CSV::Table
#
# You can also create an instance directly. See ::new.
#
# == Headers
#
# If a table has headers, the headers serve as labels for the columns of data.
# Each header serves as the label for its column.
#
# The headers for a \CSV::Table object are stored as an \Array of Strings.
#
# Commonly, headers are defined in the first row of \CSV source:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.headers # => ["Name", "Value"]
#
# If no headers are defined, the \Array is empty:
#   table = CSV::Table.new([])
#   table.headers # => []
#
# == Access Modes
#
# \CSV::Table provides three modes for accessing table data:
# - \Row mode.
# - Column mode.
# - Mixed mode (the default for a new table).
#
# The access mode for a \CSV::Table instance affects the behavior
# of some of its instance methods:
# - #[]
# - #[]=
# - #delete
# - #delete_if
# - #each
# - #values_at
#
# === \Row Mode
#
# Set a table to row mode with method #by_row!:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_row! # => #<CSV::Table mode:row row_count:4>
#
# Specify a single row by an \Integer index:
#   # Get a row.
#   table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
#   # Set a row, then get it.
#   table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
#   table[1] # => #<CSV::Row "Name":"bam" "Value":3>
#
# Specify a sequence of rows by a \Range:
#   # Get rows.
#   table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
#   # Set rows, then get them.
# table[1..2] = [ # CSV::Row.new(['Name', 'Value'], ['bat', 4]), # CSV::Row.new(['Name', 'Value'], ['bad', 5]), # ] # table[1..2] # => [["Name", #], ["Value", #]] # # === Column Mode # # Set a table to column mode with method #by_col!: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => # # # Specify a column by an \Integer index: # # Get a column. # table[0] # # Set a column, then get it. # table[0] = ['FOO', 'BAR', 'BAZ'] # table[0] # => ["FOO", "BAR", "BAZ"] # # Specify a column by its \String header: # # Get a column. # table['Name'] # => ["FOO", "BAR", "BAZ"] # # Set a column, then get it. # table['Name'] = ['Foo', 'Bar', 'Baz'] # table['Name'] # => ["Foo", "Bar", "Baz"] # # === Mixed Mode # # In mixed mode, you can refer to either rows or columns: # - An \Integer index refers to a row. # - A \Range index refers to multiple rows. # - A \String index refers to a column. # # Set a table to mixed mode with method #by_col_or_row!: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col_or_row! # => # # # Specify a single row by an \Integer index: # # Get a row. # table[1] # => # # # Set a row, then get it. # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) # table[1] # => # # # Specify a sequence of rows by a \Range: # # Get rows. # table[1..2] # => [#, #] # # Set rows, then get them. # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4]) # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5]) # table[1..2] # => [["Name", #], ["Value", #]] # # Specify a column by its \String header: # # Get a column. # table['Name'] # => ["foo", "bat", "bad"] # # Set a column, then get it. # table['Name'] = ['Foo', 'Bar', 'Baz'] # table['Name'] # => ["Foo", "Bar", "Baz"] class Table # :call-seq: # CSV::Table.new(array_of_rows, headers = nil) -> csv_table # # Returns a new \CSV::Table object. # # - Argument +array_of_rows+ must be an \Array of CSV::Row objects. 
# - Argument +headers+, if given, may be an \Array of Strings.
#
# ---
#
# Create an empty \CSV::Table object:
#   table = CSV::Table.new([])
#   table # => #<CSV::Table mode:col_or_row row_count:1>
#
# Create a non-empty \CSV::Table object:
#   rows = [
#     CSV::Row.new([], []),
#     CSV::Row.new([], []),
#     CSV::Row.new([], []),
#   ]
#   table = CSV::Table.new(rows)
#   table # => #<CSV::Table mode:col_or_row row_count:4>
#
# ---
#
# If argument +headers+ is an \Array of Strings,
# those Strings become the table's headers:
#   table = CSV::Table.new([], headers: ['Name', 'Age'])
#   table.headers # => ["Name", "Age"]
#
# If argument +headers+ is not given and the table has rows,
# the headers are taken from the first row:
#   rows = [
#     CSV::Row.new(['Foo', 'Bar'], []),
#     CSV::Row.new(['foo', 'bar'], []),
#     CSV::Row.new(['FOO', 'BAR'], []),
#   ]
#   table = CSV::Table.new(rows)
#   table.headers # => ["Foo", "Bar"]
#
# If argument +headers+ is not given and the table is empty (has no rows),
# the headers are also empty:
#   table = CSV::Table.new([])
#   table.headers # => []
#
# ---
#
# Raises an exception if argument +array_of_rows+ is not an \Array object:
#   # Raises NoMethodError (undefined method `first' for :foo:Symbol):
#   CSV::Table.new(:foo)
#
# Raises an exception if an element of +array_of_rows+ is not a \CSV::Row object:
#   # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
#   CSV::Table.new([:foo])
def initialize(array_of_rows, headers: nil)
  @table = array_of_rows
  # explicit headers win; otherwise borrow them from the first row,
  # or start with none when there are no rows at all
  @headers = headers || (@table.empty? ? [] : @table.first.headers)

  # a new table always starts out in mixed mode
  @mode = :col_or_row
end

# The current access mode for indexing and iteration.
attr_reader :mode

# Internal data format used to compare equality.
attr_reader :table
protected   :table

### Array Delegation ###

extend Forwardable
def_delegators :@table, :empty?, :length, :size

# :call-seq:
#   table.by_col -> table_dup
#
# Returns a duplicate of +self+, in column mode
# (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.mode # => :col_or_row
#   dup_table = table.by_col
#   dup_table.mode # => :col
#   dup_table.equal?(table) # => false # It's a dup
#
# The duplicate gets its own copy of the table's headers, so the headers
# survive even when the table has no rows:
#   table = CSV::Table.new([], headers: ['Name', 'Value'])
#   table.by_col.headers # => ["Name", "Value"]
#
# This may be used to chain method calls without changing the mode
# (but also will affect performance and memory usage):
#   dup_table.by_col['Name']
#
# Also note that changes to the duplicate table will not affect the original.
def by_col
  # Pass the headers along: without them a duplicate of an empty table
  # would fall back to an empty header list. Both the row list and the
  # header list are copied so the duplicate can be changed independently.
  self.class.new(@table.dup, headers: @headers.dup).by_col!
end

# :call-seq:
#   table.by_col! -> self
#
# Sets the mode for +self+ to column mode
# (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.mode # => :col_or_row
#   table1 = table.by_col!
#   table.mode # => :col
#   table1.equal?(table) # => true # Returned self
def by_col!
  @mode = :col

  self
end

# :call-seq:
#   table.by_col_or_row -> table_dup
#
# Returns a duplicate of +self+, in mixed mode
# (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true).by_col!
#   table.mode # => :col
#   dup_table = table.by_col_or_row
#   dup_table.mode # => :col_or_row
#   dup_table.equal?(table) # => false # It's a dup
#
# The duplicate gets its own copy of the table's headers, so the headers
# survive even when the table has no rows.
#
# This may be used to chain method calls without changing the mode
# (but also will affect performance and memory usage):
#   dup_table.by_col_or_row['Name']
#
# Also note that changes to the duplicate table will not affect the original.
def by_col_or_row
  # see #by_col for why the headers are handed to the duplicate
  self.class.new(@table.dup, headers: @headers.dup).by_col_or_row!
end

# :call-seq:
#   table.by_col_or_row! -> self
#
# Sets the mode for +self+ to mixed mode
# (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true).by_col!
#   table.mode # => :col
#   table1 = table.by_col_or_row!
#   table.mode # => :col_or_row
#   table1.equal?(table) # => true # Returned self
def by_col_or_row!
  @mode = :col_or_row

  self
end

# :call-seq:
#   table.by_row -> table_dup
#
# Returns a duplicate of +self+, in row mode
# (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.mode # => :col_or_row
#   dup_table = table.by_row
#   dup_table.mode # => :row
#   dup_table.equal?(table) # => false # It's a dup
#
# The duplicate gets its own copy of the table's headers, so the headers
# survive even when the table has no rows.
#
# This may be used to chain method calls without changing the mode
# (but also will affect performance and memory usage):
#   dup_table.by_row[1]
#
# Also note that changes to the duplicate table will not affect the original.
def by_row
  # see #by_col for why the headers are handed to the duplicate
  self.class.new(@table.dup, headers: @headers.dup).by_row!
end

# :call-seq:
#   table.by_row! -> self
#
# Sets the mode for +self+ to row mode
# (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.mode # => :col_or_row
#   table1 = table.by_row!
#   table.mode # => :row
#   table1.equal?(table) # => true # Returned self
def by_row!
  @mode = :row

  self
end

# :call-seq:
#   table.headers -> array_of_headers
#
# Returns a new \Array containing the \String headers for the table.
#
# If the table is not empty, returns the headers from the first row:
#   rows = [
#     CSV::Row.new(['Foo', 'Bar'], []),
#     CSV::Row.new(['FOO', 'BAR'], []),
#     CSV::Row.new(['foo', 'bar'], []),
#   ]
#   table = CSV::Table.new(rows)
#   table.headers # => ["Foo", "Bar"]
#   table.delete(0)
#   table.headers # => ["FOO", "BAR"]
#   table.delete(0)
#   table.headers # => ["foo", "bar"]
#
# If the table is empty, returns a copy of the headers in the table itself:
#   table.delete(0)
#   table.headers # => ["Foo", "Bar"]
def headers
  # with no rows left, fall back to a copy of the remembered headers
  return @headers.dup if @table.empty?

  # otherwise the first row is the authority on the current headers
  @table.first.headers
end

# :call-seq:
#   table[n] -> row or column_data
#   table[range] -> array_of_rows or array_of_column_data
#   table[header] -> array_of_column_data
#
# Returns data from the table;  does not modify the table.
#
# ---
#
# Fetch a \Row by Its \Integer Index::
# - Form: table[n], +n+ an integer.
# - Access mode: :row or :col_or_row.
# - Return value: _nth_ row of the table, if that row exists;
#   otherwise +nil+.
#
# Returns the _nth_ row of the table if that row exists:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_row! # => #<CSV::Table mode:row row_count:4>
#   table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
#   table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
#   table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
#
# Counts backward from the last row if +n+ is negative:
#   table[-1] # => #<CSV::Row "Name":"baz" "Value":"2">
#
# Returns +nil+ if +n+ is too large or too small:
#   table[4] # => nil
#   table[-4] # => nil
#
# Raises an exception if the access mode is :row
# and +n+ is not an \Integer:
#   table.by_row! # => #<CSV::Table mode:row row_count:4>
#   # Raises TypeError (no implicit conversion of String into Integer):
#   table['Name']
#
# ---
#
# Fetch a Column by Its \Integer Index::
# - Form: table[n], +n+ an \Integer.
# - Access mode: :col.
# - Return value: _nth_ column of the table, if that column exists;
#   otherwise an \Array of +nil+ fields of length self.size.
#
# Returns the _nth_ column of the table if that column exists:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_col!
# => # # table[1] # => ["0", "1", "2"] # # Counts backward from the last column if +n+ is negative: # table[-2] # => ["foo", "bar", "baz"] # # Returns an \Array of +nil+ fields if +n+ is too large or too small: # table[4] # => [nil, nil, nil] # table[-4] # => [nil, nil, nil] # # --- # # Fetch Rows by \Range:: # - Form: table[range], +range+ a \Range object. # - Access mode: :row or :col_or_row. # - Return value: rows from the table, beginning at row range.start, # if those rows exists. # # Returns rows from the table, beginning at row range.first, # if those rows exist: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => # # rows = table[1..2] # => # # rows # => [#, #] # table.by_col_or_row! # => # # rows = table[1..2] # => # # rows # => [#, #] # # If there are too few rows, returns all from range.start to the end: # rows = table[1..50] # => # # rows # => [#, #] # # Special case: if range.start == table.size, returns an empty \Array: # table[table.size..50] # => [] # # If range.end is negative, calculates the ending index from the end: # rows = table[0..-1] # rows # => [#, #, #] # # If range.start is negative, calculates the starting index from the end: # rows = table[-1..2] # rows # => [#] # # If range.start is larger than table.size, returns +nil+: # table[4..4] # => nil # # --- # # Fetch Columns by \Range:: # - Form: table[range], +range+ a \Range object. # - Access mode: :col. # - Return value: column data from the table, beginning at column range.start, # if those columns exist. # # Returns column values from the table, if the column exists; # the values are arranged by row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! 
#   table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# Special case: if range.start == headers.size,
# returns an \Array (size: table.size) of empty \Arrays:
#   table[table.headers.size..50] # => [[], [], []]
#
# If range.end is negative, calculates the ending index from the end:
#   table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# If range.start is negative, calculates the starting index from the end:
#   table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
#
# If range.start is larger than table.size,
# returns an \Array of +nil+ values:
#   table[4..4] # => [nil, nil, nil]
#
# ---
#
# Fetch a Column by Its \String Header::
# - Form: table[header], +header+ a \String header.
# - Access mode: :col or :col_or_row
# - Return value: column data from the table, if that +header+ exists.
#
# Returns column values from the table, if the column exists:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_col! # => #<CSV::Table mode:col row_count:4>
#   table['Name'] # => ["foo", "bar", "baz"]
#   table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
#   col = table['Name']
#   col # => ["foo", "bar", "baz"]
#
# Modifying the returned column values does not modify the table:
#   col[0] = 'bat'
#   col # => ["bat", "bar", "baz"]
#   table['Name'] # => ["foo", "bar", "baz"]
#
# Returns an \Array of +nil+ values if there is no such column:
#   table['Nosuch'] # => [nil, nil, nil]
def [](index_or_header)
  # Row mode always addresses rows; mixed mode does so only for
  # Integer and Range arguments.
  row_lookup =
    case @mode
    when :row
      true
    when :col_or_row
      index_or_header.is_a?(Integer) || index_or_header.is_a?(Range)
    else
      false
    end
  return @table[index_or_header] if row_lookup

  # everything else selects a column: collect that field from every row
  @table.map { |row| row[index_or_header] }
end

# :call-seq:
#   table[n] = row -> row
#   table[n] = field_or_array_of_fields -> field_or_array_of_fields
#   table[header] = field_or_array_of_fields -> field_or_array_of_fields
#
# Puts data onto the table.
#
# ---
#
# Set a \Row by Its \Integer Index::
# - Form: table[n] = row, +n+ an \Integer,
#   +row+ a \CSV::Row instance or an \Array of fields.
# - Access mode: :row or :col_or_row. # - Return value: +row+. # # If the row exists, it is replaced: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3]) # table.by_row! # => # # return_value = table[0] = new_row # return_value.equal?(new_row) # => true # Returned the row # table[0].to_h # => {"Name"=>"bat", "Value"=>3} # # With access mode :col_or_row: # table.by_col_or_row! # => # # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4]) # table[0].to_h # => {"Name"=>"bam", "Value"=>4} # # With an \Array instead of a \CSV::Row, inherits headers from the table: # array = ['bad', 5] # return_value = table[0] = array # return_value.equal?(array) # => true # Returned the array # table[0].to_h # => {"Name"=>"bad", "Value"=>5} # # If the row does not exist, extends the table by adding rows: # assigns rows with +nil+ as needed: # table.size # => 3 # table[5] = ['bag', 6] # table.size # => 6 # table[3] # => nil # table[4]# => nil # table[5].to_h # => {"Name"=>"bag", "Value"=>6} # # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields. # # --- # # Set a Column by Its \Integer Index:: # - Form: table[n] = array_of_fields, +n+ an \Integer, # +array_of_fields+ an \Array of \String fields. # - Access mode: :col. # - Return value: +array_of_fields+. # # If the column exists, it is replaced: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # new_col = [3, 4, 5] # table.by_col! # => # # return_value = table[1] = new_col # return_value.equal?(new_col) # => true # Returned the column # table[1] # => [3, 4, 5] # # The rows, as revised: # table.by_row! # => # # table[0].to_h # => {"Name"=>"foo", "Value"=>3} # table[1].to_h # => {"Name"=>"bar", "Value"=>4} # table[2].to_h # => {"Name"=>"baz", "Value"=>5} # table.by_col! 
# => # # # If there are too few values, fills with +nil+ values: # table[1] = [0] # table[1] # => [0, nil, nil] # # If there are too many values, ignores the extra values: # table[1] = [0, 1, 2, 3, 4] # table[1] # => [0, 1, 2] # # If a single value is given, replaces all fields in the column with that value: # table[1] = 'bat' # table[1] # => ["bat", "bat", "bat"] # # --- # # Set a Column by Its \String Header:: # - Form: table[header] = field_or_array_of_fields, # +header+ a \String header, +field_or_array_of_fields+ a field value # or an \Array of \String fields. # - Access mode: :col or :col_or_row. # - Return value: +field_or_array_of_fields+. # # If the column exists, it is replaced: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # new_col = [3, 4, 5] # table.by_col! # => # # return_value = table['Value'] = new_col # return_value.equal?(new_col) # => true # Returned the column # table['Value'] # => [3, 4, 5] # # The rows, as revised: # table.by_row! # => # # table[0].to_h # => {"Name"=>"foo", "Value"=>3} # table[1].to_h # => {"Name"=>"bar", "Value"=>4} # table[2].to_h # => {"Name"=>"baz", "Value"=>5} # table.by_col! # => # # # If there are too few values, fills with +nil+ values: # table['Value'] = [0] # table['Value'] # => [0, nil, nil] # # If there are too many values, ignores the extra values: # table['Value'] = [0, 1, 2, 3, 4] # table['Value'] # => [0, 1, 2] # # If the column does not exist, extends the table by adding columns: # table['Note'] = ['x', 'y', 'z'] # table['Note'] # => ["x", "y", "z"] # # The rows, as revised: # table.by_row! # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"} # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"} # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"} # table.by_col! 
#
# If a single value is given, replaces all fields in the column with that value:
#   table['Value'] = 'bat'
#   table['Value'] # => ["bat", "bat", "bat"]
def []=(index_or_header, value)
  # Row mode always addresses rows; mixed mode does so only for Integers.
  row_assignment =
    @mode == :row ||
    (@mode == :col_or_row && index_or_header.is_a?(Integer))

  if row_assignment
    # A plain Array is wrapped in a Row that takes over the table's headers;
    # any other value is stored as given.
    @table[index_or_header] =
      value.is_a?(Array) ? Row.new(headers, value) : value
  else
    # A header is recorded in the header list: an existing one keeps its
    # slot, a new one is appended. Integer column indexes never add headers.
    unless index_or_header.is_a?(Integer)
      position = @headers.index(index_or_header) || @headers.size
      @headers[position] = index_or_header
    end

    one_value_per_row = value.is_a?(Array)
    @table.each_with_index do |row, i|
      row[index_or_header] =
        if row.header_row?
          # a header row carries the header itself in this column
          index_or_header
        elsif one_value_per_row
          # missing values read as nil, surplus values are never reached
          value[i]
        else
          value
        end
    end
  end
end

# :call-seq:
#   table.values_at(*indexes) -> array_of_rows
#   table.values_at(*headers) -> array_of_columns_data
#
# If the access mode is :row or :col_or_row,
# and each argument is either an \Integer or a \Range,
# returns rows.
# Otherwise, returns columns data.
#
# In either case, the returned values are in the order
# specified by the arguments. Arguments may be repeated.
#
# ---
#
# Returns rows as an \Array of \CSV::Row objects.
#
# No argument:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.values_at # => []
#
# One index:
#   values = table.values_at(0)
#   values # => [#<CSV::Row "Name":"foo" "Value":"0">]
#
# Two indexes:
#   values = table.values_at(2, 0)
#   values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">]
#
# One \Range:
#   values = table.values_at(1..2)
#   values # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">]
#
# \Ranges and indexes:
#   values = table.values_at(0..1, 1..2, 0, 2)
#   pp values
# Output:
#   [#<CSV::Row "Name":"foo" "Value":"0">,
#    #<CSV::Row "Name":"bar" "Value":"1">,
#    #<CSV::Row "Name":"bar" "Value":"1">,
#    #<CSV::Row "Name":"baz" "Value":"2">,
#    #<CSV::Row "Name":"foo" "Value":"0">,
#    #<CSV::Row "Name":"baz" "Value":"2">]
#
# ---
#
# Returns columns data as row Arrays,
# each consisting of the specified columns data for that row:
#   values = table.values_at('Name')
#   values # => [["foo"], ["bar"], ["baz"]]
#   values = table.values_at('Value', 'Name')
#   values # => [["0", "foo"], ["1", "bar"], ["2", "baz"]]
def values_at(*indices_or_headers)
  # an argument addresses rows when it is an Integer
  # or a Range whose two ends are Integers
  row_specifier = lambda do |specifier|
    specifier.is_a?(Integer) ||
      (specifier.is_a?(Range) &&
       specifier.first.is_a?(Integer) &&
       specifier.last.is_a?(Integer))
  end

  by_rows = @mode == :row ||
            (@mode == :col_or_row && indices_or_headers.all?(&row_specifier))

  if by_rows
    @table.values_at(*indices_or_headers)
  else
    # by headers: ask every row for the requested fields
    @table.map { |row| row.values_at(*indices_or_headers) }
  end
end

# :call-seq:
#   table << row_or_array -> self
#
# If +row_or_array+ is a \CSV::Row object,
# it is appended to the table:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table << CSV::Row.new(table.headers, ['bat', 3])
#   table[3] # => #<CSV::Row "Name":"bat" "Value":3>
#
# If +row_or_array+ is an \Array, it is used to create a new
# \CSV::Row object which is then appended to the table:
#   table << ['bam', 4]
#   table[4] # => #<CSV::Row "Name":"bam" "Value":4>
def <<(row_or_array)
  # an Array becomes a Row carrying the table's headers; a Row is used as is
  new_row =
    row_or_array.is_a?(Array) ? Row.new(headers, row_or_array) : row_or_array
  @table.push(new_row)

  self # so that calls can be chained
end

#
# :call-seq:
#   table.push(*rows_or_arrays) -> self
#
# A shortcut for appending multiple rows.
# Equivalent to:
#   rows.each {|row| self << row }
#
# Each argument may be either a \CSV::Row object or an \Array:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   rows = [
#     CSV::Row.new(table.headers, ['bat', 3]),
#     ['bam', 4]
#   ]
#   table.push(*rows)
#   table[3..4] # => [#<CSV::Row "Name":"bat" "Value":3>, #<CSV::Row "Name":"bam" "Value":4>]
def push(*rows)
  rows.each { |row| self << row }

  self # for chaining
end

# :call-seq:
#   table.delete(*indexes) -> deleted_values
#   table.delete(*headers) -> deleted_values
#
# Deletes rows or columns and returns what was deleted.
# Each argument is handled on its own:
# - If the access mode is :row, or the access mode is :col_or_row
#   and the argument is an \Integer, the row at that index is deleted.
# - Otherwise the argument selects a column (by header, or by \Integer
#   index in :col mode) and that column is deleted from every row.
#
# With a single argument, returns the deleted value itself;
# with several arguments, returns an \Array of the deleted values,
# in the order specified by the arguments.
#
# Raises ArgumentError if no argument is given.
#
# ---
#
# Deleted rows are returned as \CSV::Row objects.
#
# One index:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   deleted_values = table.delete(0)
#   deleted_values # => #<CSV::Row "Name":"foo" "Value":"0">
#
# Two indexes:
#   table = CSV.parse(source, headers: true)
#   deleted_values = table.delete(2, 0)
#   deleted_values # => [#<CSV::Row "Name":"baz" "Value":"2">, #<CSV::Row "Name":"foo" "Value":"0">]
#
# ---
#
# Deleted columns are returned as column Arrays.
# A column that does not exist yields +nil+ for every row.
#
# One header:
#   table = CSV.parse(source, headers: true)
#   deleted_values = table.delete('Name')
#   deleted_values # => ["foo", "bar", "baz"]
#
# Two headers:
#   table = CSV.parse(source, headers: true)
#   deleted_values = table.delete('Value', 'Name')
#   deleted_values # => [["0", "1", "2"], ["foo", "bar", "baz"]]
def delete(*indexes_or_headers)
  if indexes_or_headers.empty?
    raise ArgumentError, "wrong number of arguments (given 0, expected 1+)"
  end

  deleted_values = indexes_or_headers.map do |index_or_header|
    if @mode == :row ||  # by index
       (@mode == :col_or_row && index_or_header.is_a?(Integer))
      @table.delete_at(index_or_header)
    else                 # by header
      # keep the table's own header list in step with the rows
      if index_or_header.is_a? Integer
        @headers.delete_at(index_or_header)
      else
        @headers.delete(index_or_header)
      end
      # Row#delete answers nil for an out-of-range Integer index (and an
      # empty Array for an unknown header); &. keeps the nil case from
      # raising NoMethodError and reports nil for that row instead.
      @table.map { |row| row.delete(index_or_header)&.last }
    end
  end

  if indexes_or_headers.size == 1
    deleted_values[0]
  else
    deleted_values
  end
end

# :call-seq:
#   table.delete_if {|row_or_column| ... } -> self
#
# Removes rows or columns for which the block returns a truthy value;
# returns +self+.
#
# Removes rows when the access mode is :row or :col_or_row;
# calls the block with each \CSV::Row object:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_row! # => #<CSV::Table mode:row row_count:4>
#   table.size # => 3
#   table.delete_if {|row| row['Name'].start_with?('b') }
#   table.size # => 1
#
# Removes columns when the access mode is :col;
# calls the block with each column as a 2-element array
# containing the header and an \Array of column fields:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_col! # => #<CSV::Table mode:col row_count:4>
#   table.headers.size # => 2
#   table.delete_if {|column_data| column_data[1].include?('2') }
#   table.headers.size # => 1
#
# Returns a new \Enumerator if no block is given;
# its size is the row count in :row and :col_or_row mode
# and the header count in :col mode:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.delete_if # => #<Enumerator: #<CSV::Table mode:col_or_row row_count:4>:delete_if>
def delete_if(&block)
  unless block_given?
    # The parentheses matter: "a or b ? x : y" parses as "a or (b ? x : y)",
    # which made the size block answer true instead of the row count
    # in :row mode.
    return enum_for(__method__) {
      (@mode == :row || @mode == :col_or_row) ? size : headers.size
    }
  end

  if @mode == :row || @mode == :col_or_row # by index
    @table.delete_if(&block)
  else # by header
    # #headers builds a new Array per call, so deleting columns while
    # iterating does not disturb the loop
    headers.each do |header|
      delete(header) if yield([header, self[header]])
    end
  end

  self # for chaining
end

include Enumerable

# :call-seq:
#   table.each {|row_or_column| ... } -> self
#
# Calls the block with each row or column; returns +self+.
#
# When the access mode is :row or :col_or_row,
# calls the block with each \CSV::Row object:
#   source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
#   table = CSV.parse(source, headers: true)
#   table.by_row!
# => # # table.each {|row| p row } # Output: # # # # # # # # When the access mode is :col, # calls the block with each column as a 2-element array # containing the header and an \Array of column fields: # table.by_col! # => # # table.each {|column_data| p column_data } # Output: # ["Name", ["foo", "bar", "baz"]] # ["Value", ["0", "1", "2"]] # # Returns a new \Enumerator if no block is given: # table.each # => #:each> def each(&block) return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? if @mode == :col headers.each.with_index do |header, i| yield([header, @table.map {|row| row[header, i]}]) end else @table.each(&block) end self # for chaining end # :call-seq: # table == other_table -> true or false # # Returns +true+ if all each row of +self+ == # the corresponding row of +other_table+, otherwise, +false+. # # The access mode does no affect the result. # # Equal tables: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # other_table = CSV.parse(source, headers: true) # table == other_table # => true # # Different row count: # other_table.delete(2) # table == other_table # => false # # Different last row: # other_table << ['bat', 3] # table == other_table # => false def ==(other) return @table == other.table if other.is_a? CSV::Table @table == other end # :call-seq: # table.to_a -> array_of_arrays # # Returns the table as an \Array of \Arrays; # the headers are in the first row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]] def to_a array = [headers] @table.each do |row| array.push(row.fields) unless row.header_row? end array end # :call-seq: # table.to_csv(**options) -> csv_string # # Returns the table as \CSV string. # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating]. 
# # Defaults option +write_headers+ to +true+: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # # Omits the headers if option +write_headers+ is given as +false+ # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]): # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n" # # Limit rows if option +limit+ is given like +2+: # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n" def to_csv(write_headers: true, limit: nil, **options) array = write_headers ? [headers.to_csv(**options)] : [] limit ||= @table.size limit = @table.size + 1 + limit if limit < 0 limit = 0 if limit < 0 @table.first(limit).each do |row| array.push(row.fields.to_csv(**options)) unless row.header_row? end array.join("") end alias_method :to_s, :to_csv # # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step, # returning nil if any intermediate step is nil. # def dig(index_or_header, *index_or_headers) value = self[index_or_header] if value.nil? nil elsif index_or_headers.empty? value else unless value.respond_to?(:dig) raise TypeError, "#{value.class} does not have \#dig method" end value.dig(*index_or_headers) end end # :call-seq: # table.inspect => string # # Returns a US-ASCII-encoded \String showing table: # - Class: CSV::Table. # - Access mode: :row, :col, or :col_or_row. # - Size: Row count, including the header row. # # Example: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.inspect # => "#\nName,Value\nfoo,0\nbar,1\nbaz,2\n" # def inspect inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>" summary = to_csv(limit: 5) inspected << "\n" << summary if summary.encoding.ascii_compatible? 
inspected end end end csv-3.3.5/lib/csv/version.rb000066400000000000000000000001531501670011600156640ustar00rootroot00000000000000# frozen_string_literal: true class CSV # The version of the installed library. VERSION = "3.3.5" end csv-3.3.5/lib/csv/writer.rb000066400000000000000000000134761501670011600155270ustar00rootroot00000000000000# frozen_string_literal: true require_relative "input_record_separator" require_relative "row" class CSV # Note: Don't use this class directly. This is an internal class. class Writer # # A CSV::Writer receives an output, prepares the header, format and output. # It allows us to write new rows in the object and rewind it. # attr_reader :lineno attr_reader :headers def initialize(output, options) @output = output @options = options @lineno = 0 @fields_converter = nil prepare if @options[:write_headers] and @headers self << @headers end @fields_converter = @options[:fields_converter] end # # Adds a new row # def <<(row) case row when Row row = row.fields when Hash row = @headers.collect {|header| row[header]} end @headers ||= row if @use_headers @lineno += 1 if @fields_converter row = @fields_converter.convert(row, nil, lineno) end i = -1 converted_row = row.collect do |field| i += 1 quote(field, i) end line = converted_row.join(@column_separator) + @row_separator if @output_encoding line = line.encode(@output_encoding) end @output << line self end # # Winds back to the beginning # def rewind @lineno = 0 @headers = nil if @options[:headers].nil? 
end private def prepare @encoding = @options[:encoding] prepare_header prepare_format prepare_output end def prepare_header headers = @options[:headers] case headers when Array @headers = headers @use_headers = true when String @headers = CSV.parse_line(headers, col_sep: @options[:column_separator], row_sep: @options[:row_separator], quote_char: @options[:quote_character]) @use_headers = true when true @headers = nil @use_headers = true else @headers = nil @use_headers = false end return unless @headers converter = @options[:header_fields_converter] @headers = converter.convert(@headers, nil, 0, []) @headers.each do |header| header.freeze if header.is_a?(String) end end def prepare_force_quotes_fields(force_quotes) @force_quotes_fields = {} force_quotes.each do |name_or_index| case name_or_index when Integer index = name_or_index @force_quotes_fields[index] = true when String, Symbol name = name_or_index.to_s if @headers.nil? message = ":headers is required when you use field name " + "in :force_quotes: " + "#{name_or_index.inspect}: #{force_quotes.inspect}" raise ArgumentError, message end index = @headers.index(name) next if index.nil? 
@force_quotes_fields[index] = true else message = ":force_quotes element must be " + "field index or field name: " + "#{name_or_index.inspect}: #{force_quotes.inspect}" raise ArgumentError, message end end end def prepare_format @column_separator = @options[:column_separator].to_s.encode(@encoding) row_separator = @options[:row_separator] if row_separator == :auto @row_separator = InputRecordSeparator.value.encode(@encoding) else @row_separator = row_separator.to_s.encode(@encoding) end @quote_character = @options[:quote_character] force_quotes = @options[:force_quotes] if force_quotes.is_a?(Array) prepare_force_quotes_fields(force_quotes) @force_quotes = false elsif force_quotes @force_quotes_fields = nil @force_quotes = true else @force_quotes_fields = nil @force_quotes = false end unless @force_quotes @quotable_pattern = Regexp.new("[\r\n".encode(@encoding) + Regexp.escape(@column_separator) + Regexp.escape(@quote_character.encode(@encoding)) + "]".encode(@encoding)) end @quote_empty = @options.fetch(:quote_empty, true) end def prepare_output @output_encoding = nil return unless @output.is_a?(StringIO) output_encoding = @output.internal_encoding || @output.external_encoding if @encoding != output_encoding if @options[:force_encoding] @output_encoding = output_encoding else compatible_encoding = Encoding.compatible?(@encoding, output_encoding) if compatible_encoding @output.set_encoding(compatible_encoding) @output.seek(0, IO::SEEK_END) end end end end def quote_field(field) field = String(field) encoded_quote_character = @quote_character.encode(field.encoding) encoded_quote_character + field.gsub(encoded_quote_character, encoded_quote_character * 2) + encoded_quote_character end def quote(field, i) if @force_quotes quote_field(field) elsif @force_quotes_fields and @force_quotes_fields[i] quote_field(field) else if field.nil? 
# represent +nil+ fields as empty unquoted fields "" else field = String(field) # Stringify fields # represent empty fields as empty quoted fields if (@quote_empty and field.empty?) or (field.valid_encoding? and @quotable_pattern.match?(field)) quote_field(field) else field # unquoted field end end end end end end csv-3.3.5/profile/000077500000000000000000000000001501670011600137525ustar00rootroot00000000000000csv-3.3.5/profile/parse.rb000077500000000000000000000023421501670011600154150ustar00rootroot00000000000000#!/usr/bin/env ruby require "csv" require "optparse" n_columns = 1000 n_rows = 1000 type = "unquoted" alphas = nil hiraganas = nil builders = { "unquoted" => lambda {(alphas.join(",") + "\r\n") * n_rows}, "quoted" => lambda {(alphas.map {|s| %("#{s}")}.join(",") + "\r\n") * n_rows}, "include-column-separator" => lambda {(alphas.map {|s| %(",#{s}")}.join(",") + "\r\n") * n_rows}, "include-row-separator" => lambda {(alphas.map {|s| %("#{s}\r\n")}.join(",") + "\r\n") * n_rows}, "utf-8" => lambda {((hiraganas.join(",") + "\r\n") * n_rows).encode("UTF-8")}, "windows-31j" => lambda {((hiraganas.join(",") + "\r\n") * n_rows).encode("Windows-31J")}, } parser = OptionParser.new parser.on("--n-columns=N", Integer, "The number of columns to be parsed", "(#{n_columns})") do |n| n_columns = n end parser.on("--n-rows=N", Integer, "The number of rows to be parsed", "(#{n_rows})") do |n| n_rows = n end parser.on("--type=TYPE", builders.keys, "The type for profile", "(#{type})") do |t| type = t end parser.parse!(ARGV) alphas = ["AAAAA"] * n_columns hiragans = ["あああああ"] * n_columns data = builders[type].call require "profile" CSV.parse(data) csv-3.3.5/profile/write.rb000077500000000000000000000017231501670011600154370ustar00rootroot00000000000000#!/usr/bin/env ruby require "csv" require "optparse" n_columns = 5 n_rows = 100 type = "generate-line" parser = OptionParser.new parser.on("--n-columns=N", Integer, "The number of columns to be generated", "(#{n_columns})") do |n| 
n_columns = n end parser.on("--n-rows=N", Integer, "The number of rows to be generated", "(#{n_rows})") do |n| n_rows = n end parser.on("--type=TYPE", "The type to write", "(#{type})") do |t| type = t end parser.parse!(ARGV) fields = ["AAAAA"] * n_columns headers = n_columns.times.collect do |i| "header#{i}" end row = CSV::Row.new(headers, fields) raw_row = {} n_columns.times do |i| raw_row[headers[i]] = fields[i] end require "profile" case type when "generate-line" n_rows.times do CSV.generate_line(fields) end when "add" output = StringIO.new csv = CSV.new(output) n_rows.times do csv << row end else raise "unknown type: #{type.inspect}" end csv-3.3.5/run-test.rb000077500000000000000000000005161501670011600144250ustar00rootroot00000000000000#!/usr/bin/env ruby $VERBOSE = true $LOAD_PATH.unshift("test") $LOAD_PATH.unshift("test/lib") $LOAD_PATH.unshift("lib") Dir.glob("test/csv/**/*test_*.rb") do |test_rb| # Ensure we only load syntax that we can handle next if RUBY_VERSION < "2.7" && test_rb.end_with?("test_patterns.rb") require File.expand_path(test_rb) end csv-3.3.5/test/000077500000000000000000000000001501670011600132715ustar00rootroot00000000000000csv-3.3.5/test/csv/000077500000000000000000000000001501670011600140645ustar00rootroot00000000000000csv-3.3.5/test/csv/helper.rb000066400000000000000000000016371501670011600156770ustar00rootroot00000000000000require "tempfile" require "test/unit" require "csv" require_relative "../lib/with_different_ofs" module CSVHelper def with_chunk_size(chunk_size) chunk_size_keep = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] begin ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size yield ensure ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] = chunk_size_keep end end def with_verbose(verbose) original = $VERBOSE begin $VERBOSE = verbose yield ensure $VERBOSE = original end end def with_default_internal(encoding) original = Encoding.default_internal begin with_verbose(false) do Encoding.default_internal = encoding end yield ensure 
with_verbose(false) do Encoding.default_internal = original end end end end class Ractor alias value take unless method_defined? :value end if defined?(Ractor) csv-3.3.5/test/csv/interface/000077500000000000000000000000001501670011600160245ustar00rootroot00000000000000csv-3.3.5/test/csv/interface/test_delegation.rb000066400000000000000000000014771501670011600215340ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" class TestCSVInterfaceDelegation < Test::Unit::TestCase class TestStringIO < self def setup @csv = CSV.new("h1,h2") end def test_flock assert_raise(NotImplementedError) do @csv.flock(File::LOCK_EX) end end def test_ioctl assert_raise(NotImplementedError) do @csv.ioctl(0) end end def test_stat assert_raise(NotImplementedError) do @csv.stat end end def test_to_i assert_raise(NotImplementedError) do @csv.to_i end end def test_binmode? assert_equal(false, @csv.binmode?) end def test_path assert_equal(nil, @csv.path) end def test_to_io assert_instance_of(StringIO, @csv.to_io) end end end csv-3.3.5/test/csv/interface/test_read.rb000066400000000000000000000251541501670011600203320ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" class TestCSVInterfaceRead < Test::Unit::TestCase extend DifferentOFS def setup super @data = "" @data << "1\t2\t3\r\n" @data << "4\t5\r\n" @input = Tempfile.new(["interface-read", ".csv"], binmode: true) @input << @data @input.rewind @rows = [ ["1", "2", "3"], ["4", "5"], ] end def teardown @input.close(true) super end def test_foreach rows = [] CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) end def test_foreach_stringio string_io = StringIO.new(@data) rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a assert_equal(@rows, rows) end def test_foreach_stringio_with_bom if RUBY_VERSION < "2.7" # Support to StringIO was dropped for Ruby 2.6 and earlier without BOM support: # 
https://github.com/ruby/stringio/pull/47 omit("StringIO's BOM support isn't available with Ruby < 2.7") end string_io = StringIO.new("\ufeff#{@data}") # U+FEFF ZERO WIDTH NO-BREAK SPACE rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a assert_equal(@rows, rows) end if respond_to?(:ractor) ractor def test_foreach_in_ractor ractor = Ractor.new(@input.path) do |path| rows = [] CSV.foreach(path, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end rows end rows = [ ["1", "2", "3"], ["4", "5"], ] assert_equal(rows, ractor.value) end end def test_foreach_mode rows = [] CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) end def test_foreach_enumerator rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a assert_equal(@rows, rows) end def test_closed? csv = CSV.open(@input.path, "r+", col_sep: "\t", row_sep: "\r\n") assert_not_predicate(csv, :closed?) csv.close assert_predicate(csv, :closed?) end def test_open_auto_close csv = nil CSV.open(@input.path) do |_csv| csv = _csv end assert_predicate(csv, :closed?) end def test_open_closed csv = nil CSV.open(@input.path) do |_csv| csv = _csv csv.close end assert_predicate(csv, :closed?) end def test_open_block_return_value return_value = CSV.open(@input.path) do "Return value." 
end assert_equal("Return value.", return_value) end def test_open_mode_integer CSV.open(@input.path, File::RDONLY, col_sep: "\t") do |csv| assert_equal(@rows, csv.read) end end def test_open_encoding_valid # U+1F600 GRINNING FACE # U+1F601 GRINNING FACE WITH SMILING EYES File.open(@input.path, "w") do |file| file << "\u{1F600},\u{1F601}" end CSV.open(@input.path, encoding: "utf-8") do |csv| assert_equal([["\u{1F600}", "\u{1F601}"]], csv.to_a) end end def test_open_encoding_invalid # U+1F600 GRINNING FACE # U+1F601 GRINNING FACE WITH SMILING EYES File.open(@input.path, "w") do |file| file << "\u{1F600},\u{1F601}" end CSV.open(@input.path, encoding: "EUC-JP") do |csv| error = assert_raise(CSV::InvalidEncodingError) do csv.shift end assert_equal([Encoding::EUC_JP, "Invalid byte sequence in EUC-JP in line 1."], [error.encoding, error.message]) end end def test_open_encoding_nonexistent _output, error = capture_output do CSV.open(@input.path, encoding: "nonexistent") do end end assert_equal("path:0: warning: Unsupported encoding nonexistent ignored\n", error.gsub(/\A.+:\d+: /, "path:0: ")) end def test_open_encoding_utf_8_with_bom # U+FEFF ZERO WIDTH NO-BREAK SPACE, BOM # U+1F600 GRINNING FACE # U+1F601 GRINNING FACE WITH SMILING EYES File.open(@input.path, "w") do |file| file << "\u{FEFF}\u{1F600},\u{1F601}" end CSV.open(@input.path, encoding: "bom|utf-8") do |csv| assert_equal([["\u{1F600}", "\u{1F601}"]], csv.to_a) end end def test_open_invalid_byte_sequence_in_utf_8 CSV.open(@input.path, "w", encoding: Encoding::CP932) do |rows| error = assert_raise(Encoding::InvalidByteSequenceError) do rows << ["\x82\xa0"] end assert_equal('"\x82" on UTF-8', error.message) end end def test_open_with_invalid_nil CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: nil) do |rows| error = assert_raise(Encoding::InvalidByteSequenceError) do rows << ["\x82\xa0"] end assert_equal('"\x82" on UTF-8', error.message) end end def test_open_with_invalid_replace CSV.open(@input.path, 
"w", encoding: Encoding::CP932, invalid: :replace) do |rows| rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)] end CSV.open(@input.path, encoding: Encoding::CP932) do |csv| assert_equal([["??"]], csv.to_a) end end def test_open_with_invalid_replace_and_replace_string CSV.open(@input.path, "w", encoding: Encoding::CP932, invalid: :replace, replace: "X") do |rows| rows << ["\x82\xa0".force_encoding(Encoding::UTF_8)] end CSV.open(@input.path, encoding: Encoding::CP932) do |csv| assert_equal([["XX"]], csv.to_a) end end def test_open_with_undef_replace # U+00B7 Middle Dot CSV.open(@input.path, "w", encoding: Encoding::CP932, undef: :replace) do |rows| rows << ["\u00B7"] end CSV.open(@input.path, encoding: Encoding::CP932) do |csv| assert_equal([["?"]], csv.to_a) end end def test_open_with_undef_replace_and_replace_string # U+00B7 Middle Dot CSV.open(@input.path, "w", encoding: Encoding::CP932, undef: :replace, replace: "X") do |rows| rows << ["\u00B7"] end CSV.open(@input.path, encoding: Encoding::CP932) do |csv| assert_equal([["X"]], csv.to_a) end end def test_open_with_newline CSV.open(@input.path, col_sep: "\t", universal_newline: true) do |csv| assert_equal(@rows, csv.to_a) end File.binwrite(@input.path, "1,2,3\r\n" "4,5\n") CSV.open(@input.path, newline: :universal) do |csv| assert_equal(@rows, csv.to_a) end end def test_open_with_bom if /mingw|mswin/.match?(RUBY_PLATFORM) omit("BOM detection on Windows may be buggy: Bug #20526") end csv_data = @input.read bom = "\ufeff" # U+FEFF ZERO WIDTH NO-BREAK SPACE File.binwrite(@input.path, "#{bom}#{csv_data}") @input.rewind CSV.open(@input.path, col_sep: "\t") do |csv| assert_equal(@rows, csv.to_a) end end def test_parse assert_equal(@rows, CSV.parse(@data, col_sep: "\t", row_sep: "\r\n")) end def test_parse_block rows = [] CSV.parse(@data, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) end def test_parse_enumerator rows = CSV.parse(@data, col_sep: "\t", row_sep: "\r\n").to_a 
assert_equal(@rows, rows) end def test_parse_headers_only table = CSV.parse("a,b,c", headers: true) assert_equal([ ["a", "b", "c"], [], ], [ table.headers, table.each.to_a, ]) end def test_parse_line assert_equal(["1", "2", "3"], CSV.parse_line("1;2;3", col_sep: ";")) end def test_parse_line_shortcut assert_equal(["1", "2", "3"], "1;2;3".parse_csv(col_sep: ";")) end def test_parse_line_empty assert_equal(nil, CSV.parse_line("")) # to signal eof end def test_parse_line_empty_line assert_equal([], CSV.parse_line("\n1,2,3")) end def test_read assert_equal(@rows, CSV.read(@input.path, col_sep: "\t", row_sep: "\r\n")) end if respond_to?(:ractor) ractor def test_read_in_ractor ractor = Ractor.new(@input.path) do |path| CSV.read(path, col_sep: "\t", row_sep: "\r\n") end rows = [ ["1", "2", "3"], ["4", "5"], ] assert_equal(rows, ractor.value) end end def test_readlines assert_equal(@rows, CSV.readlines(@input.path, col_sep: "\t", row_sep: "\r\n")) end def test_open_read rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| csv.read end assert_equal(@rows, rows) end def test_open_readlines rows = CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| csv.readlines end assert_equal(@rows, rows) end def test_table table = CSV.table(@input.path, col_sep: "\t", row_sep: "\r\n") assert_equal(CSV::Table.new([ CSV::Row.new([:"1", :"2", :"3"], [4, 5, nil]), ]), table) end def test_shift # aliased as gets() and readline() CSV.open(@input.path, "rb+", col_sep: "\t", row_sep: "\r\n") do |csv| rows = [ csv.shift, csv.shift, csv.shift, ] assert_equal(@rows + [nil], rows) end end def test_enumerator CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| assert_equal(@rows, csv.each.to_a) end end def test_shift_and_each CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| rows = [] rows << csv.shift rows.concat(csv.each.to_a) assert_equal(@rows, rows) end end def test_each_twice CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| 
assert_equal([ @rows, [], ], [ csv.each.to_a, csv.each.to_a, ]) end end def test_eof? eofs = [] CSV.open(@input.path, col_sep: "\t", row_sep: "\r\n") do |csv| eofs << csv.eof? csv.shift eofs << csv.eof? csv.shift eofs << csv.eof? end assert_equal([false, false, true], eofs) end def test_new_nil assert_raise_with_message ArgumentError, "Cannot parse nil as CSV" do CSV.new(nil) end end def test_options_not_modified options = {}.freeze CSV.foreach(@input.path, **options) CSV.open(@input.path, **options) {} CSV.parse("", **options) CSV.parse_line("", **options) CSV.read(@input.path, **options) CSV.readlines(@input.path, **options) CSV.table(@input.path, **options) end end csv-3.3.5/test/csv/interface/test_read_write.rb000066400000000000000000000046341501670011600215440ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" class TestCSVInterfaceReadWrite < Test::Unit::TestCase extend DifferentOFS def test_filter input = <<-CSV.freeze 1;2;3 4;5 CSV output = "" CSV.filter(input, output, in_col_sep: ";", out_col_sep: ",", converters: :all) do |row| row.map! 
{|n| n * 2} row << "Added\r" end assert_equal(<<-CSV, output) 2,4,6,"Added\r" 8,10,"Added\r" CSV end def test_filter_headers_true input = <<-CSV.freeze Name,Value foo,0 bar,1 baz,2 CSV output = "" CSV.filter(input, output, headers: true) do |row| row[0] += "X" row[1] = row[1].to_i + 1 end assert_equal(<<-CSV, output) fooX,1 barX,2 bazX,3 CSV end def test_filter_headers_true_write_headers input = <<-CSV.freeze Name,Value foo,0 bar,1 baz,2 CSV output = "" CSV.filter(input, output, headers: true, out_write_headers: true) do |row| if row.is_a?(Array) row[0] += "X" row[1] += "Y" else row[0] += "X" row[1] = row[1].to_i + 1 end end assert_equal(<<-CSV, output) NameX,ValueY fooX,1 barX,2 bazX,3 CSV end def test_filter_headers_array_write_headers input = <<-CSV.freeze foo,0 bar,1 baz,2 CSV output = "" CSV.filter(input, output, headers: ["Name", "Value"], out_write_headers: true) do |row| row[0] += "X" row[1] = row[1].to_i + 1 end assert_equal(<<-CSV, output) Name,Value fooX,1 barX,2 bazX,3 CSV end def test_instance_same data = "" assert_equal(CSV.instance(data, col_sep: ";").object_id, CSV.instance(data, col_sep: ";").object_id) end def test_instance_append output = "" CSV.instance(output, col_sep: ";") << ["a", "b", "c"] assert_equal(<<-CSV, output) a;b;c CSV CSV.instance(output, col_sep: ";") << [1, 2, 3] assert_equal(<<-CSV, output) a;b;c 1;2;3 CSV end def test_instance_shortcut assert_equal(CSV.instance, CSV {|csv| csv}) end def test_instance_shortcut_with_io io = StringIO.new from_instance = CSV.instance(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] } from_shortcut = CSV(io, col_sep: ";") { |csv| csv << ["e", "f", "g"] } assert_equal(from_instance, from_shortcut) assert_equal(from_instance.string, "a;b;c\ne;f;g\n") end end csv-3.3.5/test/csv/interface/test_write.rb000066400000000000000000000106011501670011600205400ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" class TestCSVInterfaceWrite < Test::Unit::TestCase extend 
DifferentOFS def setup super @output = Tempfile.new(["interface-write", ".csv"]) end def teardown @output.close(true) super end def test_generate_default csv_text = CSV.generate do |csv| csv << [1, 2, 3] << [4, nil, 5] end assert_equal(<<-CSV, csv_text) 1,2,3 4,,5 CSV end if respond_to?(:ractor) ractor def test_generate_default_in_ractor ractor = Ractor.new do CSV.generate do |csv| csv << [1, 2, 3] << [4, nil, 5] end end assert_equal(<<-CSV, ractor.value) 1,2,3 4,,5 CSV end end def test_generate_append csv_text = <<-CSV 1,2,3 4,,5 CSV CSV.generate(csv_text) do |csv| csv << ["last", %Q{"row"}] end assert_equal(<<-CSV, csv_text) 1,2,3 4,,5 last,"""row""" CSV end def test_generate_no_new_line csv_text = CSV.generate("test") do |csv| csv << ["row"] end assert_equal(<<-CSV, csv_text) testrow CSV end def test_generate_line_col_sep line = CSV.generate_line(["1", "2", "3"], col_sep: ";") assert_equal(<<-LINE, line) 1;2;3 LINE end def test_generate_line_row_sep line = CSV.generate_line(["1", "2"], row_sep: nil) assert_equal(<<-LINE.chomp, line) 1,2 LINE end def test_generate_line_shortcut line = ["1", "2", "3"].to_csv(col_sep: ";") assert_equal(<<-LINE, line) 1;2;3 LINE end def test_generate_lines lines = CSV.generate_lines([["foo", "bar"], [1, 2], [3, 4]]) assert_equal(<<-LINES, lines) foo,bar 1,2 3,4 LINES end def test_headers_detection headers = ["a", "b", "c"] CSV.open(@output.path, "w", headers: true) do |csv| csv << headers csv << ["1", "2", "3"] assert_equal(headers, csv.headers) end end def test_lineno CSV.open(@output.path, "w") do |csv| n_lines = 20 n_lines.times do csv << ["a", "b", "c"] end assert_equal(n_lines, csv.lineno) end end def test_append_row CSV.open(@output.path, "wb") do |csv| csv << CSV::Row.new([], ["1", "2", "3"]) << CSV::Row.new([], ["a", "b", "c"]) end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 1,2,3 a,b,c CSV end if respond_to?(:ractor) ractor def test_append_row_in_ractor ractor = Ractor.new(@output.path) do |path| 
CSV.open(path, "wb") do |csv| csv << CSV::Row.new([], ["1", "2", "3"]) << CSV::Row.new([], ["a", "b", "c"]) end end ractor.value assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 1,2,3 a,b,c CSV end end def test_append_hash CSV.open(@output.path, "wb", headers: true) do |csv| csv << [:a, :b, :c] csv << {a: 1, b: 2, c: 3} csv << {a: 4, b: 5, c: 6} end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) a,b,c 1,2,3 4,5,6 CSV end def test_append_hash_headers_array CSV.open(@output.path, "wb", headers: [:b, :a, :c]) do |csv| csv << {a: 1, b: 2, c: 3} csv << {a: 4, b: 5, c: 6} end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 2,1,3 5,4,6 CSV end def test_append_hash_headers_string CSV.open(@output.path, "wb", headers: "b|a|c", col_sep: "|") do |csv| csv << {"a" => 1, "b" => 2, "c" => 3} csv << {"a" => 4, "b" => 5, "c" => 6} end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) 2|1|3 5|4|6 CSV end def test_write_headers CSV.open(@output.path, "wb", headers: "b|a|c", write_headers: true, col_sep: "|" ) do |csv| csv << {"a" => 1, "b" => 2, "c" => 3} csv << {"a" => 4, "b" => 5, "c" => 6} end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) b|a|c 2|1|3 5|4|6 CSV end def test_write_headers_empty CSV.open(@output.path, "wb", headers: "b|a|c", write_headers: true, col_sep: "|" ) do |csv| end assert_equal(<<-CSV, File.read(@output.path, mode: "rb")) b|a|c CSV end def test_options_not_modified options = {}.freeze CSV.generate(**options) {} CSV.generate_line([], **options) CSV.filter("", "", **options) CSV.instance("", **options) end end csv-3.3.5/test/csv/line_endings.gz000066400000000000000000000000731501670011600170640ustar00rootroot00000000000000){QGline_endings.csvK)))(JM211{Γcsv-3.3.5/test/csv/parse/000077500000000000000000000000001501670011600151765ustar00rootroot00000000000000csv-3.3.5/test/csv/parse/test_column_separator.rb000066400000000000000000000016751501670011600221500ustar00rootroot00000000000000# -*- coding: utf-8 -*- # 
frozen_string_literal: false require_relative "../helper" class TestCSVParseColumnSeparator < Test::Unit::TestCase extend DifferentOFS def test_comma assert_equal([["a", "b", nil, "d"]], CSV.parse("a,b,,d", col_sep: ",")) end def test_space assert_equal([["a", "b", nil, "d"]], CSV.parse("a b d", col_sep: " ")) end def test_tab assert_equal([["a", "b", nil, "d"]], CSV.parse("a\tb\t\td", col_sep: "\t")) end def test_multiple_characters_include_sub_separator assert_equal([["a b", nil, "d"]], CSV.parse("a b d", col_sep: " ")) end def test_multiple_characters_leading_empty_fields data = <<-CSV <=><=>A<=>B<=>C 1<=>2<=>3 CSV assert_equal([ [nil, nil, "A", "B", "C"], ["1", "2", "3"], ], CSV.parse(data, col_sep: "<=>")) end end csv-3.3.5/test/csv/parse/test_convert.rb000066400000000000000000000106251501670011600202460ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseConvert < Test::Unit::TestCase extend DifferentOFS def setup super @data = "Numbers,:integer,1,:float,3.015" @parser = CSV.new(@data) @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field} @time = Time.utc(2018, 12, 30, 6, 41, 29) @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y") @preserving_converter = lambda do |field, info| f = field.encode(CSV::ConverterEncoding) return f if info.quoted? begin Integer(f, 10) rescue f end end @quoted_header_converter = lambda do |field, info| f = field.encode(CSV::ConverterEncoding) return f if info.quoted? 
f.to_sym end end def test_integer @parser.convert(:integer) assert_equal(["Numbers", ":integer", 1, ":float", "3.015"], @parser.shift) end def test_float @parser.convert(:float) assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015], @parser.shift) end def test_float_integer @parser.convert(:float) @parser.convert(:integer) assert_equal(["Numbers", ":integer", 1.0, ":float", 3.015], @parser.shift) end def test_integer_float @parser.convert(:integer) @parser.convert(:float) assert_equal(["Numbers", ":integer", 1, ":float", 3.015], @parser.shift) end def test_numeric @parser.convert(:numeric) assert_equal(["Numbers", ":integer", 1, ":float", 3.015], @parser.shift) end def test_all @data << ",#{@windows_safe_time_data}" @parser = CSV.new(@data) @parser.convert(:all) assert_equal(["Numbers", ":integer", 1, ":float", 3.015, @time.to_datetime], @parser.shift) end def test_custom @parser.convert do |field| /\A:(\S.*?)\s*\Z/ =~ field ? $1.to_sym : field end assert_equal(["Numbers", :integer, "1", :float, "3.015"], @parser.shift) end def test_builtin_custom @parser.convert(:numeric) @parser.convert(&@custom) assert_equal(["Numbers", :integer, 1, :float, 3.015], @parser.shift) end def test_custom_field_info_line @parser.convert do |field, info| assert_equal(1, info.line) info.index == 4 ? Float(field).floor : field end assert_equal(["Numbers", ":integer", "1", ":float", 3], @parser.shift) end def test_custom_field_info_header headers = ["one", "two", "three", "four", "five"] @parser = CSV.new(@data, headers: headers) @parser.convert do |field, info| info.header == "three" ? 
Integer(field) * 100 : field end assert_equal(CSV::Row.new(headers, ["Numbers", ":integer", 100, ":float", "3.015"]), @parser.shift) end def test_custom_blank_field converter = lambda {|field| field.nil?} row = CSV.parse_line('nil,', converters: converter) assert_equal([false, true], row) end def test_nil_value assert_equal(["nil", "", "a"], CSV.parse_line(',"",a', nil_value: "nil")) end def test_empty_value assert_equal([nil, "empty", "a"], CSV.parse_line(',"",a', empty_value: "empty")) end def test_quoted_parse_line row = CSV.parse_line('1,"2",3', converters: @preserving_converter) assert_equal([1, "2", 3], row) end def test_quoted_parse expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]] rows = CSV.parse(<<~CSV, converters: @preserving_converter) "quoted",unquoted "109",1 "10A",2 CSV assert_equal(expected, rows) end def test_quoted_alternating_quote row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter) assert_equal(['1', 2, '3'], row) end def test_quoted_parse_headers expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]] table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter) "quoted",unquoted "109",1 "10A",2 CSV assert_equal(expected, table.to_a) end def test_quoted_parse_with_string_headers expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]] table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter) "109",1 "10A",2 CSV assert_equal(expected, table.to_a) end end csv-3.3.5/test/csv/parse/test_each.rb000066400000000000000000000007131501670011600174630ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseEach < Test::Unit::TestCase extend DifferentOFS def test_twice data = <<-CSV Ruby,2.6.0,script CSV csv = CSV.new(data) assert_equal([ [["Ruby", "2.6.0", "script"]], [], ], [ csv.to_a, csv.to_a, ]) end end 
csv-3.3.5/test/csv/parse/test_general.rb000066400000000000000000000250211501670011600201770ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require "timeout" require_relative "../helper" # # Following tests are my interpretation of the # {CSV RCF}[https://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that # document in one place (intentionally) and that is to make the default row # separator $/. # class TestCSVParseGeneral < Test::Unit::TestCase extend DifferentOFS BIG_DATA = "123456789\n" * 512 def test_mastering_regex_example ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K} assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000", "It's \"10 Grand\", baby", "10K" ], CSV.parse_line(ex) ) end # Old Ruby 1.8 CSV library tests. def test_std_lib_csv [ ["\t", ["\t"]], ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]], ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]], ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]], ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]], ["\"\"", [""]], ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]], ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]], ["foo,\"\r\",baz", ["foo", "\r", "baz"]], ["foo,\"\",baz", ["foo", "", "baz"]], ["\",\"", [","]], ["foo", ["foo"]], [",,", [nil, nil, nil]], [",", [nil, nil]], ["foo,\"\n\",baz", ["foo", "\n", "baz"]], ["foo,,baz", ["foo", nil, "baz"]], ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]], ["\",\",\",\"", [",", ","]], ["foo,bar,", ["foo", "bar", nil]], [",foo,bar", [nil, "foo", "bar"]], ["foo,bar", ["foo", "bar"]], [";", [";"]], ["\t,\t", ["\t", "\t"]], ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]], ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]], ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]], [";,;", [";", ";"]] ].each do |csv_test| assert_equal(csv_test.last, CSV.parse_line(csv_test.first)) end [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]], ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]], ["foo,\"\r\n\",baz", ["foo", 
"\r\n", "baz"]], ["\"\"", [""]], ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]], ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]], ["foo,\"\r\",baz", ["foo", "\r", "baz"]], ["foo,\"\",baz", ["foo", "", "baz"]], ["foo", ["foo"]], [",,", [nil, nil, nil]], [",", [nil, nil]], ["foo,\"\n\",baz", ["foo", "\n", "baz"]], ["foo,,baz", ["foo", nil, "baz"]], ["foo,bar", ["foo", "bar"]], ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]], ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test| assert_equal(csv_test.last, CSV.parse_line(csv_test.first)) end end # From: [ruby-core:6496] def test_aras_edge_cases [ [%Q{a,b}, ["a", "b"]], [%Q{a,"""b"""}, ["a", "\"b\""]], [%Q{a,"""b"}, ["a", "\"b"]], [%Q{a,"b"""}, ["a", "b\""]], [%Q{a,"\nb"""}, ["a", "\nb\""]], [%Q{a,"""\nb"}, ["a", "\"\nb"]], [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]], [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]], [%Q{a,,,}, ["a", nil, nil, nil]], [%Q{,}, [nil, nil]], [%Q{"",""}, ["", ""]], [%Q{""""}, ["\""]], [%Q{"""",""}, ["\"",""]], [%Q{,""}, [nil,""]], [%Q{,"\r"}, [nil,"\r"]], [%Q{"\r\n,"}, ["\r\n,"]], [%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case| assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) end end def test_james_edge_cases # A read at eof? should return nil. assert_equal(nil, CSV.parse_line("")) # # With Ruby 1.8 CSV it's impossible to tell an empty line from a line # containing a single +nil+ field. The old CSV library returns # [nil] in these cases, but Array.new makes more sense to # me. 
# assert_equal(Array.new, CSV.parse_line("\n1,2,3\n")) end def test_rob_edge_cases [ [%Q{"a\nb"}, ["a\nb"]], [%Q{"\n\n\n"}, ["\n\n\n"]], [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]], [%Q{,"\r\n"}, [nil,"\r\n"]], [%Q{,"\r\n."}, [nil,"\r\n."]], [%Q{"a\na","one newline"}, ["a\na", 'one newline']], [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']], [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']], [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']], [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]], ].each do |edge_case| assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) end end def test_non_regex_edge_cases # An early version of the non-regex parser fails this test [ [ "foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"] ] ].each do |edge_case| assert_equal(edge_case.last, CSV.parse_line(edge_case.first)) end assert_raise(CSV::MalformedCSVError) do CSV.parse_line("1,\"23\"4\"5\", 6") end end def test_malformed_csv_cr_first_line error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line("1,2\r,3", row_sep: "\n") end assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 1.", error.message) end def test_malformed_csv_cr_middle_line csv = <<-CSV line,1,abc line,2,"def\nghi" line,4,some\rjunk line,5,jkl CSV error = assert_raise(CSV::MalformedCSVError) do CSV.parse(csv) end assert_equal("Unquoted fields do not allow new line <\"\\r\"> in line 4.", error.message) end def test_malformed_csv_unclosed_quote error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('1,2,"3...') end assert_equal("Unclosed quoted field in line 1.", error.message) end def test_malformed_csv_illegal_quote_middle_line csv = <<-CSV line,1,abc line,2,"def\nghi" line,4,8'10" line,5,jkl CSV error = assert_raise(CSV::MalformedCSVError) do CSV.parse(csv) end assert_equal("Illegal quoting in line 4.", error.message) end def test_the_parse_fails_fast_when_it_can_for_unquoted_fields assert_parse_errors_out('valid,fields,bad 
start"' + BIG_DATA) end def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA) end def test_field_size_limit_controls_lookahead assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', field_size_limit: 2048 ) end def test_field_size_limit_max_allowed column = "abcde" assert_equal([[column]], CSV.parse("\"#{column}\"", field_size_limit: column.size + 1)) end def test_field_size_limit_quote_simple column = "abcde" assert_parse_errors_out("\"#{column}\"", field_size_limit: column.size) end def test_field_size_limit_no_quote_implicitly column = "abcde" assert_parse_errors_out("#{column}", field_size_limit: column.size) end def test_field_size_limit_no_quote_explicitly column = "abcde" assert_parse_errors_out("#{column}", field_size_limit: column.size, quote_char: nil) end def test_field_size_limit_in_extended_column_not_exceeding data = <<~DATA "a","b" " 2 ","" DATA assert_nothing_raised(CSV::MalformedCSVError) do CSV.parse(data, field_size_limit: 4) end end def test_field_size_limit_in_extended_column_exceeding data = <<~DATA "a","b" " 2345 ","" DATA assert_parse_errors_out(data, field_size_limit: 5) end def test_max_field_size_controls_lookahead assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', max_field_size: 2048 ) end def test_max_field_size_max_allowed column = "abcde" assert_equal([[column]], CSV.parse("\"#{column}\"", max_field_size: column.size)) end def test_max_field_size_quote_simple column = "abcde" assert_parse_errors_out("\"#{column}\"", max_field_size: column.size - 1) end def test_max_field_size_no_quote_implicitly column = "abcde" assert_parse_errors_out("#{column}", max_field_size: column.size - 1) end def test_max_field_size_no_quote_explicitly column = "abcde" assert_parse_errors_out("#{column}", max_field_size: column.size - 1, quote_char: nil) end def test_max_field_size_in_extended_column_not_exceeding data = <<~DATA "a","b" " 2 ","" DATA 
assert_nothing_raised(CSV::MalformedCSVError) do CSV.parse(data, max_field_size: 3) end end def test_max_field_size_in_extended_column_exceeding data = <<~DATA "a","b" " 2345 ","" DATA assert_parse_errors_out(data, max_field_size: 4) end def test_row_sep_auto_cr assert_equal([["a"]], CSV.parse("a\r")) end def test_row_sep_auto_lf assert_equal([["a"]], CSV.parse("a\n")) end def test_row_sep_auto_cr_lf assert_equal([["a"]], CSV.parse("a\r\n")) end def test_seeked_string_io input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n") input_with_bom.read(3) assert_equal([ ["あ", "い", "う"], ["a", "b", "c"], ], CSV.new(input_with_bom).each.to_a) end def test_quoted_col_sep_and_empty_line assert_equal([["one,"], [], ["three"]], CSV.parse(<<-CSV)) "one," "three" CSV end private { "YJIT"=>1, # for --yjit-call-threshold=1 "MJIT"=>5, "RJIT"=>5, # for --jit-wait }.any? do |jit, timeout| if (RubyVM.const_defined?(jit) and jit = RubyVM.const_get(jit) and jit.respond_to?(:enabled?) and jit.enabled?) PARSE_ERROR_TIMEOUT = timeout end end PARSE_ERROR_TIMEOUT ||= 0.2 def assert_parse_errors_out(data, timeout: PARSE_ERROR_TIMEOUT, **options) assert_raise(CSV::MalformedCSVError) do Timeout.timeout(timeout) do CSV.parse(data, **options) fail("Parse didn't error out") end end end end csv-3.3.5/test/csv/parse/test_header.rb000066400000000000000000000233171501670011600200200ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVHeaders < Test::Unit::TestCase extend DifferentOFS def setup super @data = <<-CSV first,second,third A,B,C 1,2,3 CSV end def test_first_row [:first_row, true].each do |setting| # two names for the same setting # activate headers csv = nil assert_nothing_raised(Exception) do csv = CSV.parse(@data, headers: setting) end # first data row - skipping headers row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a) # second data 
row row = csv[1] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a) # empty assert_nil(csv[2]) end end def test_array_of_headers # activate headers csv = nil assert_nothing_raised(Exception) do csv = CSV.parse(@data, headers: [:my, :new, :headers]) end # first data row - skipping headers row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal( [[:my, "first"], [:new, "second"], [:headers, "third"]], row.to_a ) # second data row row = csv[1] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([[:my, "A"], [:new, "B"], [:headers, "C"]], row.to_a) # third data row row = csv[2] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([[:my, "1"], [:new, "2"], [:headers, "3"]], row.to_a) # empty assert_nil(csv[3]) # with return and convert assert_nothing_raised(Exception) do csv = CSV.parse( @data, headers: [:my, :new, :headers], return_headers: true, header_converters: lambda { |h| h.to_s } ) end row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([["my", :my], ["new", :new], ["headers", :headers]], row.to_a) assert_predicate(row, :header_row?) assert_not_predicate(row, :field_row?) 
end def test_csv_header_string # activate headers csv = nil assert_nothing_raised(Exception) do csv = CSV.parse(@data, headers: "my,new,headers") end # first data row - skipping headers row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a) # second data row row = csv[1] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{my A}, %w{new B}, %w{headers C}], row.to_a) # third data row row = csv[2] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{my 1}, %w{new 2}, %w{headers 3}], row.to_a) # empty assert_nil(csv[3]) # with return and convert assert_nothing_raised(Exception) do csv = CSV.parse( @data, headers: "my,new,headers", return_headers: true, header_converters: :symbol ) end row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([[:my, "my"], [:new, "new"], [:headers, "headers"]], row.to_a) assert_predicate(row, :header_row?) assert_not_predicate(row, :field_row?) end def test_csv_header_string_inherits_separators # parse with custom col_sep csv = nil assert_nothing_raised(Exception) do csv = CSV.parse( @data.tr(",", "|"), col_sep: "|", headers: "my|new|headers" ) end # verify headers were recognized row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a) end def test_return_headers # activate headers and request they are returned csv = nil assert_nothing_raised(Exception) do csv = CSV.parse(@data, headers: true, return_headers: true) end # header row row = csv[0] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal( [%w{first first}, %w{second second}, %w{third third}], row.to_a ) assert_predicate(row, :header_row?) assert_not_predicate(row, :field_row?) 
# first data row - skipping headers row = csv[1] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{first A}, %w{second B}, %w{third C}], row.to_a) assert_not_predicate(row, :header_row?) assert_predicate(row, :field_row?) # second data row row = csv[2] assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([%w{first 1}, %w{second 2}, %w{third 3}], row.to_a) assert_not_predicate(row, :header_row?) assert_predicate(row, :field_row?) # empty assert_nil(csv[3]) end def test_converters # create test data where headers and fields look alike data = <<-CSV 1,2,3 1,2,3 CSV # normal converters do not affect headers csv = CSV.parse( data, headers: true, return_headers: true, converters: :numeric ) assert_equal([%w{1 1}, %w{2 2}, %w{3 3}], csv[0].to_a) assert_equal([["1", 1], ["2", 2], ["3", 3]], csv[1].to_a) assert_nil(csv[2]) # header converters do affect headers (only) assert_nothing_raised(Exception) do csv = CSV.parse( data, headers: true, return_headers: true, converters: :numeric, header_converters: :symbol ) end assert_equal([[:"1", "1"], [:"2", "2"], [:"3", "3"]], csv[0].to_a) assert_equal([[:"1", 1], [:"2", 2], [:"3", 3]], csv[1].to_a) assert_nil(csv[2]) end def test_builtin_downcase_converter csv = CSV.parse( "One,TWO Three", headers: true, return_headers: true, header_converters: :downcase ) assert_equal(%w{one two\ three}, csv.headers) end def test_builtin_symbol_converter # Note that the trailing space is intentional csv = CSV.parse( "One,TWO Three ", headers: true, return_headers: true, header_converters: :symbol ) assert_equal([:one, :two_three], csv.headers) end def test_builtin_symbol_raw_converter csv = CSV.parse( "a b,c d", headers: true, return_headers: true, header_converters: :symbol_raw ) assert_equal([:"a b", :"c d"], csv.headers) end def test_builtin_symbol_converter_with_punctuation csv = CSV.parse( "One, Two & Three ($)", headers: true, return_headers: true, header_converters: :symbol ) assert_equal([:one, 
:two_three], csv.headers) end def test_builtin_converters_with_blank_header csv = CSV.parse( "one,,three", headers: true, return_headers: true, header_converters: [:downcase, :symbol, :symbol_raw] ) assert_equal([:one, nil, :three], csv.headers) end def test_custom_converter converter = lambda { |header| header.tr(" ", "_") } csv = CSV.parse( "One,TWO Three", headers: true, return_headers: true, header_converters: converter ) assert_equal(%w{One TWO_Three}, csv.headers) end def test_table_support csv = nil assert_nothing_raised(Exception) do csv = CSV.parse(@data, headers: true) end assert_instance_of(CSV::Table, csv) end def test_skip_blanks @data = <<-CSV A,B,C 1,2,3 CSV expected = [%w[1 2 3]] CSV.parse(@data, headers: true, skip_blanks: true) do |row| assert_equal(expected.shift, row.fields) end expected = [%w[A B C], %w[1 2 3]] CSV.parse( @data, headers: true, return_headers: true, skip_blanks: true ) do |row| assert_equal(expected.shift, row.fields) end end def test_headers_reader # no headers assert_nil(CSV.new(@data).headers) # headers csv = CSV.new(@data, headers: true) assert_equal(true, csv.headers) # before headers are read csv.shift # set headers assert_equal(%w[first second third], csv.headers) # after headers are read end def test_blank_row @data += "\n#{@data}" # add a blank row # ensure that everything returned is a Row object CSV.parse(@data, headers: true) do |row| assert_instance_of(CSV::Row, row) end end def test_nil_row_header @data = <<-CSV A 1 CSV csv = CSV.parse(@data, headers: true) # ensure nil row creates Row object with headers row = csv[0] assert_equal([["A"], [nil]], [row.headers, row.fields]) end def test_parse_empty assert_equal(CSV::Table.new([]), CSV.parse("", headers: true)) end def test_parse_empty_line assert_equal(CSV::Table.new([]), CSV.parse("\n", headers: true)) end def test_specified_empty assert_equal(CSV::Table.new([], headers: ["header1"]), CSV.parse("", headers: ["header1"])) end def test_specified_empty_line 
assert_equal(CSV::Table.new([CSV::Row.new(["header1"], [])], headers: ["header1"]), CSV.parse("\n", headers: ["header1"])) end end csv-3.3.5/test/csv/parse/test_inputs_scanner.rb000066400000000000000000000055571501670011600216310ustar00rootroot00000000000000require_relative "../helper" class TestCSVParseInputsScanner < Test::Unit::TestCase include CSVHelper def test_scan_keep_nested_back input = CSV::Parser::UnoptimizedStringIO.new("abcdef") scanner = CSV::Parser::InputsScanner.new([input], Encoding::UTF_8, nil) scanner.keep_start assert_equal("abc", scanner.scan_all(/[a-c]+/)) scanner.keep_start assert_equal("def", scanner.scan_all(/[d-f]+/)) scanner.keep_back scanner.keep_back assert_equal("abcdef", scanner.scan_all(/[a-f]+/)) end def test_scan_keep_over_chunks_nested_back input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") scanner = CSV::Parser::InputsScanner.new([input], Encoding::UTF_8, nil, chunk_size: 2) scanner.keep_start assert_equal("abc", scanner.scan_all(/[a-c]+/)) scanner.keep_start assert_equal("def", scanner.scan_all(/[d-f]+/)) scanner.keep_back scanner.keep_back assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) end def test_scan_keep_over_chunks_nested_drop_back input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") scanner = CSV::Parser::InputsScanner.new([input], Encoding::UTF_8, nil, chunk_size: 3) scanner.keep_start assert_equal("ab", scanner.scan(/../)) scanner.keep_start assert_equal("c", scanner.scan(/./)) assert_equal("d", scanner.scan(/./)) scanner.keep_drop scanner.keep_back assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) end def test_each_line_keep_over_chunks_multibyte input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n") scanner = CSV::Parser::InputsScanner.new([input], Encoding::UTF_8, nil, chunk_size: 1) each_line = scanner.each_line("\n") assert_equal("ab\n", each_line.next) scanner.keep_start assert_equal("\u{3000}a\n", each_line.next) scanner.keep_back assert_equal("\u{3000}a\n", 
scanner.scan_all(/[^,]+/)) end def test_each_line_keep_over_chunks_fit_chunk_size input = CSV::Parser::UnoptimizedStringIO.new("\na") scanner = CSV::Parser::InputsScanner.new([input], Encoding::UTF_8, nil, chunk_size: 1) each_line = scanner.each_line("\n") assert_equal("\n", each_line.next) scanner.keep_start assert_equal("a", each_line.next) scanner.keep_back end end csv-3.3.5/test/csv/parse/test_invalid.rb000066400000000000000000000025431501670011600202140ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseInvalid < Test::Unit::TestCase def test_no_column_mixed_new_lines error = assert_raise(CSV::MalformedCSVError) do CSV.parse("\n" + "\r") end assert_equal("New line must be <\"\\n\"> not <\"\\r\"> in line 2.", error.message) end def test_ignore_invalid_line csv = CSV.new(<<-CSV, headers: true, return_headers: true) head1,head2,head3 aaa,bbb,ccc ddd,ee"e.fff ggg,hhh,iii CSV headers = ["head1", "head2", "head3"] assert_equal(CSV::Row.new(headers, headers), csv.shift) assert_equal(CSV::Row.new(headers, ["aaa", "bbb", "ccc"]), csv.shift) assert_equal(false, csv.eof?) error = assert_raise(CSV::MalformedCSVError) do csv.shift end assert_equal("Illegal quoting in line 3.", error.message) assert_equal(false, csv.eof?) assert_equal(CSV::Row.new(headers, ["ggg", "hhh", "iii"]), csv.shift) assert_equal(true, csv.eof?) 
end def test_ignore_invalid_line_cr_lf data = <<-CSV "1","OK"\r "2",""NOT" OK"\r "3","OK"\r CSV csv = CSV.new(data) assert_equal(['1', 'OK'], csv.shift) assert_raise(CSV::MalformedCSVError) { csv.shift } assert_equal(['3', 'OK'], csv.shift) end end csv-3.3.5/test/csv/parse/test_liberal_parsing.rb000066400000000000000000000131271501670011600217230ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseLiberalParsing < Test::Unit::TestCase extend DifferentOFS def test_middle_quote_start input = '"Johnson, Dwayne",Dwayne "The Rock" Johnson' error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line(input) end assert_equal("Illegal quoting in line 1.", error.message) assert_equal(["Johnson, Dwayne", 'Dwayne "The Rock" Johnson'], CSV.parse_line(input, liberal_parsing: true)) end def test_middle_quote_end input = '"quoted" field' error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line(input) end assert_equal("Any value after quoted field isn't allowed in line 1.", error.message) assert_equal(['"quoted" field'], CSV.parse_line(input, liberal_parsing: true)) end def test_endline_after_quoted_field_end csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true) assert_equal(["A"], csv.gets) error = assert_raise(CSV::MalformedCSVError) do csv.gets end assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.', error.message) assert_equal(["C"], csv.gets) end def test_quote_after_column_separator error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) end assert_equal("Unclosed quoted field in line 1.", error.message) end def test_quote_before_column_separator assert_equal(["is", 'this "three', ' or four"', "fields"], CSV.parse_line('is,this "three, or four",fields', liberal_parsing: true)) end def test_backslash_quote assert_equal([ "1", "\"Hamlet says, \\\"Seems", "\\\" madam! 
Nay it is; I know not \\\"seems.\\\"\"", ], CSV.parse_line('1,' + '"Hamlet says, \"Seems,' + '\" madam! Nay it is; I know not \"seems.\""', liberal_parsing: true)) end def test_space_quote input = <<~CSV Los Angeles, 34°03'N, 118°15'W New York City, 40°42'46"N, 74°00'21"W Paris, 48°51'24"N, 2°21'03"E CSV assert_equal( [ ["Los Angeles", " 34°03'N", " 118°15'W"], ["New York City", " 40°42'46\"N", " 74°00'21\"W"], ["Paris", " 48°51'24\"N", " 2°21'03\"E"], ], CSV.parse(input, liberal_parsing: true)) end def test_double_quote_outside_quote data = %Q{a,""b""} error = assert_raise(CSV::MalformedCSVError) do CSV.parse(data) end assert_equal("Any value after quoted field isn't allowed in line 1.", error.message) assert_equal([ [["a", %Q{""b""}]], [["a", %Q{"b"}]], ], [ CSV.parse(data, liberal_parsing: true), CSV.parse(data, liberal_parsing: { double_quote_outside_quote: true, }), ]) end class TestBackslashQuote < Test::Unit::TestCase extend ::DifferentOFS def test_double_quote_outside_quote data = %Q{a,""b""} assert_equal([ [["a", %Q{""b""}]], [["a", %Q{"b"}]], ], [ CSV.parse(data, liberal_parsing: { backslash_quote: true }), CSV.parse(data, liberal_parsing: { backslash_quote: true, double_quote_outside_quote: true }), ]) end def test_unquoted_value data = %q{\"\"a\"\"} assert_equal([ [[%q{\"\"a\"\"}]], [[%q{""a""}]], ], [ CSV.parse(data, liberal_parsing: true), CSV.parse(data, liberal_parsing: { backslash_quote: true }), ]) end def test_unquoted_value_multiple_characters_col_sep data = %q{a<\\"b<=>x} assert_equal([[%Q{a<"b}, "x"]], CSV.parse(data, col_sep: "<=>", liberal_parsing: { backslash_quote: true })) end def test_quoted_value data = %q{"\"\"a\"\""} assert_equal([ [[%q{"\"\"a\"\""}]], [[%q{""a""}]], [[%q{""a""}]], ], [ CSV.parse(data, liberal_parsing: true), CSV.parse(data, liberal_parsing: { backslash_quote: true }), CSV.parse(data, liberal_parsing: { backslash_quote: true, double_quote_outside_quote: true }), ]) end end end 
csv-3.3.5/test/csv/parse/test_quote_char_nil.rb000066400000000000000000000045511501670011600215630ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseQuoteCharNil < Test::Unit::TestCase extend DifferentOFS def test_full assert_equal(["a", "b"], CSV.parse_line(%Q{a,b}, quote_char: nil)) end def test_end_with_nil assert_equal(["a", nil, nil, nil], CSV.parse_line(%Q{a,,,}, quote_char: nil)) end def test_nil_nil assert_equal([nil, nil], CSV.parse_line(%Q{,}, quote_char: nil)) end def test_unquoted_value_multiple_characters_col_sep data = %q{ax} assert_equal([[%Q{a", quote_char: nil)) end def test_csv_header_string data = <<~DATA first,second,third A,B,C 1,2,3 DATA assert_equal( CSV::Table.new([ CSV::Row.new(["my", "new", "headers"], ["first", "second", "third"]), CSV::Row.new(["my", "new", "headers"], ["A", "B", "C"]), CSV::Row.new(["my", "new", "headers"], ["1", "2", "3"]) ]), CSV.parse(data, headers: "my,new,headers", quote_char: nil) ) end def test_comma assert_equal([["a", "b", nil, "d"]], CSV.parse("a,b,,d", col_sep: ",", quote_char: nil)) end def test_space assert_equal([["a", "b", nil, "d"]], CSV.parse("a b d", col_sep: " ", quote_char: nil)) end def encode_array(array, encoding) array.collect do |element| element ? 
element.encode(encoding) : element end end def test_space_no_ascii encoding = Encoding::UTF_16LE assert_equal([encode_array(["a", "b", nil, "d"], encoding)], CSV.parse("a b d".encode(encoding), col_sep: " ".encode(encoding), quote_char: nil)) end def test_multiple_space assert_equal([["a b", nil, "d"]], CSV.parse("a b d", col_sep: " ", quote_char: nil)) end def test_multiple_characters_leading_empty_fields data = <<-CSV <=><=>A<=>B<=>C 1<=>2<=>3 CSV assert_equal([ [nil, nil, "A", "B", "C"], ["1", "2", "3"], ], CSV.parse(data, col_sep: "<=>", quote_char: nil)) end def test_line lines = [ "abc,def\n", ] csv = CSV.new(lines.join(""), quote_char: nil) lines.each do |line| csv.shift assert_equal(line, csv.line) end end end csv-3.3.5/test/csv/parse/test_read.rb000066400000000000000000000007521501670011600175010ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseRead < Test::Unit::TestCase extend DifferentOFS def test_shift data = <<-CSV 1 2 3 CSV csv = CSV.new(data) assert_equal([ ["1"], [["2"], ["3"]], nil, ], [ csv.shift, csv.read, csv.shift, ]) end end csv-3.3.5/test/csv/parse/test_rewind.rb000066400000000000000000000017151501670011600200560ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseRewind < Test::Unit::TestCase extend DifferentOFS def parse(data, **options) csv = CSV.new(data, **options) records = csv.to_a csv.rewind [records, csv.to_a] end def test_default data = <<-CSV Ruby,2.6.0,script CSV assert_equal([ [["Ruby", "2.6.0", "script"]], [["Ruby", "2.6.0", "script"]], ], parse(data)) end def test_have_headers data = <<-CSV Language,Version,Type Ruby,2.6.0,script CSV assert_equal([ [CSV::Row.new(["Language", "Version", "Type"], ["Ruby", "2.6.0", "script"])], [CSV::Row.new(["Language", "Version", "Type"], ["Ruby", "2.6.0", "script"])], ], parse(data, headers: true)) end end 
csv-3.3.5/test/csv/parse/test_row_separator.rb000066400000000000000000000005441501670011600214540ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseRowSeparator < Test::Unit::TestCase extend DifferentOFS include CSVHelper def test_multiple_characters with_chunk_size("1") do assert_equal([["a"], ["b"]], CSV.parse("a\r\nb\r\n", row_sep: "\r\n")) end end end csv-3.3.5/test/csv/parse/test_skip_lines.rb000066400000000000000000000056141501670011600207300ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" class TestCSVParseSkipLines < Test::Unit::TestCase extend DifferentOFS include CSVHelper def test_default csv = CSV.new("a,b,c\n") assert_nil(csv.skip_lines) end def parse(data, **options) # We use Tempfile here to use CSV::Parser::InputsScanner. Tempfile.open(["csv-", ".csv"]) do |file| file.binmode file.print(data) file.close CSV.open(file, **options) do |csv| csv.read end end end def test_regexp csv = <<-CSV 1 #2 #3 4 CSV assert_equal([ ["1"], ["4"], ], parse(csv, :skip_lines => /\A\s*#/)) end def test_regexp_quoted csv = <<-CSV 1 #2 "#3" 4 CSV assert_equal([ ["1"], ["#3"], ["4"], ], parse(csv, :skip_lines => /\A\s*#/)) end def test_string csv = <<-CSV 1 .2 3. 
4 CSV assert_equal([ ["1"], ["4"], ], parse(csv, :skip_lines => ".")) end class RegexStub end def test_not_matchable regex_stub = RegexStub.new csv = CSV.new("1\n", :skip_lines => regex_stub) error = assert_raise(ArgumentError) do csv.shift end assert_equal(":skip_lines has to respond to #match: #{regex_stub.inspect}", error.message) end class Matchable def initialize(pattern) @pattern = pattern end def match(line) @pattern.match(line) end end def test_matchable csv = <<-CSV 1 # 2 3 # 4 CSV assert_equal([ ["1"], ["3"], ], parse(csv, :skip_lines => Matchable.new(/\A#/))) end def test_multibyte_data # U+3042 HIRAGANA LETTER A # U+3044 HIRAGANA LETTER I # U+3046 HIRAGANA LETTER U value = "\u3042\u3044\u3046" with_chunk_size("5") do assert_equal([[value], [value]], parse("#{value}\n#{value}\n", :skip_lines => /\A#/)) end end def test_empty_line_and_liberal_parsing assert_equal([["a", "b"]], parse("a,b\n", :liberal_parsing => true, :skip_lines => /^$/)) end def test_crlf assert_equal([["a", "b"]], parse("a,b\r\n,\r\n", :skip_lines => /^,+$/)) end def test_crlf_strip_no_last_crlf assert_equal([["a"], ["b"]], parse("a\r\nb", row_sep: "\r\n", skip_lines: /^ *$/, strip: true)) end def test_crlf_quoted_lf assert_equal([["\n", ""]], parse("\"\n\",\"\"\r\n", row_sep: "\r\n", skip_lines: /not matched/)) end end csv-3.3.5/test/csv/parse/test_strip.rb000066400000000000000000000056701501670011600177330ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseStrip < Test::Unit::TestCase extend DifferentOFS def test_both assert_equal(["a", "b"], CSV.parse_line(%Q{ a , b }, strip: true)) end def test_left assert_equal(["a", "b"], CSV.parse_line(%Q{ a, b}, strip: true)) end def test_right assert_equal(["a", "b"], CSV.parse_line(%Q{a ,b }, strip: true)) end def test_middle assert_equal(["a b"], CSV.parse_line(%Q{a b}, strip: true)) end def test_quoted assert_equal([" a ", " b "], CSV.parse_line(%Q{" a "," b "}, 
strip: true)) end def test_liberal_parsing assert_equal([" a ", "b", " c ", " d "], CSV.parse_line(%Q{" a ", b , " c "," d " }, strip: true, liberal_parsing: true)) end def test_string assert_equal(["a", " b"], CSV.parse_line(%Q{ a , " b" }, strip: " ")) end def test_no_quote assert_equal([" a ", " b "], CSV.parse_line(%Q{" a ", b }, strip: %Q{"}, quote_char: nil)) end def test_do_not_strip_cr assert_equal([ ["a", "b "], ["a", "b "], ], CSV.parse(%Q{"a" ,"b " \r} + %Q{"a" ,"b " \r}, strip: true)) end def test_do_not_strip_lf assert_equal([ ["a", "b "], ["a", "b "], ], CSV.parse(%Q{"a" ,"b " \n} + %Q{"a" ,"b " \n}, strip: true)) end def test_do_not_strip_crlf assert_equal([ ["a", "b "], ["a", "b "], ], CSV.parse(%Q{"a" ,"b " \r\n} + %Q{"a" ,"b " \r\n}, strip: true)) end def test_col_sep_incompatible_true message = "The provided strip (true) and " \ "col_sep (\\t) options are incompatible." assert_raise_with_message(ArgumentError, message) do CSV.parse_line(%Q{"a"\t"b"\n}, col_sep: "\t", strip: true) end end def test_col_sep_incompatible_string message = "The provided strip (\\t) and " \ "col_sep (\\t) options are incompatible." assert_raise_with_message(ArgumentError, message) do CSV.parse_line(%Q{"a"\t"b"\n}, col_sep: "\t", strip: "\t") end end def test_col_sep_compatible_string assert_equal( ["a", "b"], CSV.parse_line(%Q{\va\tb\v\n}, col_sep: "\t", strip: "\v") ) end end csv-3.3.5/test/csv/parse/test_unconverted_fields.rb000066400000000000000000000066401501670011600224520ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" class TestCSVParseUnconvertedFields < Test::Unit::TestCase extend DifferentOFS def setup super @custom = lambda {|field| /\A:(\S.*?)\s*\Z/ =~ field ? 
$1.to_sym : field} @headers = ["first", "second", "third"] @data = <<-CSV first,second,third 1,2,3 CSV end def test_custom row = CSV.parse_line("Numbers,:integer,1,:float,3.015", converters: [:numeric, @custom], unconverted_fields: true) assert_equal([ ["Numbers", :integer, 1, :float, 3.015], ["Numbers", ":integer", "1", ":float", "3.015"], ], [ row, row.unconverted_fields, ]) end def test_no_fields row = CSV.parse_line("\n", converters: [:numeric, @custom], unconverted_fields: true) assert_equal([ [], [], ], [ row, row.unconverted_fields, ]) end def test_parsed_header row = CSV.parse_line(@data, converters: :numeric, unconverted_fields: true, headers: :first_row) assert_equal([ CSV::Row.new(@headers, [1, 2, 3]), ["1", "2", "3"], ], [ row, row.unconverted_fields, ]) end def test_return_headers row = CSV.parse_line(@data, converters: :numeric, unconverted_fields: true, headers: :first_row, return_headers: true) assert_equal([ CSV::Row.new(@headers, @headers), @headers, ], [ row, row.unconverted_fields, ]) end def test_header_converters row = CSV.parse_line(@data, converters: :numeric, unconverted_fields: true, headers: :first_row, return_headers: true, header_converters: :symbol) assert_equal([ CSV::Row.new(@headers.collect(&:to_sym), @headers), @headers, ], [ row, row.unconverted_fields, ]) end def test_specified_headers row = CSV.parse_line("\n", converters: :numeric, unconverted_fields: true, headers: %w{my new headers}, return_headers: true, header_converters: :symbol) assert_equal([ CSV::Row.new([:my, :new, :headers], ["my", "new", "headers"]), [], ], [ row, row.unconverted_fields, ]) end end csv-3.3.5/test/csv/test_data_converters.rb000066400000000000000000000251301501670011600206340ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "helper" class TestCSVDataConverters < Test::Unit::TestCase extend DifferentOFS def setup super @win_safe_time_str = Time.now.strftime("%a %b %d %H:%M:%S %Y") end def 
test_builtin_integer_converter # does convert [-5, 1, 10000000000].each do |n| assert_equal(n, CSV::Converters[:integer][n.to_s]) end # does not convert (%w{junk 1.0} + [""]).each do |str| assert_equal(str, CSV::Converters[:integer][str]) end end def test_builtin_float_converter # does convert [-5.1234, 0, 2.3e-11].each do |n| assert_equal(n, CSV::Converters[:float][n.to_s]) end # does not convert (%w{junk 1..0 .015F} + [""]).each do |str| assert_equal(str, CSV::Converters[:float][str]) end end def test_builtin_date_converter # does convert assert_instance_of( Date, CSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")] ) # does not convert assert_instance_of(String, CSV::Converters[:date]["junk"]) end def test_builtin_date_time_converter # does convert assert_instance_of( DateTime, CSV::Converters[:date_time][@win_safe_time_str] ) # does not convert assert_instance_of(String, CSV::Converters[:date_time]["junk"]) end def test_builtin_date_time_converter_iso8601_date iso8601_string = "2018-01-14" datetime = DateTime.new(2018, 1, 14) assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_minute iso8601_string = "2018-01-14T22:25" datetime = DateTime.new(2018, 1, 14, 22, 25) assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_second iso8601_string = "2018-01-14T22:25:19" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_under_second iso8601_string = "2018-01-14T22:25:19.1" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_under_second_offset iso8601_string = "2018-01-14T22:25:19.1+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") assert_equal(datetime, 
CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_offset iso8601_string = "2018-01-14T22:25:19+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_iso8601_utc iso8601_string = "2018-01-14T22:25:19Z" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end def test_builtin_date_time_converter_rfc3339_minute rfc3339_string = "2018-01-14 22:25" datetime = DateTime.new(2018, 1, 14, 22, 25) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_second rfc3339_string = "2018-01-14 22:25:19" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_under_second rfc3339_string = "2018-01-14 22:25:19.1" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_under_second_offset rfc3339_string = "2018-01-14 22:25:19.1+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_offset rfc3339_string = "2018-01-14 22:25:19+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_utc rfc3339_string = "2018-01-14 22:25:19Z" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_minute rfc3339_string = "2018-01-14\t22:25" datetime = DateTime.new(2018, 1, 14, 22, 25) assert_equal(datetime, 
CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_second rfc3339_string = "2018-01-14\t22:25:19" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_under_second rfc3339_string = "2018-01-14\t22:25:19.1" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_under_second_offset rfc3339_string = "2018-01-14\t22:25:19.1+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_offset rfc3339_string = "2018-01-14\t22:25:19+09:00" datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_date_time_converter_rfc3339_tab_utc rfc3339_string = "2018-01-14\t22:25:19Z" datetime = DateTime.new(2018, 1, 14, 22, 25, 19) assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end def test_builtin_time_converter # does convert assert_instance_of(Time, CSV::Converters[:time][@win_safe_time_str]) # does not convert assert_instance_of(String, CSV::Converters[:time]["junk"]) end def test_builtin_time_converter_iso8601_date iso8601_string = "2018-01-14" time = Time.new(2018, 1, 14) assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_minute iso8601_string = "2018-01-14T22:25" time = Time.new(2018, 1, 14, 22, 25) assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_second iso8601_string = "2018-01-14T22:25:19" time = Time.new(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_under_second iso8601_string = 
"2018-01-14T22:25:19.1" time = Time.new(2018, 1, 14, 22, 25, 19.1r) assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_under_second_offset iso8601_string = "2018-01-14T22:25:19.1+09:00" time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_offset iso8601_string = "2018-01-14T22:25:19+09:00" time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_iso8601_utc iso8601_string = "2018-01-14T22:25:19Z" time = Time.utc(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][iso8601_string]) end def test_builtin_time_converter_rfc3339_minute rfc3339_string = "2018-01-14 22:25" time = Time.new(2018, 1, 14, 22, 25) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_second rfc3339_string = "2018-01-14 22:25:19" time = Time.new(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_under_second rfc3339_string = "2018-01-14 22:25:19.1" time = Time.new(2018, 1, 14, 22, 25, 19.1r) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_under_second_offset rfc3339_string = "2018-01-14 22:25:19.1+09:00" time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_offset rfc3339_string = "2018-01-14 22:25:19+09:00" time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_utc rfc3339_string = "2018-01-14 22:25:19Z" time = Time.utc(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_minute 
rfc3339_string = "2018-01-14\t22:25" time = Time.new(2018, 1, 14, 22, 25) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_second rfc3339_string = "2018-01-14\t22:25:19" time = Time.new(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_under_second rfc3339_string = "2018-01-14\t22:25:19.1" time = Time.new(2018, 1, 14, 22, 25, 19.1r) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_under_second_offset rfc3339_string = "2018-01-14\t22:25:19.1+09:00" time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_offset rfc3339_string = "2018-01-14\t22:25:19+09:00" time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") assert_equal(time, CSV::Converters[:time][rfc3339_string]) end def test_builtin_time_converter_rfc3339_tab_utc rfc3339_string = "2018-01-14\t22:25:19Z" time = Time.utc(2018, 1, 14, 22, 25, 19) assert_equal(time, CSV::Converters[:time][rfc3339_string]) end end csv-3.3.5/test/csv/test_encodings.rb000066400000000000000000000324121501670011600174230ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "helper" class TestCSVEncodings < Test::Unit::TestCase extend DifferentOFS include CSVHelper def setup super require 'tempfile' @temp_csv_file = Tempfile.new(%w"test_csv. .csv") @temp_csv_path = @temp_csv_file.path @temp_csv_file.close end def teardown @temp_csv_file.close! 
super end ######################################## ### Hand Test Some Popular Encodings ### ######################################## def test_parses_utf8_encoding assert_parses( [ %w[ one two … ], %w[ 1 … 3 ], %w[ … 5 6 ] ], "UTF-8" ) end def test_parses_latin1_encoding assert_parses( [ %w[ one two Résumé ], %w[ 1 Résumé 3 ], %w[ Résumé 5 6 ] ], "ISO-8859-1" ) end def test_parses_utf16be_encoding assert_parses( [ %w[ one two … ], %w[ 1 … 3 ], %w[ … 5 6 ] ], "UTF-16BE" ) end def test_parses_shift_jis_encoding assert_parses( [ %w[ 一 二 三 ], %w[ 四 五 六 ], %w[ 七 八 九 ] ], "Shift_JIS" ) end ########################################################### ### Try Simple Reading for All Non-dummy Ruby Encodings ### ########################################################### def test_reading_with_most_encodings each_encoding do |encoding| begin assert_parses( [ %w[ abc def ], %w[ ghi jkl ] ], encoding ) rescue Encoding::ConverterNotFoundError fail("Failed to support #{encoding.name}.") end end end def test_regular_expression_escaping each_encoding do |encoding| begin assert_parses( [ %w[ abc def ], %w[ ghi jkl ] ], encoding, col_sep: "|" ) rescue Encoding::ConverterNotFoundError fail("Failed to properly escape #{encoding.name}.") end end end def test_read_with_default_encoding data = "abc" default_external = Encoding.default_external each_encoding do |encoding| File.open(@temp_csv_path, "wb", encoding: encoding) {|f| f << data} begin no_warnings do Encoding.default_external = encoding end result = CSV.read(@temp_csv_path)[0][0] ensure no_warnings do Encoding.default_external = default_external end end assert_equal(encoding, result.encoding) end end ####################################################################### ### Stress Test ASCII Compatible and Non-ASCII Compatible Encodings ### ####################################################################### def test_auto_line_ending_detection # arrange data to place a \r at the end of CSV's read ahead point 
encode_for_tests([["a" * 509]], row_sep: "\r\n") do |data| assert_equal("\r\n".encode(data.encoding), CSV.new(data).row_sep) end end def test_csv_chars_are_transcoded encode_for_tests([%w[abc def]]) do |data| %w[col_sep row_sep quote_char].each do |csv_char| assert_equal( "|".encode(data.encoding), CSV.new(data, csv_char.to_sym => "|").send(csv_char) ) end end end def test_parser_works_with_encoded_headers encode_for_tests([%w[one two three], %w[1 2 3]]) do |data| parsed = CSV.parse(data, headers: true) assert_all?(parsed.headers, "Wrong data encoding.") {|h| h.encoding == data.encoding} parsed.each do |row| assert_all?(row.fields, "Wrong data encoding.") {|f| f.encoding == data.encoding} end end end def test_built_in_converters_transcode_to_utf_8_then_convert encode_for_tests([%w[one two three], %w[1 2 3]]) do |data| parsed = CSV.parse(data, converters: :integer) assert_all?(parsed[0], "Wrong data encoding.") {|f| f.encoding == data.encoding} assert_equal([1, 2, 3], parsed[1]) end end def test_built_in_header_converters_transcode_to_utf_8_then_convert encode_for_tests([%w[one two three], %w[1 2 3]]) do |data| parsed = CSV.parse( data, headers: true, header_converters: :downcase ) assert_all?(parsed.headers, "Wrong data encoding.") {|h| h.encoding.name == "UTF-8"} assert_all?(parsed[0].fields, "Wrong data encoding.") {|f| f.encoding == data.encoding} end end def test_open_allows_you_to_set_encodings encode_for_tests([%w[abc def]]) do |data| # read and write in encoding File.open(@temp_csv_path, "wb:#{data.encoding.name}") { |f| f << data } CSV.open(@temp_csv_path, "rb:#{data.encoding.name}") do |csv| csv.each do |row| assert_all?(row, "Wrong data encoding.") {|f| f.encoding == data.encoding} end end # read and write with transcoding File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f| f << data end CSV.open(@temp_csv_path, "rb:UTF-32BE:#{data.encoding.name}") do |csv| csv.each do |row| assert_all?(row, "Wrong data encoding.") {|f| f.encoding == 
data.encoding} end end end end def test_foreach_allows_you_to_set_encodings encode_for_tests([%w[abc def]]) do |data| # read and write in encoding File.open(@temp_csv_path, "wb", encoding: data.encoding) { |f| f << data } CSV.foreach(@temp_csv_path, encoding: data.encoding) do |row| row.each {|f| assert_equal(f.encoding, data.encoding)} end # read and write with transcoding File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f| f << data end CSV.foreach( @temp_csv_path, encoding: "UTF-32BE:#{data.encoding.name}" ) do |row| assert_all?(row, "Wrong data encoding.") {|f| f.encoding == data.encoding} end end end def test_read_allows_you_to_set_encodings encode_for_tests([%w[abc def]]) do |data| # read and write in encoding File.open(@temp_csv_path, "wb:#{data.encoding.name}") { |f| f << data } rows = CSV.read(@temp_csv_path, encoding: data.encoding.name) assert_all?(rows.flatten, "Wrong data encoding.") {|f| f.encoding == data.encoding} # read and write with transcoding File.open(@temp_csv_path, "wb:UTF-32BE:#{data.encoding.name}") do |f| f << data end rows = CSV.read( @temp_csv_path, encoding: "UTF-32BE:#{data.encoding.name}" ) assert_all?(rows.flatten, "Wrong data encoding.") {|f| f.encoding == data.encoding} end end ################################# ### Write CSV in any Encoding ### ################################# def test_can_write_csv_in_any_encoding each_encoding do |encoding| # test generate_line with encoding hint begin csv = %w[abc d|ef].map { |f| f.encode(encoding) }. 
to_csv(col_sep: "|", encoding: encoding.name) rescue Encoding::ConverterNotFoundError next end assert_equal(encoding, csv.encoding) # test generate_line with encoding guessing from fields csv = %w[abc d|ef].map { |f| f.encode(encoding) }.to_csv(col_sep: "|") assert_equal(encoding, csv.encoding) # writing to files data = encode_ary([%w[abc d,ef], %w[123 456 ]], encoding) CSV.open(@temp_csv_path, "wb:#{encoding.name}") do |f| data.each { |row| f << row } end assert_equal(data, CSV.read(@temp_csv_path, encoding: encoding.name)) end end def test_encoding_is_upgraded_during_writing_as_needed data = ["foo".force_encoding("US-ASCII"), "\u3042"] assert_equal("US-ASCII", data.first.encoding.name) assert_equal("UTF-8", data.last.encoding.name) assert_equal("UTF-8", data.join('').encoding.name) assert_equal("UTF-8", data.to_csv.encoding.name) end def test_encoding_is_upgraded_for_ascii_content_during_writing_as_needed data = ["foo".force_encoding("ISO-8859-1"), "\u3042"] assert_equal("ISO-8859-1", data.first.encoding.name) assert_equal("UTF-8", data.last.encoding.name) assert_equal("UTF-8", data.join('').encoding.name) assert_equal("UTF-8", data.to_csv.encoding.name) end def test_encoding_is_not_upgraded_for_non_ascii_content_during_writing_as_needed data = ["\u00c0".encode("ISO-8859-1"), "\u3042"] assert_equal([ "ISO-8859-1", "UTF-8", ], data.collect {|field| field.encoding.name}) assert_raise(Encoding::CompatibilityError) do data.to_csv end end def test_explicit_encoding bug9766 = '[ruby-core:62113] [Bug #9766]' s = CSV.generate(encoding: "Windows-31J") do |csv| csv << ["foo".force_encoding("ISO-8859-1"), "\u3042"] end assert_equal(["foo,\u3042\n".encode(Encoding::Windows_31J), Encoding::Windows_31J], [s, s.encoding], bug9766) end def test_encoding_with_default_internal with_default_internal(Encoding::UTF_8) do s = CSV.generate(String.new(encoding: Encoding::Big5), encoding: Encoding::Big5) do |csv| csv << ["漢字"] end assert_equal(["漢字\n".encode(Encoding::Big5), 
Encoding::Big5], [s, s.encoding]) end end def test_row_separator_detection_with_invalid_encoding csv = CSV.new("invalid,\xF8\r\nvalid,x\r\n".force_encoding("UTF-8"), encoding: "UTF-8") assert_equal("\r\n", csv.row_sep) end def test_invalid_encoding_row_error csv = CSV.new("valid,x\rinvalid,\xF8\r".force_encoding("UTF-8"), encoding: "UTF-8", row_sep: "\r") error = assert_raise(CSV::InvalidEncodingError) do csv.shift csv.shift end assert_equal([Encoding::UTF_8, "Invalid byte sequence in UTF-8 in line 2."], [error.encoding, error.message]) end def test_string_input_transcode # U+3042 HIRAGANA LETTER A # U+3044 HIRAGANA LETTER I # U+3046 HIRAGANA LETTER U value = "\u3042\u3044\u3046" csv = CSV.new(value, encoding: "UTF-8:EUC-JP") assert_equal([[value.encode("EUC-JP")]], csv.read) end def test_string_input_set_encoding_string # U+3042 HIRAGANA LETTER A # U+3044 HIRAGANA LETTER I # U+3046 HIRAGANA LETTER U value = "\u3042\u3044\u3046".encode("EUC-JP") csv = CSV.new(value.dup.force_encoding("UTF-8"), encoding: "EUC-JP") assert_equal([[value.encode("EUC-JP")]], csv.read) end def test_string_input_set_encoding_encoding # U+3042 HIRAGANA LETTER A # U+3044 HIRAGANA LETTER I # U+3046 HIRAGANA LETTER U value = "\u3042\u3044\u3046".encode("EUC-JP") csv = CSV.new(value.dup.force_encoding("UTF-8"), encoding: Encoding.find("EUC-JP")) assert_equal([[value.encode("EUC-JP")]], csv.read) end private def assert_parses(fields, encoding, **options) encoding = Encoding.find(encoding) unless encoding.is_a? 
Encoding orig_fields = fields fields = encode_ary(fields, encoding) data = ary_to_data(fields, **options) parsed = CSV.parse(data, **options) assert_equal(fields, parsed) parsed.flatten.each_with_index do |field, i| assert_equal(encoding, field.encoding, "Field[#{i + 1}] was transcoded.") end File.open(@temp_csv_path, "wb") {|f| f.print(data)} CSV.open(@temp_csv_path, "rb:#{encoding}", **options) do |csv| csv.each_with_index do |row, i| assert_equal(fields[i], row) end end begin CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", **options) do |csv| csv.each_with_index do |row, i| assert_equal(orig_fields[i], row) end end unless encoding == __ENCODING__ rescue Encoding::ConverterNotFoundError end options[:encoding] = encoding.name CSV.open(@temp_csv_path, **options) do |csv| csv.each_with_index do |row, i| assert_equal(fields[i], row) end end options.delete(:encoding) options[:external_encoding] = encoding.name options[:internal_encoding] = __ENCODING__.name begin CSV.open(@temp_csv_path, **options) do |csv| csv.each_with_index do |row, i| assert_equal(orig_fields[i], row) end end unless encoding == __ENCODING__ rescue Encoding::ConverterNotFoundError end end def encode_ary(ary, encoding) ary.map { |row| row.map { |field| field.encode(encoding) } } end def ary_to_data(ary, **options) encoding = ary.flatten.first.encoding quote_char = (options[:quote_char] || '"').encode(encoding) col_sep = (options[:col_sep] || ",").encode(encoding) row_sep = (options[:row_sep] || "\n").encode(encoding) ary.map { |row| row.map { |field| [quote_char, field.encode(encoding), quote_char].join('') }.join(col_sep) + row_sep }.join('').encode(encoding) end def encode_for_tests(data, **options) yield ary_to_data(encode_ary(data, "UTF-8"), **options) yield ary_to_data(encode_ary(data, "UTF-16BE"), **options) end def each_encoding Encoding.list.each do |encoding| next if encoding.dummy? 
# skip "dummy" encodings yield encoding end end def no_warnings old_verbose, $VERBOSE = $VERBOSE, nil yield ensure $VERBOSE = old_verbose end end csv-3.3.5/test/csv/test_features.rb000066400000000000000000000260441501670011600172740ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false begin require "zlib" rescue LoadError end require_relative "helper" require "tempfile" class TestCSVFeatures < Test::Unit::TestCase extend DifferentOFS TEST_CASES = [ [%Q{a,b}, ["a", "b"]], [%Q{a,"""b"""}, ["a", "\"b\""]], [%Q{a,"""b"}, ["a", "\"b"]], [%Q{a,"b"""}, ["a", "b\""]], [%Q{a,"\nb"""}, ["a", "\nb\""]], [%Q{a,"""\nb"}, ["a", "\"\nb"]], [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]], [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]], [%Q{a,,,}, ["a", nil, nil, nil]], [%Q{,}, [nil, nil]], [%Q{"",""}, ["", ""]], [%Q{""""}, ["\""]], [%Q{"""",""}, ["\"",""]], [%Q{,""}, [nil,""]], [%Q{,"\r"}, [nil,"\r"]], [%Q{"\r\n,"}, ["\r\n,"]], [%Q{"\r\n,",}, ["\r\n,", nil]] ] def setup super @sample_data = <<-CSV line,1,abc line,2,"def\nghi" line,4,jkl CSV @csv = CSV.new(@sample_data) end def test_col_sep [";", "\t"].each do |sep| TEST_CASES.each do |test_case| assert_equal( test_case.last.map { |t| t.tr(",", sep) unless t.nil? 
}, CSV.parse_line( test_case.first.tr(",", sep), col_sep: sep ) ) end end assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";")) end def test_col_sep_nil assert_raise_with_message(ArgumentError, ":col_sep must be 1 or more characters: nil") do CSV.parse(@sample_data, col_sep: nil) end end def test_col_sep_empty assert_raise_with_message(ArgumentError, ":col_sep must be 1 or more characters: \"\"") do CSV.parse(@sample_data, col_sep: "") end end def test_row_sep error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n") end assert_equal("Unquoted fields do not allow new line <\"\\n\"> in line 1.", error.message) assert_equal( ["1", "2", "3\n", "4", "5"], CSV.parse_line(%Q{1,2,"3\n",4,5\r\n}, row_sep: "\r\n")) end def test_quote_char TEST_CASES.each do |test_case| assert_equal(test_case.last.map {|t| t.tr('"', "'") unless t.nil?}, CSV.parse_line(test_case.first.tr('"', "'"), quote_char: "'" )) end end def test_quote_char_special_regexp_char TEST_CASES.each do |test_case| assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?}, CSV.parse_line(test_case.first.tr('"', "|"), quote_char: "|")) end end def test_quote_char_special_regexp_char_liberal_parsing TEST_CASES.each do |test_case| assert_equal(test_case.last.map {|t| t.tr('"', "|") unless t.nil?}, CSV.parse_line(test_case.first.tr('"', "|"), quote_char: "|", liberal_parsing: true)) end end def test_csv_char_readers %w[col_sep row_sep quote_char].each do |reader| csv = CSV.new("abc,def", reader.to_sym => "|") assert_equal("|", csv.send(reader)) end end def test_row_sep_auto_discovery ["\r\n", "\n", "\r"].each do |line_end| data = "1,2,3#{line_end}4,5#{line_end}" discovered = CSV.new(data).row_sep assert_equal(line_end, discovered) end assert_equal("\n", CSV.new("\n\r\n\r").row_sep) assert_equal($/, CSV.new("").row_sep) assert_equal($/, CSV.new(STDERR).row_sep) end def test_line lines = [ %Q(\u{3000}abc,def\n), %Q(\u{3000}abc,"d\nef"\n), 
%Q(\u{3000}abc,"d\r\nef"\n), %Q(\u{3000}abc,"d\ref") ] csv = CSV.new(lines.join('')) lines.each do |line| csv.shift assert_equal(line, csv.line) end end def test_lineno assert_equal(5, @sample_data.lines.to_a.size) 4.times do |line_count| assert_equal(line_count, @csv.lineno) assert_not_nil(@csv.shift) assert_equal(line_count + 1, @csv.lineno) end assert_nil(@csv.shift) end def test_readline test_lineno @csv.rewind test_lineno end def test_unknown_options assert_raise_with_message(ArgumentError, /unknown keyword/) { CSV.new(@sample_data, unknown: :error) } assert_raise_with_message(ArgumentError, /unknown keyword/) { CSV.new(@sample_data, universal_newline: true) } end def test_skip_blanks assert_equal(4, @csv.to_a.size) @csv = CSV.new(@sample_data, skip_blanks: true) count = 0 @csv.each do |row| count += 1 assert_equal("line", row.first) end assert_equal(3, count) end def test_csv_behavior_readers %w[ unconverted_fields return_headers write_headers skip_blanks force_quotes ].each do |behavior| assert_not_predicate(CSV.new("abc,def"), "#{behavior}?", "Behavior defaulted to on.") csv = CSV.new("abc,def", behavior.to_sym => true) assert_predicate(csv, "#{behavior}?", "Behavior change now registered.") end end def test_converters_reader # no change assert_equal( [:integer], CSV.new("abc,def", converters: [:integer]).converters ) # just one assert_equal( [:integer], CSV.new("abc,def", converters: :integer).converters ) # expanded assert_equal( [:integer, :float], CSV.new("abc,def", converters: :numeric).converters ) # custom csv = CSV.new("abc,def", converters: [:integer, lambda { }]) assert_equal(2, csv.converters.size) assert_equal(:integer, csv.converters.first) assert_instance_of(Proc, csv.converters.last) end def test_header_converters_reader # no change hc = :header_converters assert_equal([:downcase], CSV.new("abc,def", hc => [:downcase]).send(hc)) # just one assert_equal([:downcase], CSV.new("abc,def", hc => :downcase).send(hc)) # custom csv = 
CSV.new("abc,def", hc => [:symbol, lambda { }]) assert_equal(2, csv.send(hc).size) assert_equal(:symbol, csv.send(hc).first) assert_instance_of(Proc, csv.send(hc).last) end # reported by Kev Jackson def test_failing_to_escape_col_sep assert_nothing_raised(Exception) { CSV.new(String.new, col_sep: "|") } end # reported by Chris Roos def test_failing_to_reset_headers_in_rewind csv = CSV.new("forename,surname", headers: true, return_headers: true) csv.each {|row| assert_predicate row, :header_row?} csv.rewind csv.each {|row| assert_predicate row, :header_row?} end def test_gzip_reader zipped = nil assert_nothing_raised(NoMethodError) do zipped = CSV.new( Zlib::GzipReader.open( File.join(File.dirname(__FILE__), "line_endings.gz") ) ) end assert_equal("\r\n", zipped.row_sep) ensure zipped.close end if defined?(Zlib::GzipReader) def test_gzip_writer Tempfile.create(%w"temp .gz") {|tempfile| tempfile.close file = tempfile.path zipped = nil assert_nothing_raised(NoMethodError) do zipped = CSV.new(Zlib::GzipWriter.open(file)) end zipped << %w[one two three] zipped << [1, 2, 3] zipped.close assert_include(Zlib::GzipReader.open(file) {|f| f.read}, $INPUT_RECORD_SEPARATOR, "@row_sep did not default") } end if defined?(Zlib::GzipWriter) def test_inspect_is_smart_about_io_types str = CSV.new("string,data").inspect assert_include(str, "io_type:StringIO", "IO type not detected.") str = CSV.new($stderr).inspect assert_include(str, "io_type:$stderr", "IO type not detected.") Tempfile.create(%w"temp .csv") {|tempfile| tempfile.close path = tempfile.path File.open(path, "w") { |csv| csv << "one,two,three\n1,2,3\n" } str = CSV.open(path) { |csv| csv.inspect } assert_include(str, "io_type:File", "IO type not detected.") } end def test_inspect_shows_key_attributes str = @csv.inspect %w[lineno col_sep row_sep quote_char].each do |attr_name| assert_match(/\b#{attr_name}:[^\s>]+/, str) end end def test_inspect_shows_headers_when_available csv = CSV.new("one,two,three\n1,2,3\n", headers: 
true) assert_include(csv.inspect, "headers:true", "Header hint not shown.") csv.shift # load headers assert_match(/headers:\[[^\]]+\]/, csv.inspect) end def test_inspect_encoding_is_ascii_compatible csv = CSV.new("one,two,three\n1,2,3\n".encode("UTF-16BE")) assert_send([Encoding, :compatible?, Encoding.find("US-ASCII"), csv.inspect.encoding], "inspect() was not ASCII compatible.") end def test_version assert_not_nil(CSV::VERSION) assert_instance_of(String, CSV::VERSION) assert_predicate(CSV::VERSION, :frozen?) assert_match(/\A\d\.\d\.\d\z/, CSV::VERSION) end def test_table_nil_equality assert_nothing_raised(NoMethodError) { CSV.parse("test", headers: true) == nil } end # non-seekable input stream for testing https://github.com/ruby/csv/issues/44 class DummyIO extend Forwardable def_delegators :@io, :gets, :read, :pos, :eof? # no seek or rewind! def initialize(data) @io = StringIO.new(data) end end def test_line_separator_autodetection_for_non_seekable_input_lf c = CSV.new(DummyIO.new("one,two,three\nfoo,bar,baz\n")) assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a end def test_line_separator_autodetection_for_non_seekable_input_cr c = CSV.new(DummyIO.new("one,two,three\rfoo,bar,baz\r")) assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a end def test_line_separator_autodetection_for_non_seekable_input_cr_lf c = CSV.new(DummyIO.new("one,two,three\r\nfoo,bar,baz\r\n")) assert_equal [["one", "two", "three"], ["foo", "bar", "baz"]], c.each.to_a end def test_line_separator_autodetection_for_non_seekable_input_1024_over_lf table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a input = table.map { |line| line.join(",") }.join("\n") c = CSV.new(DummyIO.new(input)) assert_equal table, c.each.to_a end def test_line_separator_autodetection_for_non_seekable_input_1024_over_cr_lf table = (1..10).map { |row| (1..200).map { |col| "row#{row}col#{col}" }.to_a }.to_a input = table.map { |line| 
line.join(",") }.join("\r\n") c = CSV.new(DummyIO.new(input)) assert_equal table, c.each.to_a end def test_line_separator_autodetection_for_non_seekable_input_many_cr_only # input with lots of CRs (to make sure no bytes are lost due to look-ahead) c = CSV.new(DummyIO.new("foo\r" + "\r" * 9999 + "bar\r")) assert_equal [["foo"]] + [[]] * 9999 + [["bar"]], c.each.to_a end end csv-3.3.5/test/csv/test_patterns.rb000066400000000000000000000010061501670011600173050ustar00rootroot00000000000000# frozen_string_literal: true require_relative "helper" class TestCSVPatternMatching < Test::Unit::TestCase def test_hash case CSV::Row.new(%i{A B C}, [1, 2, 3]) in B: b, C: c assert_equal([2, 3], [b, c]) end end def test_hash_rest case CSV::Row.new(%i{A B C}, [1, 2, 3]) in B: b, **rest assert_equal([2, { A: 1, C: 3 }], [b, rest]) end end def test_array case CSV::Row.new(%i{A B C}, [1, 2, 3]) in *, matched assert_equal(3, matched) end end end csv-3.3.5/test/csv/test_row.rb000066400000000000000000000264071501670011600162700ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "helper" class TestCSVRow < Test::Unit::TestCase extend DifferentOFS def setup super @row = CSV::Row.new(%w{A B C A A}, [1, 2, 3, 4]) end def test_initialize # basic row = CSV::Row.new(%w{A B C}, [1, 2, 3]) assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([["A", 1], ["B", 2], ["C", 3]], row.to_a) # missing headers row = CSV::Row.new(%w{A}, [1, 2, 3]) assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([["A", 1], [nil, 2], [nil, 3]], row.to_a) # missing fields row = CSV::Row.new(%w{A B C}, [1, 2]) assert_not_nil(row) assert_instance_of(CSV::Row, row) assert_equal([["A", 1], ["B", 2], ["C", nil]], row.to_a) end def test_row_type # field rows row = CSV::Row.new(%w{A B C}, [1, 2, 3]) # implicit assert_not_predicate(row, :header_row?) assert_predicate(row, :field_row?) 
row = CSV::Row.new(%w{A B C}, [1, 2, 3], false) # explicit assert_not_predicate(row, :header_row?) assert_predicate(row, :field_row?) # header row row = CSV::Row.new(%w{A B C}, [1, 2, 3], true) assert_predicate(row, :header_row?) assert_not_predicate(row, :field_row?) end def test_headers assert_equal(%w{A B C A A}, @row.headers) end def test_field # by name assert_equal(2, @row.field("B")) assert_equal(2, @row["B"]) # alias # by index assert_equal(3, @row.field(2)) # by range assert_equal([2,3], @row.field(1..2)) # missing assert_nil(@row.field("Missing")) assert_nil(@row.field(10)) # minimum index assert_equal(1, @row.field("A")) assert_equal(1, @row.field("A", 0)) assert_equal(4, @row.field("A", 1)) assert_equal(4, @row.field("A", 2)) assert_equal(4, @row.field("A", 3)) assert_equal(nil, @row.field("A", 4)) assert_equal(nil, @row.field("A", 5)) end def test_fetch # only by name assert_equal(2, @row.fetch('B')) # missing header raises KeyError assert_raise KeyError do @row.fetch('foo') end # missing header yields itself to block assert_equal 'bar', @row.fetch('foo') { |header| header == 'foo' ? 'bar' : false } # missing header returns the given default value assert_equal 'bar', @row.fetch('foo', 'bar') # more than one vararg raises ArgumentError assert_raise ArgumentError do @row.fetch('foo', 'bar', 'baz') end end def test_has_key? 
assert_equal(true, @row.has_key?('B')) assert_equal(false, @row.has_key?('foo')) # aliases assert_equal(true, @row.header?('B')) assert_equal(false, @row.header?('foo')) assert_equal(true, @row.include?('B')) assert_equal(false, @row.include?('foo')) assert_equal(true, @row.member?('B')) assert_equal(false, @row.member?('foo')) assert_equal(true, @row.key?('B')) assert_equal(false, @row.key?('foo')) end def test_set_field # set field by name assert_equal(100, @row["A"] = 100) # set field by index assert_equal(300, @row[3] = 300) # set field by name and minimum index assert_equal([:a, :b, :c], @row["A", 4] = [:a, :b, :c]) # verify the changes assert_equal( [ ["A", 100], ["B", 2], ["C", 3], ["A", 300], ["A", [:a, :b, :c]] ], @row.to_a ) # assigning an index past the end assert_equal("End", @row[10] = "End") assert_equal( [ ["A", 100], ["B", 2], ["C", 3], ["A", 300], ["A", [:a, :b, :c]], [nil, nil], [nil, nil], [nil, nil], [nil, nil], [nil, nil], [nil, "End"] ], @row.to_a ) # assigning a new field by header assert_equal("New", @row[:new] = "New") assert_equal( [ ["A", 100], ["B", 2], ["C", 3], ["A", 300], ["A", [:a, :b, :c]], [nil, nil], [nil, nil], [nil, nil], [nil, nil], [nil, nil], [nil, "End"], [:new, "New"] ], @row.to_a ) end def test_append # add a value assert_equal(@row, @row << "Value") assert_equal( [ ["A", 1], ["B", 2], ["C", 3], ["A", 4], ["A", nil], [nil, "Value"] ], @row.to_a ) # add a pair assert_equal(@row, @row << %w{Header Field}) assert_equal( [ ["A", 1], ["B", 2], ["C", 3], ["A", 4], ["A", nil], [nil, "Value"], %w{Header Field} ], @row.to_a ) # a pair with Hash syntax assert_equal(@row, @row << {key: :value}) assert_equal( [ ["A", 1], ["B", 2], ["C", 3], ["A", 4], ["A", nil], [nil, "Value"], %w{Header Field}, [:key, :value] ], @row.to_a ) # multiple fields at once assert_equal(@row, @row.push(100, 200, [:last, 300])) assert_equal( [ ["A", 1], ["B", 2], ["C", 3], ["A", 4], ["A", nil], [nil, "Value"], %w{Header Field}, [:key, :value], [nil, 100], 
[nil, 200], [:last, 300] ], @row.to_a ) end def test_delete # by index assert_equal(["B", 2], @row.delete(1)) # by header assert_equal(["C", 3], @row.delete("C")) end def test_delete_if assert_equal(@row, @row.delete_if { |h, f| h == "A" and not f.nil? }) assert_equal([["B", 2], ["C", 3], ["A", nil]], @row.to_a) end def test_delete_if_without_block enum = @row.delete_if assert_instance_of(Enumerator, enum) assert_equal(@row.size, enum.size) assert_equal(@row, enum.each { |h, f| h == "A" and not f.nil? }) assert_equal([["B", 2], ["C", 3], ["A", nil]], @row.to_a) end def test_fields # all fields assert_equal([1, 2, 3, 4, nil], @row.fields) # by header assert_equal([1, 3], @row.fields("A", "C")) # by index assert_equal([2, 3, nil], @row.fields(1, 2, 10)) # by both assert_equal([2, 3, 4], @row.fields("B", "C", 3)) # with minimum indices assert_equal([2, 3, 4], @row.fields("B", "C", ["A", 3])) # by header range assert_equal([2, 3], @row.values_at("B".."C")) end def test_index # basic usage assert_equal(0, @row.index("A")) assert_equal(1, @row.index("B")) assert_equal(2, @row.index("C")) assert_equal(nil, @row.index("Z")) # with minimum index assert_equal(0, @row.index("A")) assert_equal(0, @row.index("A", 0)) assert_equal(3, @row.index("A", 1)) assert_equal(3, @row.index("A", 2)) assert_equal(3, @row.index("A", 3)) assert_equal(4, @row.index("A", 4)) assert_equal(nil, @row.index("A", 5)) end def test_queries # fields assert(@row.field?(4)) assert(@row.field?(nil)) assert(!@row.field?(10)) end def test_each # array style ary = @row.to_a @row.each do |pair| assert_equal(ary.first.first, pair.first) assert_equal(ary.shift.last, pair.last) end # hash style ary = @row.to_a @row.each do |header, field| assert_equal(ary.first.first, header) assert_equal(ary.shift.last, field) end # verify that we can chain the call assert_equal(@row, @row.each { }) # without block ary = @row.to_a enum = @row.each assert_instance_of(Enumerator, enum) assert_equal(@row.size, enum.size) enum.each 
do |pair| assert_equal(ary.first.first, pair.first) assert_equal(ary.shift.last, pair.last) end end def test_each_pair assert_equal([ ["A", 1], ["B", 2], ["C", 3], ["A", 4], ["A", nil], ], @row.each_pair.to_a) end def test_enumerable assert_equal( [["A", 1], ["A", 4], ["A", nil]], @row.select { |pair| pair.first == "A" } ) assert_equal(10, @row.inject(0) { |sum, (_, n)| sum + (n || 0) }) end def test_to_a row = CSV::Row.new(%w{A B C}, [1, 2, 3]).to_a assert_instance_of(Array, row) row.each do |pair| assert_instance_of(Array, pair) assert_equal(2, pair.size) end assert_equal([["A", 1], ["B", 2], ["C", 3]], row) end def test_to_hash hash = @row.to_hash assert_equal({"A" => @row["A"], "B" => @row["B"], "C" => @row["C"]}, hash) hash.keys.each_with_index do |string_key, h| assert_predicate(string_key, :frozen?) assert_same(string_key, @row.headers[h]) end end def test_to_csv # normal conversion assert_equal("1,2,3,4,\n", @row.to_csv) assert_equal("1,2,3,4,\n", @row.to_s) # alias # with options assert_equal( "1|2|3|4|\r\n", @row.to_csv(col_sep: "|", row_sep: "\r\n") ) end def test_array_delegation assert_not_empty(@row, "Row was empty.") assert_equal([@row.headers.size, @row.fields.size].max, @row.size) end def test_inspect_shows_header_field_pairs str = @row.inspect @row.each do |header, field| assert_include(str, "#{header.inspect}:#{field.inspect}", "Header field pair not found.") end end def test_inspect_encoding_is_ascii_compatible assert_send([Encoding, :compatible?, Encoding.find("US-ASCII"), @row.inspect.encoding], "inspect() was not ASCII compatible.") end def test_inspect_shows_symbol_headers_as_bare_attributes str = CSV::Row.new(@row.headers.map { |h| h.to_sym }, @row.fields).inspect @row.each do |header, field| assert_include(str, "#{header}:#{field.inspect}", "Header field pair not found.") end end def test_can_be_compared_with_other_classes assert_not_nil(CSV::Row.new([ ], [ ]), "The row was nil") end def test_can_be_compared_when_not_a_row r = @row == [] 
assert_equal false, r end def test_dig_by_index assert_equal(2, @row.dig(1)) assert_nil(@row.dig(100)) end def test_dig_by_header assert_equal(2, @row.dig("B")) assert_nil(@row.dig("Missing")) end def test_dig_cell row = CSV::Row.new(%w{A}, [["foo", ["bar", ["baz"]]]]) assert_equal("foo", row.dig(0, 0)) assert_equal("bar", row.dig(0, 1, 0)) assert_equal("foo", row.dig("A", 0)) assert_equal("bar", row.dig("A", 1, 0)) end def test_dig_cell_no_dig row = CSV::Row.new(%w{A}, ["foo"]) assert_raise(TypeError) do row.dig(0, 0) end assert_raise(TypeError) do row.dig("A", 0) end end def test_dup row = CSV::Row.new(["A"], ["foo"]) dupped_row = row.dup dupped_row["A"] = "bar" assert_equal(["foo", "bar"], [row["A"], dupped_row["A"]]) dupped_row.delete("A") assert_equal(["foo", nil], [row["A"], dupped_row["A"]]) end end csv-3.3.5/test/csv/test_table.rb000066400000000000000000000413711501670011600165450ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "helper" class TestCSVTable < Test::Unit::TestCase extend DifferentOFS def setup super @rows = [ CSV::Row.new(%w{A B C}, [1, 2, 3]), CSV::Row.new(%w{A B C}, [4, 5, 6]), CSV::Row.new(%w{A B C}, [7, 8, 9]) ] @table = CSV::Table.new(@rows) @header_table = CSV::Table.new( [CSV::Row.new(%w{A B C}, %w{A B C}, true)] + @rows ) @header_only_table = CSV::Table.new([], headers: %w{A B C}) end def test_initialize assert_not_nil(@table) assert_instance_of(CSV::Table, @table) end def test_modes assert_equal(:col_or_row, @table.mode) # non-destructive changes, intended for one shot calls cols = @table.by_col assert_equal(:col_or_row, @table.mode) assert_equal(:col, cols.mode) assert_equal(@table, cols) rows = @table.by_row assert_equal(:col_or_row, @table.mode) assert_equal(:row, rows.mode) assert_equal(@table, rows) col_or_row = rows.by_col_or_row assert_equal(:row, rows.mode) assert_equal(:col_or_row, col_or_row.mode) assert_equal(@table, col_or_row) # destructive mode changing calls 
assert_equal(@table, @table.by_row!) assert_equal(:row, @table.mode) assert_equal(@table, @table.by_col_or_row!) assert_equal(:col_or_row, @table.mode) end def test_headers assert_equal(@rows.first.headers, @table.headers) end def test_headers_empty t = CSV::Table.new([]) assert_equal Array.new, t.headers end def test_headers_only assert_equal(%w[A B C], @header_only_table.headers) end def test_headers_modified_by_row table = CSV::Table.new([], headers: ["A", "B"]) table << ["a", "b"] table.first << {"C" => "c"} assert_equal(["A", "B", "C"], table.headers) end def test_index ################## ### Mixed Mode ### ################## # by row @rows.each_index { |i| assert_equal(@rows[i], @table[i]) } assert_equal(nil, @table[100]) # empty row # by row with Range assert_equal([@table[1], @table[2]], @table[1..2]) # by col @rows.first.headers.each do |header| assert_equal(@rows.map { |row| row[header] }, @table[header]) end assert_equal([nil] * @rows.size, @table["Z"]) # empty col # by cell, row then col assert_equal(2, @table[0][1]) assert_equal(6, @table[1]["C"]) # by cell, col then row assert_equal(5, @table["B"][1]) assert_equal(9, @table["C"][2]) # with headers (by col) assert_equal(["B", 2, 5, 8], @header_table["B"]) ################### ### Column Mode ### ################### @table.by_col! assert_equal([2, 5, 8], @table[1]) assert_equal([2, 5, 8], @table["B"]) ################ ### Row Mode ### ################ @table.by_row! 
assert_equal(@rows[1], @table[1]) assert_raise(TypeError) { @table["B"] } ############################ ### One Shot Mode Change ### ############################ assert_equal(@rows[1], @table[1]) assert_equal([2, 5, 8], @table.by_col[1]) assert_equal(@rows[1], @table[1]) end def test_set_row_or_column ################## ### Mixed Mode ### ################## # set row @table[2] = [10, 11, 12] assert_equal([%w[A B C], [1, 2, 3], [4, 5, 6], [10, 11, 12]], @table.to_a) @table[3] = CSV::Row.new(%w[A B C], [13, 14, 15]) assert_equal( [%w[A B C], [1, 2, 3], [4, 5, 6], [10, 11, 12], [13, 14, 15]], @table.to_a ) # set col @table["Type"] = "data" assert_equal( [ %w[A B C Type], [1, 2, 3, "data"], [4, 5, 6, "data"], [10, 11, 12, "data"], [13, 14, 15, "data"] ], @table.to_a ) @table["Index"] = [1, 2, 3] assert_equal( [ %w[A B C Type Index], [1, 2, 3, "data", 1], [4, 5, 6, "data", 2], [10, 11, 12, "data", 3], [13, 14, 15, "data", nil] ], @table.to_a ) @table["B"] = [100, 200] assert_equal( [ %w[A B C Type Index], [1, 100, 3, "data", 1], [4, 200, 6, "data", 2], [10, nil, 12, "data", 3], [13, nil, 15, "data", nil] ], @table.to_a ) # verify resulting table assert_equal(<<-CSV, @table.to_csv) A,B,C,Type,Index 1,100,3,data,1 4,200,6,data,2 10,,12,data,3 13,,15,data, CSV # with headers @header_table["Type"] = "data" assert_equal(%w[Type data data data], @header_table["Type"]) ################### ### Column Mode ### ################### @table.by_col! @table[1] = [2, 5, 11, 14] assert_equal( [ %w[A B C Type Index], [1, 2, 3, "data", 1], [4, 5, 6, "data", 2], [10, 11, 12, "data", 3], [13, 14, 15, "data", nil] ], @table.to_a ) @table["Extra"] = "new stuff" assert_equal( [ %w[A B C Type Index Extra], [1, 2, 3, "data", 1, "new stuff"], [4, 5, 6, "data", 2, "new stuff"], [10, 11, 12, "data", 3, "new stuff"], [13, 14, 15, "data", nil, "new stuff"] ], @table.to_a ) ################ ### Row Mode ### ################ @table.by_row! 
@table[1] = (1..6).to_a assert_equal( [ %w[A B C Type Index Extra], [1, 2, 3, "data", 1, "new stuff"], [1, 2, 3, 4, 5, 6], [10, 11, 12, "data", 3, "new stuff"], [13, 14, 15, "data", nil, "new stuff"] ], @table.to_a ) assert_raise(TypeError) { @table["Extra"] = nil } end def test_set_by_col_with_header_row r = [ CSV::Row.new(%w{X Y Z}, [97, 98, 99], true) ] t = CSV::Table.new(r) t.by_col! t['A'] = [42] assert_equal(['A'], t['A']) end def test_each ###################### ### Mixed/Row Mode ### ###################### i = 0 @table.each do |row| assert_equal(@rows[i], row) i += 1 end # verify that we can chain the call assert_equal(@table, @table.each { }) # without block enum = @table.each assert_instance_of(Enumerator, enum) assert_equal(@table.size, enum.size) i = 0 enum.each do |row| assert_equal(@rows[i], row) i += 1 end ################### ### Column Mode ### ################### @table.by_col! headers = @table.headers @table.each do |header, column| assert_equal(headers.shift, header) assert_equal(@table[header], column) end # without block enum = @table.each assert_instance_of(Enumerator, enum) assert_equal(@table.headers.size, enum.size) headers = @table.headers enum.each do |header, column| assert_equal(headers.shift, header) assert_equal(@table[header], column) end ############################ ### One Shot Mode Change ### ############################ @table.by_col_or_row! 
@table.each { |row| assert_instance_of(CSV::Row, row) } @table.by_col.each { |tuple| assert_instance_of(Array, tuple) } @table.each { |row| assert_instance_of(CSV::Row, row) } end def test_each_by_col_duplicated_headers table = CSV.parse(<<-CSV, headers: true) a,a,,,b 1,2,3,4,5 11,12,13,14,15 CSV assert_equal([ ["a", ["1", "11"]], ["a", ["2", "12"]], [nil, ["3", "13"]], [nil, ["4", "14"]], ["b", ["5", "15"]], ], table.by_col.each.to_a) end def test_each_split yielded_values = [] @table.each do |column1, column2, column3| yielded_values << [column1, column2, column3] end assert_equal(@rows.collect(&:to_a), yielded_values) end def test_enumerable assert_equal( @rows.values_at(0, 2), @table.select { |row| (row["B"] % 2).zero? } ) assert_equal(@rows[1], @table.find { |row| row["C"] > 5 }) end def test_to_a assert_equal([%w[A B C], [1, 2, 3], [4, 5, 6], [7, 8, 9]], @table.to_a) # with headers assert_equal( [%w[A B C], [1, 2, 3], [4, 5, 6], [7, 8, 9]], @header_table.to_a ) end def test_to_csv csv = <<-CSV A,B,C 1,2,3 4,5,6 7,8,9 CSV # normal conversion assert_equal(csv, @table.to_csv) assert_equal(csv, @table.to_s) # alias # with options assert_equal( csv.gsub(",", "|").gsub("\n", "\r\n"), @table.to_csv(col_sep: "|", row_sep: "\r\n") ) assert_equal( csv.lines.to_a[1..-1].join(''), @table.to_csv(:write_headers => false) ) # with headers assert_equal(csv, @header_table.to_csv) end def test_to_csv_limit_positive assert_equal(<<-CSV, @table.to_csv(limit: 2)) A,B,C 1,2,3 4,5,6 CSV end def test_to_csv_limit_positive_over assert_equal(<<-CSV, @table.to_csv(limit: 5)) A,B,C 1,2,3 4,5,6 7,8,9 CSV end def test_to_csv_limit_zero assert_equal(<<-CSV, @table.to_csv(limit: 0)) A,B,C CSV end def test_to_csv_limit_negative assert_equal(<<-CSV, @table.to_csv(limit: -2)) A,B,C 1,2,3 4,5,6 CSV end def test_to_csv_limit_negative_over assert_equal(<<-CSV, @table.to_csv(limit: -5)) A,B,C CSV end def test_append # verify that we can chain the call assert_equal(@table, @table << [10, 11, 12]) # 
Array append assert_equal(CSV::Row.new(%w[A B C], [10, 11, 12]), @table[-1]) # Row append assert_equal(@table, @table << CSV::Row.new(%w[A B C], [13, 14, 15])) assert_equal(CSV::Row.new(%w[A B C], [13, 14, 15]), @table[-1]) end def test_delete_mixed_one ################## ### Mixed Mode ### ################## # delete a row assert_equal(@rows[1], @table.delete(1)) # delete a col assert_equal(@rows.map { |row| row["A"] }, @table.delete("A")) # verify resulting table assert_equal(<<-CSV, @table.to_csv) B,C 2,3 8,9 CSV end def test_delete_mixed_multiple ################## ### Mixed Mode ### ################## # delete row and col second_row = @rows[1] a_col = @rows.map { |row| row["A"] } a_col_without_second_row = a_col[0..0] + a_col[2..-1] assert_equal([ second_row, a_col_without_second_row, ], @table.delete(1, "A")) # verify resulting table assert_equal(<<-CSV, @table.to_csv) B,C 2,3 8,9 CSV end def test_delete_column ################### ### Column Mode ### ################### @table.by_col! assert_equal(@rows.map { |row| row[0] }, @table.delete(0)) assert_equal(@rows.map { |row| row["C"] }, @table.delete("C")) # verify resulting table assert_equal(<<-CSV, @table.to_csv) B 2 5 8 CSV end def test_delete_row ################ ### Row Mode ### ################ @table.by_row! assert_equal(@rows[1], @table.delete(1)) assert_raise(TypeError) { @table.delete("C") } # verify resulting table assert_equal(<<-CSV, @table.to_csv) A,B,C 1,2,3 7,8,9 CSV end def test_delete_with_blank_rows data = "col1,col2\nra1,ra2\n\nrb1,rb2" table = CSV.parse(data, :headers => true) assert_equal(["ra2", nil, "rb2"], table.delete("col2")) end def test_delete_if_row ###################### ### Mixed/Row Mode ### ###################### # verify that we can chain the call assert_equal(@table, @table.delete_if { |row| (row["B"] % 2).zero? 
}) # verify resulting table assert_equal(<<-CSV, @table.to_csv) A,B,C 4,5,6 CSV end def test_delete_if_row_without_block ###################### ### Mixed/Row Mode ### ###################### enum = @table.delete_if assert_instance_of(Enumerator, enum) assert_equal(@table.size, enum.size) # verify that we can chain the call assert_equal(@table, enum.each { |row| (row["B"] % 2).zero? }) # verify resulting table assert_equal(<<-CSV, @table.to_csv) A,B,C 4,5,6 CSV end def test_delete_if_column ################### ### Column Mode ### ################### @table.by_col! assert_equal(@table, @table.delete_if { |h, v| h > "A" }) assert_equal(<<-CSV, @table.to_csv) A 1 4 7 CSV end def test_delete_if_column_without_block ################### ### Column Mode ### ################### @table.by_col! enum = @table.delete_if assert_instance_of(Enumerator, enum) assert_equal(@table.headers.size, enum.size) assert_equal(@table, enum.each { |h, v| h > "A" }) assert_equal(<<-CSV, @table.to_csv) A 1 4 7 CSV end def test_delete_headers_only ################### ### Column Mode ### ################### @header_only_table.by_col! # delete by index assert_equal([], @header_only_table.delete(0)) assert_equal(%w[B C], @header_only_table.headers) # delete by header assert_equal([], @header_only_table.delete("C")) assert_equal(%w[B], @header_only_table.headers) end def test_values_at ################## ### Mixed Mode ### ################## # rows assert_equal(@rows.values_at(0, 2), @table.values_at(0, 2)) assert_equal(@rows.values_at(1..2), @table.values_at(1..2)) # cols assert_equal([[1, 3], [4, 6], [7, 9]], @table.values_at("A", "C")) assert_equal([[2, 3], [5, 6], [8, 9]], @table.values_at("B".."C")) ################### ### Column Mode ### ################### @table.by_col! assert_equal([[1, 3], [4, 6], [7, 9]], @table.values_at(0, 2)) assert_equal([[1, 3], [4, 6], [7, 9]], @table.values_at("A", "C")) ################ ### Row Mode ### ################ @table.by_row! 
assert_equal(@rows.values_at(0, 2), @table.values_at(0, 2)) assert_raise(TypeError) { @table.values_at("A", "C") } ############################ ### One Shot Mode Change ### ############################ assert_equal(@rows.values_at(0, 2), @table.values_at(0, 2)) assert_equal([[1, 3], [4, 6], [7, 9]], @table.by_col.values_at(0, 2)) assert_equal(@rows.values_at(0, 2), @table.values_at(0, 2)) end def test_array_delegation assert_not_empty(@table, "Table was empty.") assert_equal(@rows.size, @table.size) end def test_inspect_shows_current_mode str = @table.inspect assert_include(str, "mode:#{@table.mode}", "Mode not shown.") @table.by_col! str = @table.inspect assert_include(str, "mode:#{@table.mode}", "Mode not shown.") end def test_inspect_encoding_is_ascii_compatible assert_send([Encoding, :compatible?, Encoding.find("US-ASCII"), @table.inspect.encoding], "inspect() was not ASCII compatible." ) end def test_inspect_with_rows additional_rows = [ CSV::Row.new(%w{A B C}, [101, 102, 103]), CSV::Row.new(%w{A B C}, [104, 105, 106]), CSV::Row.new(%w{A B C}, [107, 108, 109]) ] table = CSV::Table.new(@rows + additional_rows) str_table = table.inspect assert_equal(<<-CSV, str_table) # A,B,C 1,2,3 4,5,6 7,8,9 101,102,103 104,105,106 CSV end def test_dig_mixed # by row assert_equal(@rows[0], @table.dig(0)) assert_nil(@table.dig(100)) # empty row # by col assert_equal([2, 5, 8], @table.dig("B")) assert_equal([nil] * @rows.size, @table.dig("Z")) # empty col # by row then col assert_equal(2, @table.dig(0, 1)) assert_equal(6, @table.dig(1, "C")) # by col then row assert_equal(5, @table.dig("B", 1)) assert_equal(9, @table.dig("C", 2)) end def test_dig_by_column @table.by_col! assert_equal([2, 5, 8], @table.dig(1)) assert_equal([2, 5, 8], @table.dig("B")) # by col then row assert_equal(5, @table.dig("B", 1)) assert_equal(9, @table.dig("C", 2)) end def test_dig_by_row @table.by_row! 
assert_equal(@rows[1], @table.dig(1)) assert_raise(TypeError) { @table.dig("B") } # by row then col assert_equal(2, @table.dig(0, 1)) assert_equal(6, @table.dig(1, "C")) end def test_dig_cell table = CSV::Table.new([CSV::Row.new(["A"], [["foo", ["bar", ["baz"]]]])]) # by row, col then cell assert_equal("foo", table.dig(0, "A", 0)) assert_equal(["baz"], table.dig(0, "A", 1, 1)) # by col, row then cell assert_equal("foo", table.dig("A", 0, 0)) assert_equal(["baz"], table.dig("A", 0, 1, 1)) end def test_dig_cell_no_dig table = CSV::Table.new([CSV::Row.new(["A"], ["foo"])]) # by row, col then cell assert_raise(TypeError) do table.dig(0, "A", 0) end # by col, row then cell assert_raise(TypeError) do table.dig("A", 0, 0) end end end csv-3.3.5/test/csv/test_tsv.rb000066400000000000000000000013521501670011600162650ustar00rootroot00000000000000require_relative "helper" class TestTSV < Test::Unit::TestCase def test_default_separator tsv = CSV::TSV.new(String.new) assert_equal("\t", tsv.col_sep) end def test_override_separator tsv = CSV::TSV.new(String.new, col_sep: ",") assert_equal(",", tsv.col_sep) end def test_read_tsv_data data = "a\tb\tc\n1\t2\t3" result = CSV::TSV.parse(data) assert_equal([["a", "b", "c"], ["1", "2", "3"]], result.to_a) end def test_write_tsv_data output = String.new CSV::TSV.generate(output) do |tsv| tsv << ["a", "b", "c"] tsv << ["1", "2", "3"] end assert_equal("a\tb\tc\n1\t2\t3\n", output) end def test_inheritance assert_kind_of(CSV, CSV::TSV.new(String.new)) end end csv-3.3.5/test/csv/write/000077500000000000000000000000001501670011600152165ustar00rootroot00000000000000csv-3.3.5/test/csv/write/test_converters.rb000066400000000000000000000025221501670011600207750ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" module TestCSVWriteConverters def test_one assert_equal(%Q[=a,=b,=c\n], generate_line(["a", "b", "c"], write_converters: ->(value) {"=" + value})) end def test_multiple 
assert_equal(%Q[=a_,=b_,=c_\n], generate_line(["a", "b", "c"], write_converters: [ ->(value) {"=" + value}, ->(value) {value + "_"}, ])) end def test_nil_value assert_equal(%Q[a,NaN,29\n], generate_line(["a", nil, 29], write_nil_value: "NaN")) end def test_empty_value assert_equal(%Q[a,,29\n], generate_line(["a", "", 29], write_empty_value: nil)) end end class TestCSVWriteConvertersGenerateLine < Test::Unit::TestCase include TestCSVWriteConverters extend DifferentOFS def generate_line(row, **kwargs) CSV.generate_line(row, **kwargs) end end class TestCSVWriteConvertersGenerate < Test::Unit::TestCase include TestCSVWriteConverters extend DifferentOFS def generate_line(row, **kwargs) CSV.generate(**kwargs) do |csv| csv << row end end end csv-3.3.5/test/csv/write/test_force_quotes.rb000066400000000000000000000043241501670011600213030ustar00rootroot00000000000000# frozen_string_literal: false require_relative "../helper" module TestCSVWriteForceQuotes def test_default assert_equal(%Q[1,2,3#{$INPUT_RECORD_SEPARATOR}], generate_line(["1", "2", "3"])) end def test_true assert_equal(%Q["1","2","3"#{$INPUT_RECORD_SEPARATOR}], generate_line(["1", "2", "3"], force_quotes: true)) end def test_false assert_equal(%Q[1,2,3#{$INPUT_RECORD_SEPARATOR}], generate_line(["1", "2", "3"], force_quotes: false)) end def test_field_name assert_equal(%Q["1",2,"3"#{$INPUT_RECORD_SEPARATOR}], generate_line(["1", "2", "3"], headers: ["a", "b", "c"], force_quotes: ["a", :c])) end def test_field_name_without_headers force_quotes = ["a", "c"] error = assert_raise(ArgumentError) do generate_line(["1", "2", "3"], force_quotes: force_quotes) end assert_equal(":headers is required when you use field name " + "in :force_quotes: " + "#{force_quotes.first.inspect}: #{force_quotes.inspect}", error.message) end def test_field_index assert_equal(%Q["1",2,"3"#{$INPUT_RECORD_SEPARATOR}], generate_line(["1", "2", "3"], force_quotes: [0, 2])) end def test_field_unknown force_quotes = [1.1] error = 
assert_raise(ArgumentError) do generate_line(["1", "2", "3"], force_quotes: force_quotes) end assert_equal(":force_quotes element must be field index or field name: " + "#{force_quotes.first.inspect}: #{force_quotes.inspect}", error.message) end end class TestCSVWriteForceQuotesGenerateLine < Test::Unit::TestCase include TestCSVWriteForceQuotes extend DifferentOFS def generate_line(row, **kwargs) CSV.generate_line(row, **kwargs) end end class TestCSVWriteForceQuotesGenerate < Test::Unit::TestCase include TestCSVWriteForceQuotes extend DifferentOFS def generate_line(row, **kwargs) CSV.generate(**kwargs) do |csv| csv << row end end end csv-3.3.5/test/csv/write/test_general.rb000066400000000000000000000154771501670011600202350ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" module TestCSVWriteGeneral include CSVHelper def test_tab assert_equal("\t#{$INPUT_RECORD_SEPARATOR}", generate_line(["\t"])) end def test_quote_character assert_equal(%Q[foo,"""",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q["], "baz"])) end def test_quote_character_double assert_equal(%Q[foo,"""""",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q[""], "baz"])) end def test_quote assert_equal(%Q[foo,"""bar""",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q["bar"], "baz"])) end def test_quote_lf assert_equal(%Q["""\n","""\n"#{$INPUT_RECORD_SEPARATOR}], generate_line([%Q["\n], %Q["\n]])) end def test_quote_cr assert_equal(%Q["""\r","""\r"#{$INPUT_RECORD_SEPARATOR}], generate_line([%Q["\r], %Q["\r]])) end def test_quote_last assert_equal(%Q[foo,"bar"""#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q[bar"]])) end def test_quote_lf_last assert_equal(%Q[foo,"\nbar"""#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q[\nbar"]])) end def test_quote_lf_value_lf assert_equal(%Q[foo,"""\nbar\n"""#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q["\nbar\n"]])) end def test_quote_lf_value_lf_nil 
assert_equal(%Q[foo,"""\nbar\n""",#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", %Q["\nbar\n"], nil])) end def test_cr assert_equal(%Q[foo,"\r",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\r", "baz"])) end def test_lf assert_equal(%Q[foo,"\n",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\n", "baz"])) end def test_cr_lf assert_equal(%Q[foo,"\r\n",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\r\n", "baz"])) end def test_cr_dot_lf assert_equal(%Q[foo,"\r.\n",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\r.\n", "baz"])) end def test_cr_lf_cr assert_equal(%Q[foo,"\r\n\r",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\r\n\r", "baz"])) end def test_cr_lf_lf assert_equal(%Q[foo,"\r\n\n",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "\r\n\n", "baz"])) end def test_cr_lf_comma assert_equal(%Q["\r\n,"#{$INPUT_RECORD_SEPARATOR}], generate_line(["\r\n,"])) end def test_cr_lf_comma_nil assert_equal(%Q["\r\n,",#{$INPUT_RECORD_SEPARATOR}], generate_line(["\r\n,", nil])) end def test_comma assert_equal(%Q[","#{$INPUT_RECORD_SEPARATOR}], generate_line([","])) end def test_comma_double assert_equal(%Q[",",","#{$INPUT_RECORD_SEPARATOR}], generate_line([",", ","])) end def test_comma_and_value assert_equal(%Q[foo,"foo,bar",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "foo,bar", "baz"])) end def test_one_element assert_equal(%Q[foo#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo"])) end def test_nil_values_only assert_equal(%Q[,,#{$INPUT_RECORD_SEPARATOR}], generate_line([nil, nil, nil])) end def test_nil_double_only assert_equal(%Q[,#{$INPUT_RECORD_SEPARATOR}], generate_line([nil, nil])) end def test_nil_values assert_equal(%Q[foo,,,#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", nil, nil, nil])) end def test_nil_value_first assert_equal(%Q[,foo,baz#{$INPUT_RECORD_SEPARATOR}], generate_line([nil, "foo", "baz"])) end def test_nil_value_middle assert_equal(%Q[foo,,baz#{$INPUT_RECORD_SEPARATOR}], 
generate_line(["foo", nil, "baz"])) end def test_nil_value_last assert_equal(%Q[foo,baz,#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "baz", nil])) end def test_nil_empty assert_equal(%Q[,""#{$INPUT_RECORD_SEPARATOR}], generate_line([nil, ""])) end def test_nil_cr assert_equal(%Q[,"\r"#{$INPUT_RECORD_SEPARATOR}], generate_line([nil, "\r"])) end def test_values assert_equal(%Q[foo,bar#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "bar"])) end def test_semi_colon assert_equal(%Q[;#{$INPUT_RECORD_SEPARATOR}], generate_line([";"])) end def test_semi_colon_values assert_equal(%Q[;,;#{$INPUT_RECORD_SEPARATOR}], generate_line([";", ";"])) end def test_tab_values assert_equal(%Q[\t,\t#{$INPUT_RECORD_SEPARATOR}], generate_line(["\t", "\t"])) end def test_col_sep assert_equal(%Q[a;b;;c#{$INPUT_RECORD_SEPARATOR}], generate_line(["a", "b", nil, "c"], col_sep: ";")) assert_equal(%Q[a\tb\t\tc#{$INPUT_RECORD_SEPARATOR}], generate_line(["a", "b", nil, "c"], col_sep: "\t")) end def test_row_sep assert_equal(%Q[a,b,,c\r\n], generate_line(["a", "b", nil, "c"], row_sep: "\r\n")) end def test_force_quotes assert_equal(%Q["1","b","","already ""quoted"""#{$INPUT_RECORD_SEPARATOR}], generate_line([1, "b", nil, %Q{already "quoted"}], force_quotes: true)) end def test_encoding_utf8 assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}], generate_line(["あ" , "い", "う"])) end def test_encoding_euc_jp row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), generate_line(row)) end def test_encoding_with_default_internal with_default_internal(Encoding::UTF_8) do row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), generate_line(row, encoding: Encoding::EUC_JP)) end end def test_with_default_internal with_default_internal(Encoding::UTF_8) do row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")} 
assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"), generate_line(row)) end end end class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase include TestCSVWriteGeneral extend DifferentOFS def generate_line(row, **kwargs) CSV.generate_line(row, **kwargs) end end class TestCSVWriteGeneralGenerate < Test::Unit::TestCase include TestCSVWriteGeneral extend DifferentOFS def generate_line(row, **kwargs) CSV.generate(**kwargs) do |csv| csv << row end end end csv-3.3.5/test/csv/write/test_quote_empty.rb000066400000000000000000000034311501670011600211560ustar00rootroot00000000000000# -*- coding: utf-8 -*- # frozen_string_literal: false require_relative "../helper" module TestCSVWriteQuoteEmpty def test_quote_empty_default assert_equal(%Q["""",""#{$INPUT_RECORD_SEPARATOR}], generate_line([%Q["], ""])) end def test_quote_empty_false assert_equal(%Q["""",#{$INPUT_RECORD_SEPARATOR}], generate_line([%Q["], ""], quote_empty: false)) end def test_empty_default assert_equal(%Q[foo,"",baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "", "baz"])) end def test_empty_false assert_equal(%Q[foo,,baz#{$INPUT_RECORD_SEPARATOR}], generate_line(["foo", "", "baz"], quote_empty: false)) end def test_empty_only_default assert_equal(%Q[""#{$INPUT_RECORD_SEPARATOR}], generate_line([""])) end def test_empty_only_false assert_equal(%Q[#{$INPUT_RECORD_SEPARATOR}], generate_line([""], quote_empty: false)) end def test_empty_double_default assert_equal(%Q["",""#{$INPUT_RECORD_SEPARATOR}], generate_line(["", ""])) end def test_empty_double_false assert_equal(%Q[,#{$INPUT_RECORD_SEPARATOR}], generate_line(["", ""], quote_empty: false)) end end class TestCSVWriteQuoteEmptyGenerateLine < Test::Unit::TestCase include TestCSVWriteQuoteEmpty extend DifferentOFS def generate_line(row, **kwargs) CSV.generate_line(row, **kwargs) end end class TestCSVWriteQuoteEmptyGenerate < Test::Unit::TestCase include TestCSVWriteQuoteEmpty extend DifferentOFS def generate_line(row, **kwargs) 
CSV.generate(**kwargs) do |csv| csv << row end end end csv-3.3.5/test/lib/000077500000000000000000000000001501670011600140375ustar00rootroot00000000000000csv-3.3.5/test/lib/with_different_ofs.rb000066400000000000000000000013221501670011600202320ustar00rootroot00000000000000# frozen_string_literal: true module DifferentOFS is_output_field_separator_deprecated = false verbose, $VERBOSE = $VERBOSE, true stderr, $stderr = $stderr, StringIO.new begin ofs, $, = $,, "-" is_output_field_separator_deprecated = (not $stderr.string.empty?) ensure $, = ofs $stderr = stderr $VERBOSE = verbose end unless is_output_field_separator_deprecated module WithDifferentOFS def setup super @ofs, $, = $,, "-" end def teardown $, = @ofs super end end def self.extended(klass) super(klass) klass.const_set(:DifferentOFS, Class.new(klass).class_eval {include WithDifferentOFS}) end end end